1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/CallingConv.h"
61#include "llvm/IR/Constant.h"
62#include "llvm/IR/Constants.h"
63#include "llvm/IR/DataLayout.h"
64#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/Function.h"
67#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/IRBuilder.h"
70#include "llvm/IR/Intrinsics.h"
71#include "llvm/IR/IntrinsicsPowerPC.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/Value.h"
76#include "llvm/MC/MCContext.h"
77#include "llvm/MC/MCExpr.h"
87#include "llvm/Support/Debug.h"
89#include "llvm/Support/Format.h"
95#include <algorithm>
96#include <cassert>
97#include <cstdint>
98#include <iterator>
99#include <list>
100#include <optional>
101#include <utility>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "ppc-lowering"
107
static cl::opt<bool> DisableP10StoreForward(
    "disable-p10-store-forward",
    cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
    cl::init(false));
112
113static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
114cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
117cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
120cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
121
122static cl::opt<bool> DisableSCO("disable-ppc-sco",
123cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
124
125static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
126cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
127
128static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
129cl::desc("use absolute jump tables on ppc"), cl::Hidden);
130
131static cl::opt<bool>
132 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
133 cl::desc("disable vector permute decomposition"),
134 cl::init(true), cl::Hidden);
135
137 "disable-auto-paired-vec-st",
138 cl::desc("disable automatically generated 32byte paired vector stores"),
139 cl::init(true), cl::Hidden);
140
142 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
143 cl::desc("Set minimum number of entries to use a jump table on PPC"));
144
146 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
147 cl::desc("max depth when checking alias info in GatherAllAliases()"));
148
150 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
151 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
152 "function to use initial-exec"));
153
154STATISTIC(NumTailCalls, "Number of tail calls");
155STATISTIC(NumSiblingCalls, "Number of sibling calls");
156STATISTIC(ShufflesHandledWithVPERM,
157 "Number of shuffles lowered to a VPERM or XXPERM");
158STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
159
160static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
161
162static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
163
164static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
171
172// FIXME: Remove this once the bug has been fixed!
174
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
177 : TargetLowering(TM), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting load and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186
187 // Set up the register classes.
188 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
189 if (!useSoftFloat()) {
190 if (hasSPE()) {
191 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
192 // EFPU2 APU only supports f32
193 if (!Subtarget.hasEFPU2())
194 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
195 } else {
196 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
197 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
198 }
199 }
200
201 // Match BITREVERSE to customized fast code sequence in the td file.
204
  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
207
208 // Custom lower inline assembly to check for special registers.
211
212 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
213 for (MVT VT : MVT::integer_valuetypes()) {
216 }
217
218 if (Subtarget.isISA3_0()) {
219 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
220 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
221 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
222 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
223 } else {
224 // No extending loads from f16 or HW conversions back and forth.
225 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
228 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
231 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
232 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
233 }
234
235 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
236
  // PowerPC has pre-inc load and stores.
248 if (!Subtarget.hasSPE()) {
253 }
254
255 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
256 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
257 for (MVT VT : ScalarIntVTs) {
262 }
263
264 if (Subtarget.useCRBits()) {
266
267 if (isPPC64 || Subtarget.hasFPCVT()) {
270 isPPC64 ? MVT::i64 : MVT::i32);
273 isPPC64 ? MVT::i64 : MVT::i32);
274
277 isPPC64 ? MVT::i64 : MVT::i32);
280 isPPC64 ? MVT::i64 : MVT::i32);
281
284 isPPC64 ? MVT::i64 : MVT::i32);
287 isPPC64 ? MVT::i64 : MVT::i32);
288
291 isPPC64 ? MVT::i64 : MVT::i32);
294 isPPC64 ? MVT::i64 : MVT::i32);
295 } else {
300 }
301
302 // PowerPC does not support direct load/store of condition registers.
305
306 // FIXME: Remove this once the ANDI glue bug is fixed:
307 if (ANDIGlueBug)
309
310 for (MVT VT : MVT::integer_valuetypes()) {
313 setTruncStoreAction(VT, MVT::i1, Expand);
314 }
315
316 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
317 }
318
319 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
320 // PPC (the libcall is not available).
325
326 // We do not currently implement these libm ops for PowerPC.
327 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
328 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
329 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
330 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
332 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
333
334 // PowerPC has no SREM/UREM instructions unless we are on P9
335 // On P9 we may use a hardware instruction to compute the remainder.
336 // When the result of both the remainder and the division is required it is
337 // more efficient to compute the remainder from the result of the division
338 // rather than use the remainder instruction. The instructions are legalized
339 // directly because the DivRemPairsPass performs the transformation at the IR
340 // level.
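  // Illustrative sketch (not part of this file): at the IR level, when both
  // results are needed, DivRemPairsPass rewrites the remainder in terms of the
  // existing division, roughly:
  //   %q = sdiv i32 %a, %b
  //   %t = mul i32 %q, %b
  //   %r = sub i32 %a, %t   ; replaces %r = srem i32 %a, %b
  // so only one divide is issued even though the remainder instructions are
  // marked Legal below.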
341 if (Subtarget.isISA3_0()) {
346 } else {
351 }
352
353 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
362
363 // Handle constrained floating-point operations of scalar.
  // TODO: Handle SPE-specific operations.
370
375
376 if (!Subtarget.hasSPE()) {
379 }
380
381 if (Subtarget.hasVSX()) {
384 }
385
386 if (Subtarget.hasFSQRT()) {
389 }
390
391 if (Subtarget.hasFPRND()) {
396
401 }
402
403 // We don't support sin/cos/sqrt/fmod/pow
414
  // MASS transformation for LLVM intrinsics with replicating fast-math flags,
  // to be consistent with the PPCGenScalarMASSEntries pass.
417 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
430 }
431
432 if (Subtarget.hasSPE()) {
435 } else {
436 setOperationAction(ISD::FMA , MVT::f64, Legal);
437 setOperationAction(ISD::FMA , MVT::f32, Legal);
438 }
439
440 if (Subtarget.hasSPE())
441 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
442
444
445 // If we're enabling GP optimizations, use hardware square root
446 if (!Subtarget.hasFSQRT() &&
447 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
448 Subtarget.hasFRE()))
450
451 if (!Subtarget.hasFSQRT() &&
452 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
453 Subtarget.hasFRES()))
455
456 if (Subtarget.hasFCPSGN()) {
459 } else {
462 }
463
464 if (Subtarget.hasFPRND()) {
469
474 }
475
  // Prior to P10, PowerPC does not have BSWAP, but we can use the vector BSWAP
477 // instruction xxbrd to speed up scalar BSWAP64.
478 if (Subtarget.isISA3_1()) {
481 } else {
484 ISD::BSWAP, MVT::i64,
485 (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
486 }
487
488 // CTPOP or CTTZ were introduced in P8/P9 respectively
489 if (Subtarget.isISA3_0()) {
490 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
491 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
492 } else {
493 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
494 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
495 }
496
497 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
500 } else {
503 }
504
505 // PowerPC does not have ROTR
508
509 if (!Subtarget.useCRBits()) {
510 // PowerPC does not have Select
515 }
516
517 // PowerPC wants to turn select_cc of FP into fsel when possible.
520
521 // PowerPC wants to optimize integer setcc a bit
522 if (!Subtarget.useCRBits())
524
525 if (Subtarget.hasFPU()) {
529
533 }
534
535 // PowerPC does not have BRCOND which requires SetCC
536 if (!Subtarget.useCRBits())
538
540
541 if (Subtarget.hasSPE()) {
542 // SPE has built-in conversions
549
550 // SPE supports signaling compare of f32/f64.
553 } else {
554 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
557
558 // PowerPC does not have [U|S]INT_TO_FP
563 }
564
565 if (Subtarget.hasDirectMove() && isPPC64) {
570 if (TM.Options.UnsafeFPMath) {
579 }
580 } else {
585 }
586
587 // We cannot sextinreg(i1). Expand to shifts.
589
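  // Illustrative sketch (not part of this file): the generic expansion of
  // sign_extend_inreg from i1 uses a shift pair, e.g. for an i32 container:
  //   %t = shl i32 %x, 31
  //   %r = ashr i32 %t, 31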
590 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
591 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, and the like. As a result, no
  // other SjLj exception interfaces are implemented; please don't build
594 // your own exception handling based on them.
595 // LLVM/Clang supports zero-cost DWARF exception handling.
598
599 // We want to legalize GlobalAddress and ConstantPool nodes into the
600 // appropriate instructions to materialize the address.
611
612 // TRAP is legal.
613 setOperationAction(ISD::TRAP, MVT::Other, Legal);
614
615 // TRAMPOLINE is custom lowered.
618
619 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
621
622 if (Subtarget.is64BitELFABI()) {
623 // VAARG always uses double-word chunks, so promote anything smaller.
625 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
627 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
629 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
631 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
633 } else if (Subtarget.is32BitELFABI()) {
634 // VAARG is custom lowered with the 32-bit SVR4 ABI.
637 } else
639
640 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
641 if (Subtarget.is32BitELFABI())
643 else
645
646 // Use the default implementation.
647 setOperationAction(ISD::VAEND , MVT::Other, Expand);
656
657 // We want to custom lower some of our intrinsics.
663
664 // To handle counter-based loop conditions.
666
671
672 // Comparisons that require checking two conditions.
673 if (Subtarget.hasSPE()) {
678 }
691
694
695 if (Subtarget.has64BitSupport()) {
696 // They also have instructions for converting between i64 and fp.
705 // This is just the low 32 bits of a (signed) fp->i64 conversion.
706 // We cannot do this with Promote because i64 is not a legal type.
709
710 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
713 }
714 } else {
715 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
716 if (Subtarget.hasSPE()) {
719 } else {
722 }
723 }
724
725 // With the instructions enabled under FPCVT, we can do everything.
726 if (Subtarget.hasFPCVT()) {
727 if (Subtarget.has64BitSupport()) {
736 }
737
746 }
747
748 if (Subtarget.use64BitRegs()) {
749 // 64-bit PowerPC implementations can support i64 types directly
750 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
751 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
753 // 64-bit PowerPC wants to expand i128 shifts itself.
757 } else {
758 // 32-bit PowerPC wants to expand i64 shifts itself.
762 }
763
764 // PowerPC has better expansions for funnel shifts than the generic
765 // TargetLowering::expandFunnelShift.
766 if (Subtarget.has64BitSupport()) {
769 }
772
773 if (Subtarget.hasVSX()) {
778 }
779
780 if (Subtarget.hasAltivec()) {
781 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
786 }
787 // First set operation action for all vector types to expand. Then we
788 // will selectively turn on ones that can be effectively codegen'd.
790 // add/sub are legal for all supported vector VT's.
793
794 // For v2i64, these are only valid with P8Vector. This is corrected after
795 // the loop.
796 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
801 }
802 else {
807 }
808
809 if (Subtarget.hasVSX()) {
812 }
813
814 // Vector instructions introduced in P8
815 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
818 }
819 else {
822 }
823
824 // Vector instructions introduced in P9
825 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
827 else
829
830 // We promote all shuffles to v16i8.
832 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
833
834 // We promote all non-typed operations to v4i32.
836 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
838 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
840 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
842 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
844 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
847 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
849 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
850
851 // No other operations are legal.
890
891 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
892 setTruncStoreAction(VT, InnerVT, Expand);
895 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
896 }
897 }
899 if (!Subtarget.hasP8Vector()) {
900 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
901 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
902 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
903 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
904 }
905
906 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
907 // with merges, splats, etc.
909
    // Vector truncates to sub-word integers that fit in an Altivec/VSX register
911 // are cheap, so handle them before they get expanded to scalar.
917
918 setOperationAction(ISD::AND , MVT::v4i32, Legal);
919 setOperationAction(ISD::OR , MVT::v4i32, Legal);
920 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
921 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
923 Subtarget.useCRBits() ? Legal : Expand);
924 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
934 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
937
938 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
939 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
940 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
941 if (Subtarget.hasAltivec())
942 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
944 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
945 if (Subtarget.hasP8Altivec())
946 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
947
948 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
949 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
950 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
951 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
952
953 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
954 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
955
956 if (Subtarget.hasVSX()) {
957 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
958 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
960 }
961
962 if (Subtarget.hasP8Altivec())
963 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
964 else
965 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
966
967 if (Subtarget.isISA3_1()) {
968 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
969 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
970 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
971 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
972 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
973 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
974 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
975 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
976 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
977 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
978 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
979 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
980 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
981 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
982 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
983 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
984 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
985 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
986 }
987
988 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
989 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
990
993 // LE is P8+/64-bit so direct moves are supported and these operations
994 // are legal. The custom transformation requires 64-bit since we need a
995 // pair of stores that will cover a 128-bit load for P10.
996 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1000 }
1001
1006
1007 // Altivec does not contain unordered floating-point compare instructions
1008 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1009 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1010 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1011 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1012
1013 if (Subtarget.hasVSX()) {
1016 if (Subtarget.hasP8Vector()) {
1019 }
1020 if (Subtarget.hasDirectMove() && isPPC64) {
1029 }
1031
1032 // The nearbyint variants are not allowed to raise the inexact exception
1033 // so we can only code-gen them with unsafe math.
1034 if (TM.Options.UnsafeFPMath) {
1037 }
1038
1039 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1040 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1041 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1043 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1044 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1047
1049 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1053
1054 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1055 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1056
1057 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1058 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1059
1060 // Share the Altivec comparison restrictions.
1061 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1062 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1063 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1064 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1065
1066 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1067 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1068
1070
1071 if (Subtarget.hasP8Vector())
1072 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1073
1074 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1075
1076 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1077 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1078 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1079
1080 if (Subtarget.hasP8Altivec()) {
1081 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1082 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1083 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1084
1085 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1086 // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.
1089 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1090 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1091 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1092
1093 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1094 }
1095 else {
1096 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1097 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1098 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1099
1100 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1101
1102 // VSX v2i64 only supports non-arithmetic operations.
1103 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1104 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1105 }
1106
1107 if (Subtarget.isISA3_1())
1108 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1109 else
1110 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1111
1112 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1113 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1115 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1116
1118
1127
1128 // Custom handling for partial vectors of integers converted to
1129 // floating point. We already have optimal handling for v2i32 through
1130 // the DAG combine, so those aren't necessary.
1147
1148 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1149 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1150 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1151 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1154
1157
      // Handle constrained floating-point operations on vectors.
      // The predicate is `hasVSX` because Altivec instructions do not raise
      // exceptions but VSX vector instructions do.
1174
1188
1189 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1190 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1191
1192 for (MVT FPT : MVT::fp_valuetypes())
1193 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1194
1195 // Expand the SELECT to SELECT_CC
1197
1198 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1199 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1200
1201 // No implementation for these ops for PowerPC.
1203 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1204 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1205 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1207 setOperationAction(ISD::FREM, MVT::f128, Expand);
1208 }
1209
1210 if (Subtarget.hasP8Altivec()) {
1211 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1212 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1213 }
1214
1215 if (Subtarget.hasP9Vector()) {
1218
1219 // Test data class instructions store results in CR bits.
1220 if (Subtarget.useCRBits()) {
1224 }
1225
1226 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1227 // SRL, but not for SRA because of the instructions available:
1228 // VS{RL} and VS{RL}O.
1229 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1230 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1231 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1232
1233 setOperationAction(ISD::FADD, MVT::f128, Legal);
1234 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1235 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1236 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1238
1239 setOperationAction(ISD::FMA, MVT::f128, Legal);
1246
1248 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1250 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1253
1257
1258 // Handle constrained floating-point operations of fp128
1275 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1276 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1277 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1278 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1279 } else if (Subtarget.hasVSX()) {
1282
1283 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1284 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1285
      // Set FADD/FSUB as libcalls to avoid having the legalizer expand the
      // fp_to_uint and int_to_fp.
1290
1291 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1292 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1293 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1294 setOperationAction(ISD::FABS, MVT::f128, Expand);
1296 setOperationAction(ISD::FMA, MVT::f128, Expand);
1298
1299 // Expand the fp_extend if the target type is fp128.
1302
1303 // Expand the fp_round if the source type is fp128.
1304 for (MVT VT : {MVT::f32, MVT::f64}) {
1307 }
1308
1313
1314 // Lower following f128 select_cc pattern:
1315 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1317
1318 // We need to handle f128 SELECT_CC with integer result type.
1320 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1321 }
1322
1323 if (Subtarget.hasP9Altivec()) {
1324 if (Subtarget.isISA3_1()) {
1329 } else {
1332 }
1340
1341 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1342 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1343 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1344 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1345 }
1346
1347 if (Subtarget.hasP10Vector()) {
1349 }
1350 }
1351
1352 if (Subtarget.pairedVectorMemops()) {
1353 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1354 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1355 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1356 }
1357 if (Subtarget.hasMMA()) {
1358 if (Subtarget.isISAFuture())
1359 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1360 else
1361 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1362 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1363 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1365 }
1366
1367 if (Subtarget.has64BitSupport())
1369
1370 if (Subtarget.isISA3_1())
1371 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1372
1373 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1374
1375 if (!isPPC64) {
1378 }
1379
1384 }
1385
1387
1388 if (Subtarget.hasAltivec()) {
1389 // Altivec instructions set fields to all zeros or all ones.
1391 }
1392
1395 else if (isPPC64)
1397 else
1399
1400 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1401
1402 // We have target-specific dag combine patterns for the following nodes:
1405 if (Subtarget.hasFPCVT())
1408 if (Subtarget.useCRBits())
1412
1414
1416
1417 if (Subtarget.useCRBits()) {
1419 }
1420
1421 setLibcallName(RTLIB::LOG_F128, "logf128");
1422 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1423 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1424 setLibcallName(RTLIB::EXP_F128, "expf128");
1425 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1426 setLibcallName(RTLIB::SIN_F128, "sinf128");
1427 setLibcallName(RTLIB::COS_F128, "cosf128");
1428 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1429 setLibcallName(RTLIB::POW_F128, "powf128");
1430 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1431 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1432 setLibcallName(RTLIB::REM_F128, "fmodf128");
1433 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1434 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1435 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1436 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1437 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1438 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1439 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1440 setLibcallName(RTLIB::RINT_F128, "rintf128");
1441 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1442 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1443 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1444 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1445 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1446
1447 if (Subtarget.isAIXABI()) {
1448 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1449 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1450 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1451 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1452 }
1453
1454 // With 32 condition bits, we don't need to sink (and duplicate) compares
1455 // aggressively in CodeGenPrep.
1456 if (Subtarget.useCRBits()) {
1459 }
1460
1461 // TODO: The default entry number is set to 64. This stops most jump table
  // generation on PPC. But it is good for current PPC hardware because the
  // mtctr-based indirect branch to the jump table may lead to poor branch
  // prediction. Re-evaluate this value on future hardware that can do better
  // with mtctr.
1466
1468
1469 switch (Subtarget.getCPUDirective()) {
1470 default: break;
1471 case PPC::DIR_970:
1472 case PPC::DIR_A2:
1473 case PPC::DIR_E500:
1474 case PPC::DIR_E500mc:
1475 case PPC::DIR_E5500:
1476 case PPC::DIR_PWR4:
1477 case PPC::DIR_PWR5:
1478 case PPC::DIR_PWR5X:
1479 case PPC::DIR_PWR6:
1480 case PPC::DIR_PWR6X:
1481 case PPC::DIR_PWR7:
1482 case PPC::DIR_PWR8:
1483 case PPC::DIR_PWR9:
1484 case PPC::DIR_PWR10:
1485 case PPC::DIR_PWR11:
1489 break;
1490 }
1491
1492 if (Subtarget.enableMachineScheduler())
1494 else
1496
1498
1499 // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1501 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1502 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1503 MaxStoresPerMemset = 32;
1505 MaxStoresPerMemcpy = 32;
1509 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1510 // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
1512 // over one hundred cycles.
1513 MaxStoresPerMemset = 128;
1514 MaxStoresPerMemcpy = 128;
1515 MaxStoresPerMemmove = 128;
1516 MaxLoadsPerMemcmp = 128;
1517 } else {
1520 }
1521
1522 IsStrictFPEnabled = true;
1523
1524 // Let the subtarget (CPU) decide if a predictable select is more expensive
1525 // than the corresponding branch. This information is used in CGP to decide
1526 // when to convert selects into branches.
1528
1530}
1531
1532// *********************************** NOTE ************************************
1533// For selecting load and store instructions, the addressing modes are defined
1534// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
1536//
1537// The TD definitions for the addressing modes correspond to their respective
1538// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1539// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1540// address mode flags of a particular node. Afterwards, the computed address
1541// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1542// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1543// accordingly, based on the preferred addressing mode.
1544//
1545// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1546// MemOpFlags contains all the possible flags that can be used to compute the
1547// optimal addressing mode for load and store instructions.
1548// AddrMode contains all the possible load and store addressing modes available
1549// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1550//
1551// When adding new load and store instructions, it is possible that new address
1552// flags may need to be added into MemOpFlags, and a new addressing mode will
1553// need to be added to AddrMode. An entry of the new addressing mode (consisting
1554// of the minimal and main distinguishing address flags for the new load/store
1555// instructions) will need to be added into initializeAddrModeMap() below.
// Finally, when adding new addressing modes, getAddrModeForFlags() will
1557// need to be updated to account for selecting the optimal addressing mode.
1558// *****************************************************************************
1559/// Initialize the map that relates the different addressing modes of the load
1560/// and store instructions to a set of flags. This ensures the load/store
1561/// instruction is correctly matched during instruction selection.
1562void PPCTargetLowering::initializeAddrModeMap() {
1563 AddrModesMap[PPC::AM_DForm] = {
1564 // LWZ, STW
1569 // LBZ, LHZ, STB, STH
1574 // LHA
1579 // LFS, LFD, STFS, STFD
1584 };
1585 AddrModesMap[PPC::AM_DSForm] = {
1586 // LWA
1590 // LD, STD
1594 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1598 };
1599 AddrModesMap[PPC::AM_DQForm] = {
1600 // LXV, STXV
1604 };
1605 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1607 // TODO: Add mapping for quadword load/store.
1608}
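
// Illustrative sketch (assumed flag names, not the actual implementation): a
// simple word load such as
//   lwz r3, 8(r4)          ; register + 16-bit signed displacement
// would have computeMOFlags() produce something like
//   (PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt | subtarget flags)
// which matches the AM_DForm entries populated above, so
// getAddrModeForFlags() returns PPC::AM_DForm and SelectOptimalAddrMode()
// then splits the address into the D-Form Base/Displacement pair.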
1609
1610/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1611/// the desired ByVal argument alignment.
1612static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1613 if (MaxAlign == MaxMaxAlign)
1614 return;
1615 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1616 if (MaxMaxAlign >= 32 &&
1617 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1618 MaxAlign = Align(32);
1619 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1620 MaxAlign < 16)
1621 MaxAlign = Align(16);
1622 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1623 Align EltAlign;
1624 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1625 if (EltAlign > MaxAlign)
1626 MaxAlign = EltAlign;
1627 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1628 for (auto *EltTy : STy->elements()) {
1629 Align EltAlign;
1630 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1631 if (EltAlign > MaxAlign)
1632 MaxAlign = EltAlign;
1633 if (MaxAlign == MaxMaxAlign)
1634 break;
1635 }
1636 }
1637}
1638
1639/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1640/// function arguments in the caller parameter area.
uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest is passed on an 8-byte boundary on PPC64 and a 4-byte boundary on
  // PPC32.
1645 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1646 if (Subtarget.hasAltivec())
1647 getMaxByValAlign(Ty, Alignment, Align(16));
1648 return Alignment.value();
1649}
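
// Worked example (illustrative): for a by-value aggregate that contains a
// 128-bit vector member (e.g. <4 x i32>) on a target with Altivec,
// getMaxByValAlign() raises MaxAlign to 16, so getByValTypeAlignment()
// returns 16 instead of the default 8-byte (PPC64) / 4-byte (PPC32) boundary.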
1650
bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
1653}
1654
bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
1657}
1658
1660 return VT.isScalarInteger();
1661}
1662
1664 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1665 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1666 return false;
1667
1668 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1669 if (VTy->getScalarType()->isIntegerTy()) {
1670 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1671 if (ElemSizeInBits == 32) {
1672 Index = Subtarget.isLittleEndian() ? 2 : 1;
1673 return true;
1674 }
1675 if (ElemSizeInBits == 64) {
1676 Index = Subtarget.isLittleEndian() ? 1 : 0;
1677 return true;
1678 }
1679 }
1680 }
1681 return false;
1682}
1683
1684const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1685 switch ((PPCISD::NodeType)Opcode) {
1686 case PPCISD::FIRST_NUMBER: break;
1687 case PPCISD::FSEL: return "PPCISD::FSEL";
1688 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1689 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1690 case PPCISD::FCFID: return "PPCISD::FCFID";
1691 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1692 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1693 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1694 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1695 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1696 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1697 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1698 case PPCISD::FRE: return "PPCISD::FRE";
1699 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1700 case PPCISD::FTSQRT:
1701 return "PPCISD::FTSQRT";
1702 case PPCISD::FSQRT:
1703 return "PPCISD::FSQRT";
1704 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1705 case PPCISD::VPERM: return "PPCISD::VPERM";
1706 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1708 return "PPCISD::XXSPLTI_SP_TO_DP";
1710 return "PPCISD::XXSPLTI32DX";
1711 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1712 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1713 case PPCISD::XXPERM:
1714 return "PPCISD::XXPERM";
1715 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1716 case PPCISD::CMPB: return "PPCISD::CMPB";
1717 case PPCISD::Hi: return "PPCISD::Hi";
1718 case PPCISD::Lo: return "PPCISD::Lo";
1719 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1720 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1721 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1722 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1723 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1724 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1725 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1726 case PPCISD::SRL: return "PPCISD::SRL";
1727 case PPCISD::SRA: return "PPCISD::SRA";
1728 case PPCISD::SHL: return "PPCISD::SHL";
1729 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1730 case PPCISD::CALL: return "PPCISD::CALL";
1731 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1732 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1733 case PPCISD::CALL_RM:
1734 return "PPCISD::CALL_RM";
1736 return "PPCISD::CALL_NOP_RM";
1738 return "PPCISD::CALL_NOTOC_RM";
1739 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1740 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1741 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1742 case PPCISD::BCTRL_RM:
1743 return "PPCISD::BCTRL_RM";
1745 return "PPCISD::BCTRL_LOAD_TOC_RM";
1746 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1747 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1748 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1749 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1750 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1751 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1752 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1753 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1754 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1755 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1757 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1759 return "PPCISD::ANDI_rec_1_EQ_BIT";
1761 return "PPCISD::ANDI_rec_1_GT_BIT";
1762 case PPCISD::VCMP: return "PPCISD::VCMP";
1763 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1764 case PPCISD::LBRX: return "PPCISD::LBRX";
1765 case PPCISD::STBRX: return "PPCISD::STBRX";
1766 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1767 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1768 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1769 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1770 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1771 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1772 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1773 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1774 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1776 return "PPCISD::ST_VSR_SCAL_INT";
1777 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1778 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1779 case PPCISD::BDZ: return "PPCISD::BDZ";
1780 case PPCISD::MFFS: return "PPCISD::MFFS";
1781 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1782 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1783 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1784 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1785 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1786 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1787 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1788 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1789 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1790 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1791 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1792 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1793 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1794 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1795 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1796 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1797 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1798 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1799 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1800 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1801 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1802 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1803 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1805 return "PPCISD::PADDI_DTPREL";
1806 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1807 case PPCISD::SC: return "PPCISD::SC";
1808 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1809 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1810 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1811 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1812 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1813 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1814 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1815 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1816 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1817 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1818 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1819 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1821 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1823 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1824 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1825 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1826 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1827 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1828 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1829 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1830 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1831 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
  case PPCISD::STRICT_FADDRTZ:
    return "PPCISD::STRICT_FADDRTZ";
  case PPCISD::STRICT_FCTIDZ:
    return "PPCISD::STRICT_FCTIDZ";
  case PPCISD::STRICT_FCTIWZ:
    return "PPCISD::STRICT_FCTIWZ";
  case PPCISD::STRICT_FCTIDUZ:
    return "PPCISD::STRICT_FCTIDUZ";
  case PPCISD::STRICT_FCTIWUZ:
    return "PPCISD::STRICT_FCTIWUZ";
  case PPCISD::STRICT_FCFID:
    return "PPCISD::STRICT_FCFID";
  case PPCISD::STRICT_FCFIDU:
    return "PPCISD::STRICT_FCFIDU";
  case PPCISD::STRICT_FCFIDS:
    return "PPCISD::STRICT_FCFIDS";
  case PPCISD::STRICT_FCFIDUS:
    return "PPCISD::STRICT_FCFIDUS";
1850 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1851 case PPCISD::STORE_COND:
1852 return "PPCISD::STORE_COND";
1853 }
1854 return nullptr;
1855}
1856
1858 EVT VT) const {
1859 if (!VT.isVector())
1860 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1861
1863}
1864
1866 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1867 return true;
1868}
1869
1870//===----------------------------------------------------------------------===//
1871// Node matching predicates, for use by the tblgen matching code.
1872//===----------------------------------------------------------------------===//
1873
1874/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1877 return CFP->getValueAPF().isZero();
1878 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1879 // Maybe this has already been legalized into the constant pool?
1880 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1881 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1882 return CFP->getValueAPF().isZero();
1883 }
1884 return false;
1885}
1886
1887/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1888/// true if Op is undef or if it matches the specified value.
1889static bool isConstantOrUndef(int Op, int Val) {
1890 return Op < 0 || Op == Val;
1891}
1892
1893/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1894/// VPKUHUM instruction.
1895/// The ShuffleKind distinguishes between big-endian operations with
1896/// two different inputs (0), either-endian operations with two identical
1897/// inputs (1), and little-endian operations with two different inputs (2).
1898/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
1901 bool IsLE = DAG.getDataLayout().isLittleEndian();
1902 if (ShuffleKind == 0) {
1903 if (IsLE)
1904 return false;
1905 for (unsigned i = 0; i != 16; ++i)
1906 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1907 return false;
1908 } else if (ShuffleKind == 2) {
1909 if (!IsLE)
1910 return false;
1911 for (unsigned i = 0; i != 16; ++i)
1912 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1913 return false;
1914 } else if (ShuffleKind == 1) {
1915 unsigned j = IsLE ? 0 : 1;
1916 for (unsigned i = 0; i != 8; ++i)
1917 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1918 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1919 return false;
1920 }
1921 return true;
1922}
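
// Example (derived from the checks above): for ShuffleKind 0 on a big-endian
// target, the only accepted v16i8 mask is the odd-byte pack
//   <1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31>
// (undef entries are also tolerated), while ShuffleKind 2 on little-endian
// accepts the even-byte pack <0,2,4,...,30> instead.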
1923
1924/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1925/// VPKUWUM instruction.
1926/// The ShuffleKind distinguishes between big-endian operations with
1927/// two different inputs (0), either-endian operations with two identical
1928/// inputs (1), and little-endian operations with two different inputs (2).
1929/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
1932 bool IsLE = DAG.getDataLayout().isLittleEndian();
1933 if (ShuffleKind == 0) {
1934 if (IsLE)
1935 return false;
1936 for (unsigned i = 0; i != 16; i += 2)
1937 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1938 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1939 return false;
1940 } else if (ShuffleKind == 2) {
1941 if (!IsLE)
1942 return false;
1943 for (unsigned i = 0; i != 16; i += 2)
1944 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1945 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1946 return false;
1947 } else if (ShuffleKind == 1) {
1948 unsigned j = IsLE ? 0 : 2;
1949 for (unsigned i = 0; i != 8; i += 2)
1950 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1951 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1952 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1953 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1954 return false;
1955 }
1956 return true;
1957}
1958
1959/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1960/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1961/// current subtarget.
1962///
1963/// The ShuffleKind distinguishes between big-endian operations with
1964/// two different inputs (0), either-endian operations with two identical
1965/// inputs (1), and little-endian operations with two different inputs (2).
1966/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
1969 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1970 if (!Subtarget.hasP8Vector())
1971 return false;
1972
1973 bool IsLE = DAG.getDataLayout().isLittleEndian();
1974 if (ShuffleKind == 0) {
1975 if (IsLE)
1976 return false;
1977 for (unsigned i = 0; i != 16; i += 4)
1978 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1979 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1980 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1981 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1982 return false;
1983 } else if (ShuffleKind == 2) {
1984 if (!IsLE)
1985 return false;
1986 for (unsigned i = 0; i != 16; i += 4)
1987 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1988 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1989 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1990 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1991 return false;
1992 } else if (ShuffleKind == 1) {
1993 unsigned j = IsLE ? 0 : 4;
1994 for (unsigned i = 0; i != 8; i += 4)
1995 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1996 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1997 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1998 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1999 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
2000 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
2001 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
2002 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
2003 return false;
2004 }
2005 return true;
2006}
2007
2008/// isVMerge - Common function, used to match vmrg* shuffles.
2009///
2010static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2011 unsigned LHSStart, unsigned RHSStart) {
2012 if (N->getValueType(0) != MVT::v16i8)
2013 return false;
2014 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2015 "Unsupported merge size!");
2016
2017 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2018 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2019 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2020 LHSStart+j+i*UnitSize) ||
2021 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2022 RHSStart+j+i*UnitSize))
2023 return false;
2024 }
2025 return true;
2026}
2027
2028/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2029/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2030/// The ShuffleKind distinguishes between big-endian merges with two
2031/// different inputs (0), either-endian merges with two identical inputs (1),
2032/// and little-endian merges with two different inputs (2). For the latter,
2033/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
2036 if (DAG.getDataLayout().isLittleEndian()) {
2037 if (ShuffleKind == 1) // unary
2038 return isVMerge(N, UnitSize, 0, 0);
2039 else if (ShuffleKind == 2) // swapped
2040 return isVMerge(N, UnitSize, 0, 16);
2041 else
2042 return false;
2043 } else {
2044 if (ShuffleKind == 1) // unary
2045 return isVMerge(N, UnitSize, 8, 8);
2046 else if (ShuffleKind == 0) // normal
2047 return isVMerge(N, UnitSize, 8, 24);
2048 else
2049 return false;
2050 }
2051}
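
// Example (derived from isVMerge above): a big-endian low-word merge of two
// different inputs (ShuffleKind 0, UnitSize 4, i.e. vmrglw) corresponds to the
// v16i8 mask
//   <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>
// i.e. the low words of the two inputs interleaved, starting at byte offsets
// 8 (LHS) and 24 (RHS).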
2052
2053/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2054/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2055/// The ShuffleKind distinguishes between big-endian merges with two
2056/// different inputs (0), either-endian merges with two identical inputs (1),
2057/// and little-endian merges with two different inputs (2). For the latter,
2058/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
2061 if (DAG.getDataLayout().isLittleEndian()) {
2062 if (ShuffleKind == 1) // unary
2063 return isVMerge(N, UnitSize, 8, 8);
2064 else if (ShuffleKind == 2) // swapped
2065 return isVMerge(N, UnitSize, 8, 24);
2066 else
2067 return false;
2068 } else {
2069 if (ShuffleKind == 1) // unary
2070 return isVMerge(N, UnitSize, 0, 0);
2071 else if (ShuffleKind == 0) // normal
2072 return isVMerge(N, UnitSize, 0, 16);
2073 else
2074 return false;
2075 }
2076}
2077
2078/**
2079 * Common function used to match vmrgew and vmrgow shuffles
2080 *
2081 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
2083 * machine.
2084 * - Little Endian:
2085 * - Use offset of 0 to check for odd elements
2086 * - Use offset of 4 to check for even elements
2087 * - Big Endian:
2088 * - Use offset of 0 to check for even elements
2089 * - Use offset of 4 to check for odd elements
2090 * A detailed description of the vector element ordering for little endian and
2091 * big endian can be found at
2092 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2093 * Targeting your applications - what little endian and big endian IBM XL C/C++
2094 * compiler differences mean to you
2095 *
2096 * The mask to the shuffle vector instruction specifies the indices of the
2097 * elements from the two input vectors to place in the result. The elements are
2098 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 elements, each
 * 8 bits wide. More info on the shuffle vector instruction can be found in the
2101 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2102 * Language Reference.
2103 *
2104 * The RHSStartValue indicates whether the same input vectors are used (unary)
2105 * or two different input vectors are used, based on the following:
2106 * - If the instruction uses the same vector for both inputs, the range of the
2107 * indices will be 0 to 15. In this case, the RHSStart value passed should
2108 * be 0.
2109 * - If the instruction has two different vectors then the range of the
2110 * indices will be 0 to 31. In this case, the RHSStart value passed should
2111 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2112 * to 31 specify elements in the second vector).
2113 *
2114 * \param[in] N The shuffle vector SD Node to analyze
2115 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2116 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2117 * vector to the shuffle_vector instruction
2118 * \return true iff this shuffle vector represents an even or odd word merge
2119 */
2120static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2121 unsigned RHSStartValue) {
2122 if (N->getValueType(0) != MVT::v16i8)
2123 return false;
2124
2125 for (unsigned i = 0; i < 2; ++i)
2126 for (unsigned j = 0; j < 4; ++j)
2127 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2128 i*RHSStartValue+j+IndexOffset) ||
2129 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2130 i*RHSStartValue+j+IndexOffset+8))
2131 return false;
2132 return true;
2133}
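
// Example (derived from the checks above): a big-endian even-word merge with
// two different inputs (isVMerge(N, 0, 16)) corresponds to the v16i8 mask
//   <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>
// i.e. words 0 and 2 of the two inputs interleaved, which is what vmrgew
// produces on a big-endian target.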
2134
2135/**
2136 * Determine if the specified shuffle mask is suitable for the vmrgew or
2137 * vmrgow instructions.
2138 *
2139 * \param[in] N The shuffle vector SD Node to analyze
2140 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2141 * \param[in] ShuffleKind Identify the type of merge:
2142 * - 0 = big-endian merge with two different inputs;
2143 * - 1 = either-endian merge with two identical inputs;
2144 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2145 * little-endian merges).
2146 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow
 * instructions
2148 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
2151 if (DAG.getDataLayout().isLittleEndian()) {
2152 unsigned indexOffset = CheckEven ? 4 : 0;
2153 if (ShuffleKind == 1) // Unary
2154 return isVMerge(N, indexOffset, 0);
2155 else if (ShuffleKind == 2) // swapped
2156 return isVMerge(N, indexOffset, 16);
2157 else
2158 return false;
2159 }
2160 else {
2161 unsigned indexOffset = CheckEven ? 0 : 4;
2162 if (ShuffleKind == 1) // Unary
2163 return isVMerge(N, indexOffset, 0);
2164 else if (ShuffleKind == 0) // Normal
2165 return isVMerge(N, indexOffset, 16);
2166 else
2167 return false;
2168 }
2169 return false;
2170}
2171
2172/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2173/// amount, otherwise return -1.
2174/// The ShuffleKind distinguishes between big-endian operations with two
2175/// different inputs (0), either-endian operations with two identical inputs
2176/// (1), and little-endian operations with two different inputs (2). For the
2177/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2178int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2179 SelectionDAG &DAG) {
2180 if (N->getValueType(0) != MVT::v16i8)
2181 return -1;
2182
2183 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2184
2185 // Find the first non-undef value in the shuffle mask.
2186 unsigned i;
2187 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2188 /*search*/;
2189
2190 if (i == 16) return -1; // all undef.
2191
2192 // Otherwise, check to see if the rest of the elements are consecutively
2193 // numbered from this value.
2194 unsigned ShiftAmt = SVOp->getMaskElt(i);
2195 if (ShiftAmt < i) return -1;
2196
2197 ShiftAmt -= i;
2198 bool isLE = DAG.getDataLayout().isLittleEndian();
2199
2200 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2201 // Check the rest of the elements to see if they are consecutive.
2202 for (++i; i != 16; ++i)
2203 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2204 return -1;
2205 } else if (ShuffleKind == 1) {
2206 // Check the rest of the elements to see if they are consecutive.
2207 for (++i; i != 16; ++i)
2208 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2209 return -1;
2210 } else
2211 return -1;
2212
2213 if (isLE)
2214 ShiftAmt = 16 - ShiftAmt;
2215
2216 return ShiftAmt;
2217}
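// Worked example (illustrative): for a two-input big-endian shuffle
// (ShuffleKind == 0) the mask {3,4,5,...,18} is a vsldoi by 3 bytes, so this
// returns 3; the same consecutive mask under ShuffleKind == 2 on a
// little-endian target returns 16 - 3 == 13 because the operands are swapped.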
2218
2219/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2220/// specifies a splat of a single element that is suitable for input to
2221/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2222bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2223 EVT VT = N->getValueType(0);
2224 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2225 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2226
2227 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2228 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2229
2230 // The consecutive indices need to specify an element, not part of two
2231 // different elements. So abandon ship early if this isn't the case.
2232 if (N->getMaskElt(0) % EltSize != 0)
2233 return false;
2234
2235 // This is a splat operation if each element of the permute is the same, and
2236 // if the value doesn't reference the second vector.
2237 unsigned ElementBase = N->getMaskElt(0);
2238
2239 // FIXME: Handle UNDEF elements too!
2240 if (ElementBase >= 16)
2241 return false;
2242
2243 // Check that the indices are consecutive, in the case of a multi-byte element
2244 // splatted with a v16i8 mask.
2245 for (unsigned i = 1; i != EltSize; ++i)
2246 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2247 return false;
2248
2249 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2250 if (N->getMaskElt(i) < 0) continue;
2251 for (unsigned j = 0; j != EltSize; ++j)
2252 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2253 return false;
2254 }
2255 return true;
2256}
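// Worked example (illustrative): with EltSize == 4, the mask
//   {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
// is a splat of word 1 and is accepted, while {4,5,6,7, 8,9,10,11, ...} is
// rejected because the repeated 4-byte groups are not identical.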
2257
2258/// Check that the mask is shuffling N byte elements. Within each N byte
2259/// element of the mask, the indices could be either in increasing or
2260/// decreasing order as long as they are consecutive.
2261/// \param[in] N the shuffle vector SD Node to analyze
2262/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2263/// Word/DoubleWord/QuadWord).
2264 * \param[in] StepLen the step between consecutive indices within each N byte
2265 * element: 1 if the mask is in increasing order, -1 if it is decreasing.
2266/// \return true iff the mask is shuffling N byte elements.
2267static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2268 int StepLen) {
2269 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2270 "Unexpected element width.");
2271 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2272
2273 unsigned NumOfElem = 16 / Width;
2274 unsigned MaskVal[16]; // Width is never greater than 16
2275 for (unsigned i = 0; i < NumOfElem; ++i) {
2276 MaskVal[0] = N->getMaskElt(i * Width);
2277 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2278 return false;
2279 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2280 return false;
2281 }
2282
2283 for (unsigned int j = 1; j < Width; ++j) {
2284 MaskVal[j] = N->getMaskElt(i * Width + j);
2285 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2286 return false;
2287 }
2288 }
2289 }
2290
2291 return true;
2292}
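// Worked example (illustrative): with Width == 4 and StepLen == 1 the mask
//   {8,9,10,11, 0,1,2,3, 20,21,22,23, 4,5,6,7}
// passes (each word is four consecutive, word-aligned byte indices), and with
// Width == 4 and StepLen == -1 a byte-reversed-word mask such as
//   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}
// passes as well.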
2293
2294bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2295 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2296 if (!isNByteElemShuffleMask(N, 4, 1))
2297 return false;
2298
2299 // Now we look at mask elements 0,4,8,12
2300 unsigned M0 = N->getMaskElt(0) / 4;
2301 unsigned M1 = N->getMaskElt(4) / 4;
2302 unsigned M2 = N->getMaskElt(8) / 4;
2303 unsigned M3 = N->getMaskElt(12) / 4;
2304 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2305 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2306
2307 // Below, let H and L be arbitrary elements of the shuffle mask
2308 // where H is in the range [4,7] and L is in the range [0,3].
2309 // H, 1, 2, 3 or L, 5, 6, 7
2310 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2311 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2312 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2313 InsertAtByte = IsLE ? 12 : 0;
2314 Swap = M0 < 4;
2315 return true;
2316 }
2317 // 0, H, 2, 3 or 4, L, 6, 7
2318 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2319 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2320 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2321 InsertAtByte = IsLE ? 8 : 4;
2322 Swap = M1 < 4;
2323 return true;
2324 }
2325 // 0, 1, H, 3 or 4, 5, L, 7
2326 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2327 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2328 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2329 InsertAtByte = IsLE ? 4 : 8;
2330 Swap = M2 < 4;
2331 return true;
2332 }
2333 // 0, 1, 2, H or 4, 5, 6, L
2334 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2335 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2336 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2337 InsertAtByte = IsLE ? 0 : 12;
2338 Swap = M3 < 4;
2339 return true;
2340 }
2341
2342 // If both vector operands for the shuffle are the same vector, the mask will
2343 // contain only elements from the first one and the second one will be undef.
2344 if (N->getOperand(1).isUndef()) {
2345 ShiftElts = 0;
2346 Swap = true;
2347 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2348 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2349 InsertAtByte = IsLE ? 12 : 0;
2350 return true;
2351 }
2352 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2353 InsertAtByte = IsLE ? 8 : 4;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2357 InsertAtByte = IsLE ? 4 : 8;
2358 return true;
2359 }
2360 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2361 InsertAtByte = IsLE ? 0 : 12;
2362 return true;
2363 }
2364 }
2365
2366 return false;
2367}
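// Minimal usage sketch (hypothetical caller; SVN stands for the
// ShuffleVectorSDNode being matched), mirroring how the lowering code later in
// this file consumes the helper:
//   unsigned ShiftElts, InsertAtByte;
//   bool Swap;
//   if (PPC::isXXINSERTWMask(SVN, ShiftElts, InsertAtByte, Swap,
//                            Subtarget.isLittleEndian())) {
//     // Rotate the source by ShiftElts words (xxsldwi) and insert the
//     // selected word at byte offset InsertAtByte (xxinsertw).
//   }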
2368
2369bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2370 bool &Swap, bool IsLE) {
2371 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2372 // Ensure each byte index of the word is consecutive.
2373 if (!isNByteElemShuffleMask(N, 4, 1))
2374 return false;
2375
2376 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2377 unsigned M0 = N->getMaskElt(0) / 4;
2378 unsigned M1 = N->getMaskElt(4) / 4;
2379 unsigned M2 = N->getMaskElt(8) / 4;
2380 unsigned M3 = N->getMaskElt(12) / 4;
2381
2382 // If both vector operands for the shuffle are the same vector, the mask will
2383 // contain only elements from the first one and the second one will be undef.
2384 if (N->getOperand(1).isUndef()) {
2385 assert(M0 < 4 && "Indexing into an undef vector?");
2386 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2387 return false;
2388
2389 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2390 Swap = false;
2391 return true;
2392 }
2393
2394 // Ensure each word index of the ShuffleVector Mask is consecutive.
2395 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2396 return false;
2397
2398 if (IsLE) {
2399 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2400 // Input vectors don't need to be swapped if the leading element
2401 // of the result is one of the 3 left elements of the second vector
2402 // (or if there is no shift to be done at all).
2403 Swap = false;
2404 ShiftElts = (8 - M0) % 8;
2405 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2406 // Input vectors need to be swapped if the leading element
2407 // of the result is one of the 3 left elements of the first vector
2408 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2409 Swap = true;
2410 ShiftElts = (4 - M0) % 4;
2411 }
2412
2413 return true;
2414 } else { // BE
2415 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2416 // Input vectors don't need to be swapped if the leading element
2417 // of the result is one of the 4 elements of the first vector.
2418 Swap = false;
2419 ShiftElts = M0;
2420 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2421 // Input vectors need to be swapped if the leading element
2422 // of the result is one of the 4 elements of the right vector.
2423 Swap = true;
2424 ShiftElts = M0 - 4;
2425 }
2426
2427 return true;
2428 }
2429}
2430
2431static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2432 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2433
2434 if (!isNByteElemShuffleMask(N, Width, -1))
2435 return false;
2436
2437 for (int i = 0; i < 16; i += Width)
2438 if (N->getMaskElt(i) != i + Width - 1)
2439 return false;
2440
2441 return true;
2442}
2443
2444bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2445 return isXXBRShuffleMaskHelper(N, 2);
2446}
2447
2448bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2449 return isXXBRShuffleMaskHelper(N, 4);
2450}
2451
2452bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2453 return isXXBRShuffleMaskHelper(N, 8);
2454}
2455
2456bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2457 return isXXBRShuffleMaskHelper(N, 16);
2458}
2459
2460/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2461/// if the inputs to the instruction should be swapped and set \p DM to the
2462/// value for the immediate.
2463/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2464/// AND element 0 of the result comes from the first input (LE) or second input
2465/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2466/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2467/// mask.
2468bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2469 bool &Swap, bool IsLE) {
2470 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2471
2472 // Ensure each byte index of the double word is consecutive.
2473 if (!isNByteElemShuffleMask(N, 8, 1))
2474 return false;
2475
2476 unsigned M0 = N->getMaskElt(0) / 8;
2477 unsigned M1 = N->getMaskElt(8) / 8;
2478 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2479
2480 // If both vector operands for the shuffle are the same vector, the mask will
2481 // contain only elements from the first one and the second one will be undef.
2482 if (N->getOperand(1).isUndef()) {
2483 if ((M0 | M1) < 2) {
2484 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2485 Swap = false;
2486 return true;
2487 } else
2488 return false;
2489 }
2490
2491 if (IsLE) {
2492 if (M0 > 1 && M1 < 2) {
2493 Swap = false;
2494 } else if (M0 < 2 && M1 > 1) {
2495 M0 = (M0 + 2) % 4;
2496 M1 = (M1 + 2) % 4;
2497 Swap = true;
2498 } else
2499 return false;
2500
2501 // Note: if control flow comes here that means Swap is already set above
2502 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2503 return true;
2504 } else { // BE
2505 if (M0 < 2 && M1 > 1) {
2506 Swap = false;
2507 } else if (M0 > 1 && M1 < 2) {
2508 M0 = (M0 + 2) % 4;
2509 M1 = (M1 + 2) % 4;
2510 Swap = true;
2511 } else
2512 return false;
2513
2514 // Note: if control flow comes here that means Swap is already set above
2515 DM = (M0 << 1) + (M1 & 1);
2516 return true;
2517 }
2518}
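// Worked example (illustrative): on a big-endian target the byte mask
//   {0..7, 24..31}
// selects doubleword 0 of the first input and doubleword 1 of the second, so
// M0 == 0, M1 == 3, Swap == false and DM == (0 << 1) + (3 & 1) == 1.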
2519
2520
2521/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2522/// appropriate for PPC mnemonics (which have a big endian bias - namely
2523/// elements are counted from the left of the vector register).
2524unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2525 SelectionDAG &DAG) {
2526 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2527 assert(isSplatShuffleMask(SVOp, EltSize));
2528 EVT VT = SVOp->getValueType(0);
2529
2530 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2531 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2532 : SVOp->getMaskElt(0);
2533
2534 if (DAG.getDataLayout().isLittleEndian())
2535 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2536 else
2537 return SVOp->getMaskElt(0) / EltSize;
2538}
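// Worked example (illustrative): for a v4i32 splat whose first mask element is
// 4 (EltSize == 4), the splat index is 4 / 4 == 1 on big-endian, and
// (16 / 4) - 1 - 1 == 2 on little-endian, matching the left-to-right element
// numbering used by the vspltw/xxspltw mnemonics.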
2539
2540/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2541/// by using a vspltis[bhw] instruction of the specified element size, return
2542/// the constant being splatted. The ByteSize field indicates the number of
2543/// bytes of each element [124] -> [bhw].
2544SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2545 SDValue OpVal;
2546
2547 // If ByteSize of the splat is bigger than the element size of the
2548 // build_vector, then we have a case where we are checking for a splat where
2549 // multiple elements of the buildvector are folded together into a single
2550 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2551 unsigned EltSize = 16/N->getNumOperands();
2552 if (EltSize < ByteSize) {
2553 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2554 SDValue UniquedVals[4];
2555 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2556
2557 // See if all of the elements in the buildvector agree across.
2558 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2559 if (N->getOperand(i).isUndef()) continue;
2560 // If the element isn't a constant, bail fully out.
2561 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2562
2563 if (!UniquedVals[i&(Multiple-1)].getNode())
2564 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2565 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2566 return SDValue(); // no match.
2567 }
2568
2569 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2570 // either constant or undef values that are identical for each chunk. See
2571 // if these chunks can form into a larger vspltis*.
2572
2573 // Check to see if all of the leading entries are either 0 or -1. If
2574 // neither, then this won't fit into the immediate field.
2575 bool LeadingZero = true;
2576 bool LeadingOnes = true;
2577 for (unsigned i = 0; i != Multiple-1; ++i) {
2578 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2579
2580 LeadingZero &= isNullConstant(UniquedVals[i]);
2581 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2582 }
2583 // Finally, check the least significant entry.
2584 if (LeadingZero) {
2585 if (!UniquedVals[Multiple-1].getNode())
2586 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2587 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2588 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2589 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2590 }
2591 if (LeadingOnes) {
2592 if (!UniquedVals[Multiple-1].getNode())
2593 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2594 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2595 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2596 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2597 }
2598
2599 return SDValue();
2600 }
2601
2602 // Check to see if this buildvec has a single non-undef value in its elements.
2603 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2604 if (N->getOperand(i).isUndef()) continue;
2605 if (!OpVal.getNode())
2606 OpVal = N->getOperand(i);
2607 else if (OpVal != N->getOperand(i))
2608 return SDValue();
2609 }
2610
2611 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2612
2613 unsigned ValSizeInBytes = EltSize;
2614 uint64_t Value = 0;
2615 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2616 Value = CN->getZExtValue();
2617 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2618 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2619 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2620 }
2621
2622 // If the splat value is larger than the element value, then we can never do
2623 // this splat. The only case that we could fit the replicated bits into our
2624 // immediate field for would be zero, and we prefer to use vxor for it.
2625 if (ValSizeInBytes < ByteSize) return SDValue();
2626
2627 // If the element value is larger than the splat value, check if it consists
2628 // of a repeated bit pattern of size ByteSize.
2629 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2630 return SDValue();
2631
2632 // Properly sign extend the value.
2633 int MaskVal = SignExtend32(Value, ByteSize * 8);
2634
2635 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2636 if (MaskVal == 0) return SDValue();
2637
2638 // Finally, if this value fits in a 5 bit sext field, return it
2639 if (SignExtend32<5>(MaskVal) == MaskVal)
2640 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2641 return SDValue();
2642}
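// Worked example (illustrative): a v16i8 build_vector that splats the byte
// 0xFC sign-extends to -4, which fits the 5-bit immediate, so for
// ByteSize == 1 this returns a target constant of -4 (materializable as
// "vspltisb -4"); a splat of 0x50 does not fit in 5 bits and yields SDValue().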
2643
2644//===----------------------------------------------------------------------===//
2645// Addressing Mode Selection
2646//===----------------------------------------------------------------------===//
2647
2648/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2649/// or 64-bit immediate, and if the value can be accurately represented as a
2650/// sign extension from a 16-bit value. If so, this returns true and the
2651/// immediate.
2652bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2653 if (!isa<ConstantSDNode>(N))
2654 return false;
2655
2656 Imm = (int16_t)N->getAsZExtVal();
2657 if (N->getValueType(0) == MVT::i32)
2658 return Imm == (int32_t)N->getAsZExtVal();
2659 else
2660 return Imm == (int64_t)N->getAsZExtVal();
2661}
2662bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2663 return isIntS16Immediate(Op.getNode(), Imm);
2664}
2665
2666/// Used when computing address flags for selecting loads and stores.
2667/// If we have an OR, check if the LHS and RHS are provably disjoint.
2668/// An OR of two provably disjoint values is equivalent to an ADD.
2669/// Most PPC load/store instructions compute the effective address as a sum,
2670/// so doing this conversion is useful.
2671static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2672 if (N.getOpcode() != ISD::OR)
2673 return false;
2674 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2675 if (!LHSKnown.Zero.getBoolValue())
2676 return false;
2677 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2678 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2679}
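// For example (illustrative): if the low four bits of the OR's left operand
// are known to be zero and the right operand is a constant such as 12, no bit
// position can produce a carry, so (or x, 12) may safely be treated as
// (add x, 12) when forming a displacement-style address.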
2680
2681/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2682/// be represented as an indexed [r+r] operation.
2683bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2684 SDValue &Index,
2685 SelectionDAG &DAG) const {
2686 for (SDNode *U : N->uses()) {
2687 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2688 if (Memop->getMemoryVT() == MVT::f64) {
2689 Base = N.getOperand(0);
2690 Index = N.getOperand(1);
2691 return true;
2692 }
2693 }
2694 }
2695 return false;
2696}
2697
2698/// isIntS34Immediate - This method tests whether the value of the given node
2699/// can be accurately represented as a sign extension from a 34-bit value. If so,
2700/// this returns true and the immediate.
2701bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2702 if (!isa<ConstantSDNode>(N))
2703 return false;
2704
2705 Imm = (int64_t)N->getAsZExtVal();
2706 return isInt<34>(Imm);
2707}
2708bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2709 return isIntS34Immediate(Op.getNode(), Imm);
2710}
2711
2712/// SelectAddressRegReg - Given the specified address, check to see if it
2713/// can be represented as an indexed [r+r] operation. Returns false if it
2714/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2715/// non-zero and N can be represented by a base register plus a signed 16-bit
2716/// displacement, make a more precise judgement by checking (displacement % \p
2717/// EncodingAlignment).
2718bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2719 SDValue &Index, SelectionDAG &DAG,
2720 MaybeAlign EncodingAlignment) const {
2721 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2722 // a [pc+imm].
2723 if (SelectAddressPCRel(N, Base))
2724 return false;
2725
2726 int16_t Imm = 0;
2727 if (N.getOpcode() == ISD::ADD) {
2728 // SPE f64 loads/stores cannot handle a 16-bit offset; they only support
2729 // 8-bit offsets, so try the EVX reg+reg form first.
2730 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2731 return true;
2732 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2733 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2734 return false; // r+i
2735 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2736 return false; // r+i
2737
2738 Base = N.getOperand(0);
2739 Index = N.getOperand(1);
2740 return true;
2741 } else if (N.getOpcode() == ISD::OR) {
2742 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2743 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2744 return false; // r+i can fold it if we can.
2745
2746 // If this is an or of disjoint bitfields, we can codegen this as an add
2747 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2748 // disjoint.
2749 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2750
2751 if (LHSKnown.Zero.getBoolValue()) {
2752 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2753 // If all of the bits are known zero on the LHS or RHS, the add won't
2754 // carry.
2755 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2756 Base = N.getOperand(0);
2757 Index = N.getOperand(1);
2758 return true;
2759 }
2760 }
2761 }
2762
2763 return false;
2764}
2765
2766// If we happen to be doing an i64 load or store into a stack slot that has
2767// less than a 4-byte alignment, then the frame-index elimination may need to
2768// use an indexed load or store instruction (because the offset may not be a
2769// multiple of 4). The extra register needed to hold the offset comes from the
2770// register scavenger, and it is possible that the scavenger will need to use
2771// an emergency spill slot. As a result, we need to make sure that a spill slot
2772// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2773// stack slot.
2774static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2775 // FIXME: This does not handle the LWA case.
2776 if (VT != MVT::i64)
2777 return;
2778
2779 // NOTE: We'll exclude negative FIs here, which come from argument
2780 // lowering, because there are no known test cases triggering this problem
2781 // using packed structures (or similar). We can remove this exclusion if
2782 // we find such a test case. The reason why this is so test-case driven is
2783 // because this entire 'fixup' is only to prevent crashes (from the
2784 // register scavenger) on not-really-valid inputs. For example, if we have:
2785 // %a = alloca i1
2786 // %b = bitcast i1* %a to i64*
2787 // store i64 %v, i64* %b
2788 // then the store should really be marked as 'align 1', but is not. If it
2789 // were marked as 'align 1' then the indexed form would have been
2790 // instruction-selected initially, and the problem this 'fixup' is preventing
2791 // won't happen regardless.
2792 if (FrameIdx < 0)
2793 return;
2794
2795 MachineFunction &MF = DAG.getMachineFunction();
2796 MachineFrameInfo &MFI = MF.getFrameInfo();
2797
2798 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2799 return;
2800
2801 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2802 FuncInfo->setHasNonRISpills();
2803}
2804
2805/// Returns true if the address N can be represented by a base register plus
2806/// a signed 16-bit displacement [r+imm], and if it is not better
2807/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2808/// displacements that are multiples of that value.
2809bool PPCTargetLowering::SelectAddressRegImm(
2810 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2811 MaybeAlign EncodingAlignment) const {
2812 // FIXME dl should come from parent load or store, not from address
2813 SDLoc dl(N);
2814
2815 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2816 // a [pc+imm].
2817 if (SelectAddressPCRel(N, Base))
2818 return false;
2819
2820 // If this can be more profitably realized as r+r, fail.
2821 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2822 return false;
2823
2824 if (N.getOpcode() == ISD::ADD) {
2825 int16_t imm = 0;
2826 if (isIntS16Immediate(N.getOperand(1), imm) &&
2827 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2828 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2829 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2830 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2831 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2832 } else {
2833 Base = N.getOperand(0);
2834 }
2835 return true; // [r+i]
2836 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2837 // Match LOAD (ADD (X, Lo(G))).
2838 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2839 "Cannot handle constant offsets yet!");
2840 Disp = N.getOperand(1).getOperand(0); // The global address.
2845 Base = N.getOperand(0);
2846 return true; // [&g+r]
2847 }
2848 } else if (N.getOpcode() == ISD::OR) {
2849 int16_t imm = 0;
2850 if (isIntS16Immediate(N.getOperand(1), imm) &&
2851 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2852 // If this is an or of disjoint bitfields, we can codegen this as an add
2853 // (for better address arithmetic) if the LHS and RHS of the OR are
2854 // provably disjoint.
2855 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2856
2857 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2858 // If all of the bits are known zero on the LHS or RHS, the add won't
2859 // carry.
2860 if (FrameIndexSDNode *FI =
2861 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2862 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2863 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2864 } else {
2865 Base = N.getOperand(0);
2866 }
2867 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2868 return true;
2869 }
2870 }
2871 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2872 // Loading from a constant address.
2873
2874 // If this address fits entirely in a 16-bit sext immediate field, codegen
2875 // this as "d, 0"
2876 int16_t Imm;
2877 if (isIntS16Immediate(CN, Imm) &&
2878 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2879 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2880 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2881 CN->getValueType(0));
2882 return true;
2883 }
2884
2885 // Handle 32-bit sext immediates with LIS + addr mode.
2886 if ((CN->getValueType(0) == MVT::i32 ||
2887 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2888 (!EncodingAlignment ||
2889 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2890 int Addr = (int)CN->getZExtValue();
2891
2892 // Otherwise, break this down into an LIS + disp.
2893 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2894
2895 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2896 MVT::i32);
2897 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2898 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2899 return true;
2900 }
2901 }
2902
2903 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2904 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2905 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2906 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2907 } else
2908 Base = N;
2909 return true; // [r+0]
2910}
2911
2912/// Similar to the 16-bit case but for instructions that take a 34-bit
2913/// displacement field (prefixed loads/stores).
2914bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2915 SDValue &Base,
2916 SelectionDAG &DAG) const {
2917 // Only on 64-bit targets.
2918 if (N.getValueType() != MVT::i64)
2919 return false;
2920
2921 SDLoc dl(N);
2922 int64_t Imm = 0;
2923
2924 if (N.getOpcode() == ISD::ADD) {
2925 if (!isIntS34Immediate(N.getOperand(1), Imm))
2926 return false;
2927 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2928 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2929 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2930 else
2931 Base = N.getOperand(0);
2932 return true;
2933 }
2934
2935 if (N.getOpcode() == ISD::OR) {
2936 if (!isIntS34Immediate(N.getOperand(1), Imm))
2937 return false;
2938 // If this is an or of disjoint bitfields, we can codegen this as an add
2939 // (for better address arithmetic) if the LHS and RHS of the OR are
2940 // provably disjoint.
2941 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2942 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2943 return false;
2944 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2945 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2946 else
2947 Base = N.getOperand(0);
2948 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2949 return true;
2950 }
2951
2952 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2953 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2954 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2955 return true;
2956 }
2957
2958 return false;
2959}
2960
2961/// SelectAddressRegRegOnly - Given the specified address, force it to be
2962/// represented as an indexed [r+r] operation.
2963bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2964 SDValue &Index,
2965 SelectionDAG &DAG) const {
2966 // Check to see if we can easily represent this as an [r+r] address. This
2967 // will fail if it thinks that the address is more profitably represented as
2968 // reg+imm, e.g. where imm = 0.
2969 if (SelectAddressRegReg(N, Base, Index, DAG))
2970 return true;
2971
2972 // If the address is the result of an add, we will utilize the fact that the
2973 // address calculation includes an implicit add. However, we can reduce
2974 // register pressure if we do not materialize a constant just for use as the
2975 // index register. We only get rid of the add if it is not an add of a
2976 // value and a 16-bit signed constant and both have a single use.
2977 int16_t imm = 0;
2978 if (N.getOpcode() == ISD::ADD &&
2979 (!isIntS16Immediate(N.getOperand(1), imm) ||
2980 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2981 Base = N.getOperand(0);
2982 Index = N.getOperand(1);
2983 return true;
2984 }
2985
2986 // Otherwise, do it the hard way, using R0 as the base register.
2987 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2988 N.getValueType());
2989 Index = N;
2990 return true;
2991}
2992
2993template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2994 Ty *PCRelCand = dyn_cast<Ty>(N);
2995 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2996}
2997
2998/// Returns true if this address is a PC Relative address.
2999/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
3000/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
3001bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
3002 // This is a materialize PC Relative node. Always select this as PC Relative.
3003 Base = N;
3004 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3005 return true;
3006 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3007 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3008 isValidPCRelNode<JumpTableSDNode>(N) ||
3009 isValidPCRelNode<BlockAddressSDNode>(N))
3010 return true;
3011 return false;
3012}
3013
3014/// Returns true if we should use a direct load into vector instruction
3015/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3016static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3017
3018 // If there are any other uses other than scalar to vector, then we should
3019 // keep it as a scalar load -> direct move pattern to prevent multiple
3020 // loads.
3021 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3022 if (!LD)
3023 return false;
3024
3025 EVT MemVT = LD->getMemoryVT();
3026 if (!MemVT.isSimple())
3027 return false;
3028 switch(MemVT.getSimpleVT().SimpleTy) {
3029 case MVT::i64:
3030 break;
3031 case MVT::i32:
3032 if (!ST.hasP8Vector())
3033 return false;
3034 break;
3035 case MVT::i16:
3036 case MVT::i8:
3037 if (!ST.hasP9Vector())
3038 return false;
3039 break;
3040 default:
3041 return false;
3042 }
3043
3044 SDValue LoadedVal(N, 0);
3045 if (!LoadedVal.hasOneUse())
3046 return false;
3047
3048 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3049 UI != UE; ++UI)
3050 if (UI.getUse().get().getResNo() == 0 &&
3051 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3052 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3053 return false;
3054
3055 return true;
3056}
3057
3058/// getPreIndexedAddressParts - returns true, and sets the base pointer,
3059/// offset pointer and addressing mode by reference, if the node's address
3060/// can be legally represented as a pre-indexed load / store address.
3061bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3062 SDValue &Offset,
3063 ISD::MemIndexedMode &AM,
3064 SelectionDAG &DAG) const {
3065 if (DisablePPCPreinc) return false;
3066
3067 bool isLoad = true;
3068 SDValue Ptr;
3069 EVT VT;
3070 Align Alignment;
3071 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3072 Ptr = LD->getBasePtr();
3073 VT = LD->getMemoryVT();
3074 Alignment = LD->getAlign();
3075 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3076 Ptr = ST->getBasePtr();
3077 VT = ST->getMemoryVT();
3078 Alignment = ST->getAlign();
3079 isLoad = false;
3080 } else
3081 return false;
3082
3083 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3084 // instructions because we can fold these into a more efficient instruction
3085 // instead (such as LXSD).
3086 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3087 return false;
3088 }
3089
3090 // PowerPC doesn't have preinc load/store instructions for vectors
3091 if (VT.isVector())
3092 return false;
3093
3094 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3095 // Common code will reject creating a pre-inc form if the base pointer
3096 // is a frame index, or if N is a store and the base pointer is either
3097 // the same as or a predecessor of the value being stored. Check for
3098 // those situations here, and try with swapped Base/Offset instead.
3099 bool Swap = false;
3100
3101 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3102 Swap = true;
3103 else if (!isLoad) {
3104 SDValue Val = cast<StoreSDNode>(N)->getValue();
3105 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3106 Swap = true;
3107 }
3108
3109 if (Swap)
3110 std::swap(Base, Offset);
3111
3112 AM = ISD::PRE_INC;
3113 return true;
3114 }
3115
3116 // LDU/STU can only handle immediates that are a multiple of 4.
3117 if (VT != MVT::i64) {
3118 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3119 return false;
3120 } else {
3121 // LDU/STU need an address with at least 4-byte alignment.
3122 if (Alignment < Align(4))
3123 return false;
3124
3125 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3126 return false;
3127 }
3128
3129 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3130 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3131 // sext i32 to i64 when addr mode is r+i.
3132 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3133 LD->getExtensionType() == ISD::SEXTLOAD &&
3134 isa<ConstantSDNode>(Offset))
3135 return false;
3136 }
3137
3138 AM = ISD::PRE_INC;
3139 return true;
3140}
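// Illustrative effect (a sketch, not taken from this file): selecting
// ISD::PRE_INC for a 4-byte-aligned i64 access lets a loop such as
//   for (...) { sum += *p; p += 1; }
// be emitted with an update-form load (e.g. "ldu rD, 8(rB)") that bumps the
// base register as part of the memory operation instead of a separate add.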
3141
3142//===----------------------------------------------------------------------===//
3143// LowerOperation implementation
3144//===----------------------------------------------------------------------===//
3145
3146/// Set HiOpFlags and LoOpFlags to the target MO flags used when referencing
3147/// labels, taking the PIC relocation model into account.
3148static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3149 unsigned &HiOpFlags, unsigned &LoOpFlags,
3150 const GlobalValue *GV = nullptr) {
3151 HiOpFlags = PPCII::MO_HA;
3152 LoOpFlags = PPCII::MO_LO;
3153
3154 // Don't use the pic base if not in PIC relocation model.
3155 if (IsPIC) {
3156 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3157 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3158 }
3159}
3160
3161static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3162 SelectionDAG &DAG) {
3163 SDLoc DL(HiPart);
3164 EVT PtrVT = HiPart.getValueType();
3165 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3166
3167 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3168 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3169
3170 // With PIC, the first instruction is actually "GR+hi(&G)".
3171 if (isPIC)
3172 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3173 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3174
3175 // Generate non-pic code that has direct accesses to the constant pool.
3176 // The address of the global is just (hi(&g)+lo(&g)).
3177 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3178}
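// Illustrative output (a sketch): for a non-PIC reference to a global G, the
// Hi/Lo pair built here typically becomes
//   lis   r3, G@ha
//   addi  r3, r3, G@l
// while in PIC mode the high part is first added to the GlobalBaseReg.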
3179
3180static void setUsesTOCBasePtr(MachineFunction &MF) {
3181 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3182 FuncInfo->setUsesTOCBasePtr();
3183}
3184
3185static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3186 setUsesTOCBasePtr(DAG.getMachineFunction());
3187}
3188
3189SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3190 SDValue GA) const {
3191 const bool Is64Bit = Subtarget.isPPC64();
3192 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3193 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3194 : Subtarget.isAIXABI()
3195 ? DAG.getRegister(PPC::R2, VT)
3196 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3197 SDValue Ops[] = { GA, Reg };
3198 return DAG.getMemIntrinsicNode(
3199 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3200 MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3201 MachineMemOperand::MOLoad);
3202}
3203
3204SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3205 SelectionDAG &DAG) const {
3206 EVT PtrVT = Op.getValueType();
3207 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3208 const Constant *C = CP->getConstVal();
3209
3210 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3211 // The actual address of the GlobalValue is stored in the TOC.
3212 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3213 if (Subtarget.isUsingPCRelativeCalls()) {
3214 SDLoc DL(CP);
3215 EVT Ty = getPointerTy(DAG.getDataLayout());
3216 SDValue ConstPool = DAG.getTargetConstantPool(
3217 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3218 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3219 }
3220 setUsesTOCBasePtr(DAG);
3221 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3222 return getTOCEntry(DAG, SDLoc(CP), GA);
3223 }
3224
3225 unsigned MOHiFlag, MOLoFlag;
3226 bool IsPIC = isPositionIndependent();
3227 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3228
3229 if (IsPIC && Subtarget.isSVR4ABI()) {
3230 SDValue GA =
3231 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3232 return getTOCEntry(DAG, SDLoc(CP), GA);
3233 }
3234
3235 SDValue CPIHi =
3236 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3237 SDValue CPILo =
3238 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3239 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3240}
3241
3242// For 64-bit PowerPC, prefer the more compact relative encodings.
3243// This trades 32 bits per jump table entry for one or two instructions
3244// on the jump site.
3245unsigned PPCTargetLowering::getJumpTableEncoding() const {
3246 if (isJumpTableRelative())
3247 return MachineJumpTableInfo::EK_LabelDifference32;
3248
3249 return TargetLowering::getJumpTableEncoding();
3250}
3251
3252bool PPCTargetLowering::isJumpTableRelative() const {
3253 if (UseAbsoluteJumpTables)
3254 return false;
3255 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3256 return true;
3257 return TargetLowering::isJumpTableRelative();
3258}
3259
3260SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3261 SelectionDAG &DAG) const {
3262 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3263 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3264
3265 switch (getTargetMachine().getCodeModel()) {
3266 case CodeModel::Small:
3267 case CodeModel::Medium:
3268 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3269 default:
3270 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3271 getPointerTy(DAG.getDataLayout()));
3272 }
3273}
3274
3275const MCExpr *
3276PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3277 unsigned JTI,
3278 MCContext &Ctx) const {
3279 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3280 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3281
3282 switch (getTargetMachine().getCodeModel()) {
3283 case CodeModel::Small:
3284 case CodeModel::Medium:
3285 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3286 default:
3287 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3288 }
3289}
3290
3291SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3292 EVT PtrVT = Op.getValueType();
3293 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3294
3295 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3296 if (Subtarget.isUsingPCRelativeCalls()) {
3297 SDLoc DL(JT);
3298 EVT Ty = getPointerTy(DAG.getDataLayout());
3299 SDValue GA =
3300 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3301 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3302 return MatAddr;
3303 }
3304
3305 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3306 // The actual address of the GlobalValue is stored in the TOC.
3307 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3308 setUsesTOCBasePtr(DAG);
3309 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3310 return getTOCEntry(DAG, SDLoc(JT), GA);
3311 }
3312
3313 unsigned MOHiFlag, MOLoFlag;
3314 bool IsPIC = isPositionIndependent();
3315 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3316
3317 if (IsPIC && Subtarget.isSVR4ABI()) {
3318 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3320 return getTOCEntry(DAG, SDLoc(GA), GA);
3321 }
3322
3323 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3324 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3325 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3326}
3327
3328SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3329 SelectionDAG &DAG) const {
3330 EVT PtrVT = Op.getValueType();
3331 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3332 const BlockAddress *BA = BASDN->getBlockAddress();
3333
3334 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3335 if (Subtarget.isUsingPCRelativeCalls()) {
3336 SDLoc DL(BASDN);
3337 EVT Ty = getPointerTy(DAG.getDataLayout());
3338 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3340 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3341 return MatAddr;
3342 }
3343
3344 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3345 // The actual BlockAddress is stored in the TOC.
3346 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3347 setUsesTOCBasePtr(DAG);
3348 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3349 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3350 }
3351
3352 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3353 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3354 return getTOCEntry(
3355 DAG, SDLoc(BASDN),
3356 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3357
3358 unsigned MOHiFlag, MOLoFlag;
3359 bool IsPIC = isPositionIndependent();
3360 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3361 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3362 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3363 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3364}
3365
3366SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3367 SelectionDAG &DAG) const {
3368 if (Subtarget.isAIXABI())
3369 return LowerGlobalTLSAddressAIX(Op, DAG);
3370
3371 return LowerGlobalTLSAddressLinux(Op, DAG);
3372}
3373
3374/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3375/// and then apply the update.
3376static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3377 SelectionDAG &DAG,
3378 const TargetMachine &TM) {
3379 // Initialize TLS model opt setting lazily:
3380 // (1) Use initial-exec for single TLS var references within current function.
3381 // (2) Use local-dynamic for multiple TLS var references within current
3382 // function.
3383 PPCFunctionInfo *FuncInfo =
3384 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3385 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3386 SmallPtrSet<const GlobalValue *, 8> TLSGV;
3387 // Iterate over all instructions within current function, collect all TLS
3388 // global variables (global variables taken as the first parameter to
3389 // Intrinsic::threadlocal_address).
3390 const Function &Func = DAG.getMachineFunction().getFunction();
3391 for (Function::const_iterator BI = Func.begin(), BE = Func.end(); BI != BE;
3392 ++BI)
3393 for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end();
3394 II != IE; ++II)
3395 if (II->getOpcode() == Instruction::Call)
3396 if (const CallInst *CI = dyn_cast<const CallInst>(&*II))
3397 if (Function *CF = CI->getCalledFunction())
3398 if (CF->isDeclaration() &&
3399 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3400 if (const GlobalValue *GV =
3401 dyn_cast<GlobalValue>(II->getOperand(0))) {
3402 TLSModel::Model GVModel = TM.getTLSModel(GV);
3403 if (GVModel == TLSModel::LocalDynamic)
3404 TLSGV.insert(GV);
3405 }
3406
3407 unsigned TLSGVCnt = TLSGV.size();
3408 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3409 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3410 FuncInfo->setAIXFuncUseTLSIEForLD();
3411 FuncInfo->setAIXFuncTLSModelOptInitDone();
3412 }
3413
3414 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3415 LLVM_DEBUG(
3416 dbgs() << DAG.getMachineFunction().getName()
3417 << " function is using the TLS-IE model for TLS-LD access.\n");
3418 Model = TLSModel::InitialExec;
3419 }
3420}
3421
3422SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3423 SelectionDAG &DAG) const {
3424 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3425
3426 if (DAG.getTarget().useEmulatedTLS())
3427 report_fatal_error("Emulated TLS is not yet supported on AIX");
3428
3429 SDLoc dl(GA);
3430 const GlobalValue *GV = GA->getGlobal();
3431 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3432 bool Is64Bit = Subtarget.isPPC64();
3433 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3434
3435 // Apply update to the TLS model.
3436 if (Subtarget.hasAIXShLibTLSModelOpt())
3437 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3438
3439 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3440
3441 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3442 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3443 bool HasAIXSmallTLSGlobalAttr = false;
3444 SDValue VariableOffsetTGA =
3445 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3446 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3447 SDValue TLSReg;
3448
3449 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3450 if (GVar->hasAttribute("aix-small-tls"))
3451 HasAIXSmallTLSGlobalAttr = true;
3452
3453 if (Is64Bit) {
3454 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3455 // involves a load of the variable offset (from the TOC), followed by an
3456 // add of the loaded variable offset to R13 (the thread pointer).
3457 // This code sequence looks like:
3458 // ld reg1,var[TC](2)
3459 // add reg2, reg1, r13 // r13 contains the thread pointer
3460 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3461
3462 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3463 // global variable attribute, produce a faster access sequence for
3464 // local-exec TLS variables where the offset from the TLS base is encoded
3465 // as an immediate operand.
3466 //
3467 // We only utilize the faster local-exec access sequence when the TLS
3468 // variable has a size within the policy limit. We treat types that are
3469 // not sized or are empty as being over the policy size limit.
3470 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3471 IsTLSLocalExecModel) {
3472 Type *GVType = GV->getValueType();
3473 if (GVType->isSized() && !GVType->isEmptyTy() &&
3474 GV->getDataLayout().getTypeAllocSize(GVType) <=
3476 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3477 }
3478 } else {
3479 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3480 // involves loading the variable offset from the TOC, generating a call to
3481 // .__get_tpointer to get the thread pointer (which will be in R3), and
3482 // adding the two together:
3483 // lwz reg1,var[TC](2)
3484 // bla .__get_tpointer
3485 // add reg2, reg1, r3
3486 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3487
3488 // We do not implement the 32-bit version of the faster access sequence
3489 // for local-exec that is controlled by the -maix-small-local-exec-tls
3490 // option, or the "aix-small-tls" global variable attribute.
3491 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3492 report_fatal_error("The small-local-exec TLS access sequence is "
3493 "currently only supported on AIX (64-bit mode).");
3494 }
3495 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3496 }
3497
3498 if (Model == TLSModel::LocalDynamic) {
3499 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3500
3501 // We do not implement the 32-bit version of the faster access sequence
3502 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3503 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3504 report_fatal_error("The small-local-dynamic TLS access sequence is "
3505 "currently only supported on AIX (64-bit mode).");
3506
3507 // For local-dynamic on AIX, we need to generate one TOC entry for each
3508 // variable offset, and a single module-handle TOC entry for the entire
3509 // file.
3510
3511 SDValue VariableOffsetTGA =
3512 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3513 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3514
3515 Module *M = DAG.getMachineFunction().getFunction().getParent();
3516 GlobalVariable *TLSGV =
3517 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3518 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3520 assert(TLSGV && "Not able to create GV for _$TLSML.");
3521 SDValue ModuleHandleTGA =
3522 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3523 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3524 SDValue ModuleHandle =
3525 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3526
3527 // With the -maix-small-local-dynamic-tls option, produce a faster access
3528 // sequence for local-dynamic TLS variables where the offset from the
3529 // module-handle is encoded as an immediate operand.
3530 //
3531 // We only utilize the faster local-dynamic access sequence when the TLS
3532 // variable has a size within the policy limit. We treat types that are
3533 // not sized or are empty as being over the policy size limit.
3534 if (HasAIXSmallLocalDynamicTLS) {
3535 Type *GVType = GV->getValueType();
3536 if (GVType->isSized() && !GVType->isEmptyTy() &&
3537 GV->getDataLayout().getTypeAllocSize(GVType) <=
3539 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3540 ModuleHandle);
3541 }
3542
3543 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3544 }
3545
3546 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3547 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3548 // need to generate two TOC entries, one for the variable offset, one for the
3549 // region handle. The global address for the TOC entry of the region handle is
3550 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3551 // entry of the variable offset is created with MO_TLSGD_FLAG.
3552 SDValue VariableOffsetTGA =
3553 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3554 SDValue RegionHandleTGA =
3555 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3556 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3557 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3558 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3559 RegionHandle);
3560}
3561
3562SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3563 SelectionDAG &DAG) const {
3564 // FIXME: TLS addresses currently use medium model code sequences,
3565 // which is the most useful form. Eventually support for small and
3566 // large models could be added if users need it, at the cost of
3567 // additional complexity.
3568 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3569 if (DAG.getTarget().useEmulatedTLS())
3570 return LowerToTLSEmulatedModel(GA, DAG);
3571
3572 SDLoc dl(GA);
3573 const GlobalValue *GV = GA->getGlobal();
3574 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3575 bool is64bit = Subtarget.isPPC64();
3576 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3577 PICLevel::Level picLevel = M->getPICLevel();
3578
3579 const TargetMachine &TM = getTargetMachine();
3580 TLSModel::Model Model = TM.getTLSModel(GV);
3581
3582 if (Model == TLSModel::LocalExec) {
3583 if (Subtarget.isUsingPCRelativeCalls()) {
3584 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3585 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3587 SDValue MatAddr =
3588 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3589 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3590 }
3591
3592 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3594 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3596 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3597 : DAG.getRegister(PPC::R2, MVT::i32);
3598
3599 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3600 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3601 }
3602
3603 if (Model == TLSModel::InitialExec) {
3604 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3606 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3607 SDValue TGATLS = DAG.getTargetGlobalAddress(
3608 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3609 SDValue TPOffset;
3610 if (IsPCRel) {
3611 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3612 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3614 } else {
3615 SDValue GOTPtr;
3616 if (is64bit) {
3617 setUsesTOCBasePtr(DAG);
3618 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3619 GOTPtr =
3620 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3621 } else {
3622 if (!TM.isPositionIndependent())
3623 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3624 else if (picLevel == PICLevel::SmallPIC)
3625 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3626 else
3627 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3628 }
3629 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3630 }
3631 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3632 }
3633
3634 if (Model == TLSModel::GeneralDynamic) {
3635 if (Subtarget.isUsingPCRelativeCalls()) {
3636 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3638 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3639 }
3640
3641 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3642 SDValue GOTPtr;
3643 if (is64bit) {
3644 setUsesTOCBasePtr(DAG);
3645 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3646 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3647 GOTReg, TGA);
3648 } else {
3649 if (picLevel == PICLevel::SmallPIC)
3650 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3651 else
3652 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3653 }
3654 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3655 GOTPtr, TGA, TGA);
3656 }
3657
3658 if (Model == TLSModel::LocalDynamic) {
3659 if (Subtarget.isUsingPCRelativeCalls()) {
3660 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3662 SDValue MatPCRel =
3663 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3664 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3665 }
3666
3667 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3668 SDValue GOTPtr;
3669 if (is64bit) {
3670 setUsesTOCBasePtr(DAG);
3671 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3672 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3673 GOTReg, TGA);
3674 } else {
3675 if (picLevel == PICLevel::SmallPIC)
3676 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3677 else
3678 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3679 }
3680 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3681 PtrVT, GOTPtr, TGA, TGA);
3682 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3683 PtrVT, TLSAddr, TGA);
3684 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3685 }
3686
3687 llvm_unreachable("Unknown TLS model!");
3688}
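// Illustrative output (a sketch, medium code model on 64-bit ELF): the
// general-dynamic path above typically expands to
//   addis r3, r2, x@got@tlsgd@ha
//   addi  r3, r3, x@got@tlsgd@l
//   bl    __tls_get_addr(x@tlsgd)
// while the initial-exec and local-exec paths fold the call away in favour of
// a load from the GOT or a thread-pointer-relative add, respectively.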
3689
3690SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3691 SelectionDAG &DAG) const {
3692 EVT PtrVT = Op.getValueType();
3693 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3694 SDLoc DL(GSDN);
3695 const GlobalValue *GV = GSDN->getGlobal();
3696
3697 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3698 // The actual address of the GlobalValue is stored in the TOC.
3699 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3700 if (Subtarget.isUsingPCRelativeCalls()) {
3701 EVT Ty = getPointerTy(DAG.getDataLayout());
3702       if (isAccessedAsGotIndirect(Op)) {
3703         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3704                                                 PPCII::MO_GOT_PCREL_FLAG);
3705         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3706 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3707                                    MachinePointerInfo());
3708         return Load;
3709 } else {
3710 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3711                                                 PPCII::MO_PCREL_FLAG);
3712         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3713 }
3714 }
3715 setUsesTOCBasePtr(DAG);
3716 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3717 return getTOCEntry(DAG, DL, GA);
3718 }
3719
3720 unsigned MOHiFlag, MOLoFlag;
3721 bool IsPIC = isPositionIndependent();
3722 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3723
3724 if (IsPIC && Subtarget.isSVR4ABI()) {
3725 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3726 GSDN->getOffset(),
3727                                             PPCII::MO_PIC_FLAG);
3728     return getTOCEntry(DAG, DL, GA);
3729 }
3730
3731 SDValue GAHi =
3732 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3733 SDValue GALo =
3734 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3735
3736 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3737}
3738
3739SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3740 bool IsStrict = Op->isStrictFPOpcode();
3741   ISD::CondCode CC =
3742       cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3743 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3744 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3745 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3746 EVT LHSVT = LHS.getValueType();
3747 SDLoc dl(Op);
3748
3749 // Soften the setcc with libcall if it is fp128.
3750 if (LHSVT == MVT::f128) {
3751 assert(!Subtarget.hasP9Vector() &&
3752 "SETCC for f128 is already legal under Power9!");
3753 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3754 Op->getOpcode() == ISD::STRICT_FSETCCS);
3755 if (RHS.getNode())
3756 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3757 DAG.getCondCode(CC));
3758 if (IsStrict)
3759 return DAG.getMergeValues({LHS, Chain}, dl);
3760 return LHS;
3761 }
3762
3763 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3764
3765 if (Op.getValueType() == MVT::v2i64) {
3766 // When the operands themselves are v2i64 values, we need to do something
3767 // special because VSX has no underlying comparison operations for these.
3768 if (LHS.getValueType() == MVT::v2i64) {
3769 // Equality can be handled by casting to the legal type for Altivec
3770 // comparisons, everything else needs to be expanded.
3771 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3772 return SDValue();
3773 SDValue SetCC32 = DAG.getSetCC(
3774 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3775 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
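       // Compare as v4i32, then AND (for SETEQ) or OR (for SETNE) each word
       // with its partner word so that both halves of a doubleword must agree.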
3776 int ShuffV[] = {1, 0, 3, 2};
3777 SDValue Shuff =
3778 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3779 return DAG.getBitcast(MVT::v2i64,
3780                             DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3781                                         dl, MVT::v4i32, Shuff, SetCC32));
3782 }
3783
3784 // We handle most of these in the usual way.
3785 return Op;
3786 }
3787
3788 // If we're comparing for equality to zero, expose the fact that this is
3789 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3790 // fold the new nodes.
3791 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3792 return V;
3793
3794 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3795 // Leave comparisons against 0 and -1 alone for now, since they're usually
3796 // optimized. FIXME: revisit this when we can custom lower all setcc
3797 // optimizations.
3798 if (C->isAllOnes() || C->isZero())
3799 return SDValue();
3800 }
3801
3802 // If we have an integer seteq/setne, turn it into a compare against zero
3803 // by xor'ing the rhs with the lhs, which is faster than setting a
3804 // condition register, reading it back out, and masking the correct bit. The
3805 // normal approach here uses sub to do this instead of xor. Using xor exposes
3806 // the result to other bit-twiddling opportunities.
3807 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3808 EVT VT = Op.getValueType();
3809 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3810 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3811 }
3812 return SDValue();
3813}
3814
3815SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3816 SDNode *Node = Op.getNode();
3817 EVT VT = Node->getValueType(0);
3818 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3819 SDValue InChain = Node->getOperand(0);
3820 SDValue VAListPtr = Node->getOperand(1);
3821 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3822 SDLoc dl(Node);
3823
3824 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
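   // Layout of the 32-bit SVR4 va_list used below: byte 0 is gpr_index,
   // byte 1 is fpr_index, bytes 4-7 hold overflow_arg_area and bytes 8-11
   // hold reg_save_area (see the struct sketch in LowerVASTART).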
3825
3826 // gpr_index
3827 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3828 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3829 InChain = GprIndex.getValue(1);
3830
3831 if (VT == MVT::i64) {
3832 // Check if GprIndex is even
3833 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3834 DAG.getConstant(1, dl, MVT::i32));
3835 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3836 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3837 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3838 DAG.getConstant(1, dl, MVT::i32));
3839 // Align GprIndex to be even if it isn't
3840 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3841 GprIndex);
3842 }
3843
3844 // fpr index is 1 byte after gpr
3845 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3846 DAG.getConstant(1, dl, MVT::i32));
3847
3848 // fpr
3849 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3850 FprPtr, MachinePointerInfo(SV), MVT::i8);
3851 InChain = FprIndex.getValue(1);
3852
3853 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3854 DAG.getConstant(8, dl, MVT::i32));
3855
3856 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3857 DAG.getConstant(4, dl, MVT::i32));
3858
3859 // areas
3860 SDValue OverflowArea =
3861 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3862 InChain = OverflowArea.getValue(1);
3863
3864 SDValue RegSaveArea =
3865 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3866 InChain = RegSaveArea.getValue(1);
3867
3868   // select overflow_area if index >= 8
3869 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3870 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3871
3872 // adjustment constant gpr_index * 4/8
3873 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3874 VT.isInteger() ? GprIndex : FprIndex,
3875 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3876 MVT::i32));
3877
3878 // OurReg = RegSaveArea + RegConstant
3879 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3880 RegConstant);
3881
3882 // Floating types are 32 bytes into RegSaveArea
3883 if (VT.isFloatingPoint())
3884 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3885 DAG.getConstant(32, dl, MVT::i32));
3886
3887 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3888 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3889 VT.isInteger() ? GprIndex : FprIndex,
3890 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3891 MVT::i32));
3892
3893 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3894 VT.isInteger() ? VAListPtr : FprPtr,
3895 MachinePointerInfo(SV), MVT::i8);
3896
3897 // determine if we should load from reg_save_area or overflow_area
3898 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3899
3900   // increase overflow_area by 4/8 if gpr/fpr index >= 8
3901 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3902 DAG.getConstant(VT.isInteger() ? 4 : 8,
3903 dl, MVT::i32));
3904
3905 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3906 OverflowAreaPlusN);
3907
3908 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3909 MachinePointerInfo(), MVT::i32);
3910
3911 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3912}
3913
3914SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3915 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3916
3917 // We have to copy the entire va_list struct:
3918 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3919 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3920 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3921 false, true, /*CI=*/nullptr, std::nullopt,
3922                        MachinePointerInfo(), MachinePointerInfo());
3923 }
3924
3925SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3926 SelectionDAG &DAG) const {
3927 if (Subtarget.isAIXABI())
3928 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3929
3930 return Op.getOperand(0);
3931}
3932
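/// Scan an inline-asm node for uses, defs or clobbers of the link register;
/// if one is found, record that LR must be saved in this function.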
3933SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3934   MachineFunction &MF = DAG.getMachineFunction();
3935   PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3936
3937 assert((Op.getOpcode() == ISD::INLINEASM ||
3938 Op.getOpcode() == ISD::INLINEASM_BR) &&
3939 "Expecting Inline ASM node.");
3940
3941   // If an LR store is already known to be required then there is no point in
3942 // checking this ASM as well.
3943 if (MFI.isLRStoreRequired())
3944 return Op;
3945
3946 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3947 // type MVT::Glue. We want to ignore this last operand if that is the case.
3948 unsigned NumOps = Op.getNumOperands();
3949 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3950 --NumOps;
3951
3952 // Check all operands that may contain the LR.
3953 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3954 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3955 unsigned NumVals = Flags.getNumOperandRegisters();
3956 ++i; // Skip the ID value.
3957
3958 switch (Flags.getKind()) {
3959 default:
3960 llvm_unreachable("Bad flags!");
3961     case InlineAsm::Kind::RegUse:
3962     case InlineAsm::Kind::Imm:
3963     case InlineAsm::Kind::Mem:
3964       i += NumVals;
3965 break;
3966     case InlineAsm::Kind::Clobber:
3967     case InlineAsm::Kind::RegDef:
3968     case InlineAsm::Kind::RegDefEarlyClobber: {
3969       for (; NumVals; --NumVals, ++i) {
3970 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3971 if (Reg != PPC::LR && Reg != PPC::LR8)
3972 continue;
3973 MFI.setLRStoreRequired();
3974 return Op;
3975 }
3976 break;
3977 }
3978 }
3979 }
3980
3981 return Op;
3982}
3983
3984SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3985 SelectionDAG &DAG) const {
3986 if (Subtarget.isAIXABI())
3987 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3988
3989 SDValue Chain = Op.getOperand(0);
3990 SDValue Trmp = Op.getOperand(1); // trampoline
3991 SDValue FPtr = Op.getOperand(2); // nested function
3992 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3993 SDLoc dl(Op);
3994
3995 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3996 bool isPPC64 = (PtrVT == MVT::i64);
3997 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3998
3999
4000   TargetLowering::ArgListTy Args;
4001   TargetLowering::ArgListEntry Entry;
4002 Entry.Ty = IntPtrTy;
4003 Entry.Node = Trmp; Args.push_back(Entry);
4004
4005 // TrampSize == (isPPC64 ? 48 : 40);
4006 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
4007 isPPC64 ? MVT::i64 : MVT::i32);
4008 Args.push_back(Entry);
4009
4010 Entry.Node = FPtr; Args.push_back(Entry);
4011 Entry.Node = Nest; Args.push_back(Entry);
4012
4013 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4014   TargetLowering::CallLoweringInfo CLI(DAG);
4015   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4016       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4017       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4018
4019 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4020 return CallResult.second;
4021}
4022
4023SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4024   MachineFunction &MF = DAG.getMachineFunction();
4025   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4026 EVT PtrVT = getPointerTy(MF.getDataLayout());
4027
4028 SDLoc dl(Op);
4029
4030 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4031 // vastart just stores the address of the VarArgsFrameIndex slot into the
4032 // memory location argument.
4033 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4034 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4035 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4036 MachinePointerInfo(SV));
4037 }
4038
4039 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4040 // We suppose the given va_list is already allocated.
4041 //
4042 // typedef struct {
4043 // char gpr; /* index into the array of 8 GPRs
4044 // * stored in the register save area
4045 // * gpr=0 corresponds to r3,
4046 // * gpr=1 to r4, etc.
4047 // */
4048 // char fpr; /* index into the array of 8 FPRs
4049 // * stored in the register save area
4050 // * fpr=0 corresponds to f1,
4051 // * fpr=1 to f2, etc.
4052 // */
4053 // char *overflow_arg_area;
4054 // /* location on stack that holds
4055 // * the next overflow argument
4056 // */
4057 // char *reg_save_area;
4058 // /* where r3:r10 and f1:f8 (if saved)
4059 // * are stored
4060 // */
4061 // } va_list[1];
4062
4063 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4064 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4065 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4066 PtrVT);
4067 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4068 PtrVT);
4069
4070 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4071 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4072
4073 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4074 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4075
4076 uint64_t FPROffset = 1;
4077 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4078
4079 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4080
4081 // Store first byte : number of int regs
4082 SDValue firstStore =
4083 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4084 MachinePointerInfo(SV), MVT::i8);
4085 uint64_t nextOffset = FPROffset;
4086 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4087 ConstFPROffset);
4088
4089 // Store second byte : number of float regs
4090 SDValue secondStore =
4091 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4092 MachinePointerInfo(SV, nextOffset), MVT::i8);
4093 nextOffset += StackOffset;
4094 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4095
4096 // Store second word : arguments given on stack
4097 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4098 MachinePointerInfo(SV, nextOffset));
4099 nextOffset += FrameOffset;
4100 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4101
4102 // Store third word : arguments given in registers
4103 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4104 MachinePointerInfo(SV, nextOffset));
4105}
4106
4107/// FPR - The set of FP registers that should be allocated for arguments
4108/// on Darwin and AIX.
4109static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4110 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4111 PPC::F11, PPC::F12, PPC::F13};
4112
4113/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4114/// the stack.
4115static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4116 unsigned PtrByteSize) {
4117 unsigned ArgSize = ArgVT.getStoreSize();
4118 if (Flags.isByVal())
4119 ArgSize = Flags.getByValSize();
4120
4121 // Round up to multiples of the pointer size, except for array members,
4122 // which are always packed.
4123 if (!Flags.isInConsecutiveRegs())
4124 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4125
4126 return ArgSize;
4127}
4128
4129/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4130/// on the stack.
4131 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4132                                          ISD::ArgFlagsTy Flags,
4133 unsigned PtrByteSize) {
4134 Align Alignment(PtrByteSize);
4135
4136 // Altivec parameters are padded to a 16 byte boundary.
4137 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4138 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4139 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4140 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4141 Alignment = Align(16);
4142
4143 // ByVal parameters are aligned as requested.
4144 if (Flags.isByVal()) {
4145 auto BVAlign = Flags.getNonZeroByValAlign();
4146 if (BVAlign > PtrByteSize) {
4147 if (BVAlign.value() % PtrByteSize != 0)
4148         llvm_unreachable(
4149             "ByVal alignment is not a multiple of the pointer size");
4150
4151 Alignment = BVAlign;
4152 }
4153 }
4154
4155 // Array members are always packed to their original alignment.
4156 if (Flags.isInConsecutiveRegs()) {
4157 // If the array member was split into multiple registers, the first
4158 // needs to be aligned to the size of the full type. (Except for
4159 // ppcf128, which is only aligned as its f64 components.)
4160 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4161 Alignment = Align(OrigVT.getStoreSize());
4162 else
4163 Alignment = Align(ArgVT.getStoreSize());
4164 }
4165
4166 return Alignment;
4167}
4168
4169/// CalculateStackSlotUsed - Return whether this argument will use its
4170/// stack slot (instead of being passed in registers). ArgOffset,
4171/// AvailableFPRs, and AvailableVRs must hold the current argument
4172/// position, and will be updated to account for this argument.
4173static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4174 unsigned PtrByteSize, unsigned LinkageSize,
4175 unsigned ParamAreaSize, unsigned &ArgOffset,
4176 unsigned &AvailableFPRs,
4177 unsigned &AvailableVRs) {
4178 bool UseMemory = false;
4179
4180 // Respect alignment of argument on the stack.
4181 Align Alignment =
4182 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4183 ArgOffset = alignTo(ArgOffset, Alignment);
4184 // If there's no space left in the argument save area, we must
4185 // use memory (this check also catches zero-sized arguments).
4186 if (ArgOffset >= LinkageSize + ParamAreaSize)
4187 UseMemory = true;
4188
4189 // Allocate argument on the stack.
4190 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4191 if (Flags.isInConsecutiveRegsLast())
4192 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4193 // If we overran the argument save area, we must use memory
4194 // (this check catches arguments passed partially in memory)
4195 if (ArgOffset > LinkageSize + ParamAreaSize)
4196 UseMemory = true;
4197
4198 // However, if the argument is actually passed in an FPR or a VR,
4199 // we don't use memory after all.
4200 if (!Flags.isByVal()) {
4201 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4202 if (AvailableFPRs > 0) {
4203 --AvailableFPRs;
4204 return false;
4205 }
4206 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4207 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4208 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4209 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4210 if (AvailableVRs > 0) {
4211 --AvailableVRs;
4212 return false;
4213 }
4214 }
4215
4216 return UseMemory;
4217}
4218
4219/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4220/// ensure minimum alignment required for target.
4221 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4222                                      unsigned NumBytes) {
4223 return alignTo(NumBytes, Lowering->getStackAlign());
4224}
4225
4226SDValue PPCTargetLowering::LowerFormalArguments(
4227 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4228 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4229 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4230 if (Subtarget.isAIXABI())
4231 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4232 InVals);
4233 if (Subtarget.is64BitELFABI())
4234 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4235 InVals);
4236 assert(Subtarget.is32BitELFABI());
4237 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4238 InVals);
4239}
4240
4241SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4242 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4243 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4244 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4245
4246 // 32-bit SVR4 ABI Stack Frame Layout:
4247 // +-----------------------------------+
4248 // +--> | Back chain |
4249 // | +-----------------------------------+
4250 // | | Floating-point register save area |
4251 // | +-----------------------------------+
4252 // | | General register save area |
4253 // | +-----------------------------------+
4254 // | | CR save word |
4255 // | +-----------------------------------+
4256 // | | VRSAVE save word |
4257 // | +-----------------------------------+
4258 // | | Alignment padding |
4259 // | +-----------------------------------+
4260 // | | Vector register save area |
4261 // | +-----------------------------------+
4262 // | | Local variable space |
4263 // | +-----------------------------------+
4264 // | | Parameter list area |
4265 // | +-----------------------------------+
4266 // | | LR save word |
4267 // | +-----------------------------------+
4268 // SP--> +--- | Back chain |
4269 // +-----------------------------------+
4270 //
4271 // Specifications:
4272 // System V Application Binary Interface PowerPC Processor Supplement
4273 // AltiVec Technology Programming Interface Manual
4274
4275   MachineFunction &MF = DAG.getMachineFunction();
4276   MachineFrameInfo &MFI = MF.getFrameInfo();
4277 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4278
4279 EVT PtrVT = getPointerTy(MF.getDataLayout());
4280 // Potential tail calls could cause overwriting of argument stack slots.
4281 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4282 (CallConv == CallingConv::Fast));
4283 const Align PtrAlign(4);
4284
4285 // Assign locations to all of the incoming arguments.
4286   SmallVector<CCValAssign, 16> ArgLocs;
4287   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4288 *DAG.getContext());
4289
4290 // Reserve space for the linkage area on the stack.
4291 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4292 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4293 if (useSoftFloat())
4294 CCInfo.PreAnalyzeFormalArguments(Ins);
4295
4296 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4297 CCInfo.clearWasPPCF128();
4298
4299 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4300 CCValAssign &VA = ArgLocs[i];
4301
4302 // Arguments stored in registers.
4303 if (VA.isRegLoc()) {
4304 const TargetRegisterClass *RC;
4305 EVT ValVT = VA.getValVT();
4306
4307 switch (ValVT.getSimpleVT().SimpleTy) {
4308 default:
4309 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4310 case MVT::i1:
4311 case MVT::i32:
4312 RC = &PPC::GPRCRegClass;
4313 break;
4314 case MVT::f32:
4315 if (Subtarget.hasP8Vector())
4316 RC = &PPC::VSSRCRegClass;
4317 else if (Subtarget.hasSPE())
4318 RC = &PPC::GPRCRegClass;
4319 else
4320 RC = &PPC::F4RCRegClass;
4321 break;
4322 case MVT::f64:
4323 if (Subtarget.hasVSX())
4324 RC = &PPC::VSFRCRegClass;
4325 else if (Subtarget.hasSPE())
4326 // SPE passes doubles in GPR pairs.
4327 RC = &PPC::GPRCRegClass;
4328 else
4329 RC = &PPC::F8RCRegClass;
4330 break;
4331 case MVT::v16i8:
4332 case MVT::v8i16:
4333 case MVT::v4i32:
4334 RC = &PPC::VRRCRegClass;
4335 break;
4336 case MVT::v4f32:
4337 RC = &PPC::VRRCRegClass;
4338 break;
4339 case MVT::v2f64:
4340 case MVT::v2i64:
4341 RC = &PPC::VRRCRegClass;
4342 break;
4343 }
4344
4345 SDValue ArgValue;
4346 // Transform the arguments stored in physical registers into
4347 // virtual ones.
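       // SPE passes an f64 in a pair of 32-bit GPRs; reassemble the two
       // halves with BUILD_SPE64, swapping them on big-endian targets.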
4348 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4349 assert(i + 1 < e && "No second half of double precision argument");
4350 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4351 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4352 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4353 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4354 if (!Subtarget.isLittleEndian())
4355 std::swap (ArgValueLo, ArgValueHi);
4356 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4357 ArgValueHi);
4358 } else {
4359 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4360 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4361 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4362 if (ValVT == MVT::i1)
4363 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4364 }
4365
4366 InVals.push_back(ArgValue);
4367 } else {
4368 // Argument stored in memory.
4369 assert(VA.isMemLoc());
4370
4371 // Get the extended size of the argument type in stack
4372 unsigned ArgSize = VA.getLocVT().getStoreSize();
4373 // Get the actual size of the argument type
4374 unsigned ObjSize = VA.getValVT().getStoreSize();
4375 unsigned ArgOffset = VA.getLocMemOffset();
4376 // Stack objects in PPC32 are right justified.
4377 ArgOffset += ArgSize - ObjSize;
4378 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4379
4380 // Create load nodes to retrieve arguments from the stack.
4381 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4382 InVals.push_back(
4383 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4384 }
4385 }
4386
4387 // Assign locations to all of the incoming aggregate by value arguments.
4388 // Aggregates passed by value are stored in the local variable space of the
4389 // caller's stack frame, right above the parameter list area.
4390 SmallVector<CCValAssign, 16> ByValArgLocs;
4391 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4392 ByValArgLocs, *DAG.getContext());
4393
4394 // Reserve stack space for the allocations in CCInfo.
4395 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4396
4397 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4398
4399 // Area that is at least reserved in the caller of this function.
4400 unsigned MinReservedArea = CCByValInfo.getStackSize();
4401 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4402
4403 // Set the size that is at least reserved in caller of this function. Tail
4404 // call optimized function's reserved stack space needs to be aligned so that
4405 // taking the difference between two stack areas will result in an aligned
4406 // stack.
4407 MinReservedArea =
4408 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4409 FuncInfo->setMinReservedArea(MinReservedArea);
4410
4411   SmallVector<SDValue, 8> MemOps;
4412
4413 // If the function takes variable number of arguments, make a frame index for
4414 // the start of the first vararg value... for expansion of llvm.va_start.
4415 if (isVarArg) {
4416 static const MCPhysReg GPArgRegs[] = {
4417 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4418 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4419 };
4420 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4421
4422 static const MCPhysReg FPArgRegs[] = {
4423 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4424 PPC::F8
4425 };
4426 unsigned NumFPArgRegs = std::size(FPArgRegs);
4427
4428 if (useSoftFloat() || hasSPE())
4429 NumFPArgRegs = 0;
4430
4431 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4432 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4433
4434 // Make room for NumGPArgRegs and NumFPArgRegs.
4435 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4436 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4437
4439 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4440
4441 FuncInfo->setVarArgsFrameIndex(
4442 MFI.CreateStackObject(Depth, Align(8), false));
4443 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4444
4445 // The fixed integer arguments of a variadic function are stored to the
4446 // VarArgsFrameIndex on the stack so that they may be loaded by
4447 // dereferencing the result of va_next.
4448 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4449 // Get an existing live-in vreg, or add a new one.
4450 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4451 if (!VReg)
4452 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4453
4454 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4455 SDValue Store =
4456 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4457 MemOps.push_back(Store);
4458 // Increment the address by four for the next argument to store
4459 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4460 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4461 }
4462
4463 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4464 // is set.
4465 // The double arguments are stored to the VarArgsFrameIndex
4466 // on the stack.
4467 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4468 // Get an existing live-in vreg, or add a new one.
4469 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4470 if (!VReg)
4471 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4472
4473 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4474 SDValue Store =
4475 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4476 MemOps.push_back(Store);
4477 // Increment the address by eight for the next argument to store
4478 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4479 PtrVT);
4480 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4481 }
4482 }
4483
4484 if (!MemOps.empty())
4485 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4486
4487 return Chain;
4488}
4489
4490// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4491// value to MVT::i64 and then truncate to the correct register size.
4492SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4493 EVT ObjectVT, SelectionDAG &DAG,
4494 SDValue ArgVal,
4495 const SDLoc &dl) const {
4496 if (Flags.isSExt())
4497 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4498 DAG.getValueType(ObjectVT));
4499 else if (Flags.isZExt())
4500 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4501 DAG.getValueType(ObjectVT));
4502
4503 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4504}
4505
4506SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4507 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4508 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4509 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4510 // TODO: add description of PPC stack frame format, or at least some docs.
4511 //
4512 bool isELFv2ABI = Subtarget.isELFv2ABI();
4513 bool isLittleEndian = Subtarget.isLittleEndian();
4514   MachineFunction &MF = DAG.getMachineFunction();
4515   MachineFrameInfo &MFI = MF.getFrameInfo();
4516 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4517
4518 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4519 "fastcc not supported on varargs functions");
4520
4521 EVT PtrVT = getPointerTy(MF.getDataLayout());
4522 // Potential tail calls could cause overwriting of argument stack slots.
4523 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4524 (CallConv == CallingConv::Fast));
4525 unsigned PtrByteSize = 8;
4526 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4527
4528 static const MCPhysReg GPR[] = {
4529 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4530 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4531 };
4532 static const MCPhysReg VR[] = {
4533 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4534 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4535 };
4536
4537 const unsigned Num_GPR_Regs = std::size(GPR);
4538 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4539 const unsigned Num_VR_Regs = std::size(VR);
4540
4541 // Do a first pass over the arguments to determine whether the ABI
4542 // guarantees that our caller has allocated the parameter save area
4543 // on its stack frame. In the ELFv1 ABI, this is always the case;
4544 // in the ELFv2 ABI, it is true if this is a vararg function or if
4545 // any parameter is located in a stack slot.
4546
4547 bool HasParameterArea = !isELFv2ABI || isVarArg;
4548 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4549 unsigned NumBytes = LinkageSize;
4550 unsigned AvailableFPRs = Num_FPR_Regs;
4551 unsigned AvailableVRs = Num_VR_Regs;
4552 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4553 if (Ins[i].Flags.isNest())
4554 continue;
4555
4556 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4557 PtrByteSize, LinkageSize, ParamAreaSize,
4558 NumBytes, AvailableFPRs, AvailableVRs))
4559 HasParameterArea = true;
4560 }
4561
4562 // Add DAG nodes to load the arguments or copy them out of registers. On
4563 // entry to a function on PPC, the arguments start after the linkage area,
4564 // although the first ones are often in registers.
4565
4566 unsigned ArgOffset = LinkageSize;
4567 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4568   SmallVector<SDValue, 8> MemOps;
4569   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4570   unsigned CurArgIdx = 0;
4571 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4572 SDValue ArgVal;
4573 bool needsLoad = false;
4574 EVT ObjectVT = Ins[ArgNo].VT;
4575 EVT OrigVT = Ins[ArgNo].ArgVT;
4576 unsigned ObjSize = ObjectVT.getStoreSize();
4577 unsigned ArgSize = ObjSize;
4578 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4579 if (Ins[ArgNo].isOrigArg()) {
4580 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4581 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4582 }
4583 // We re-align the argument offset for each argument, except when using the
4584 // fast calling convention, when we need to make sure we do that only when
4585 // we'll actually use a stack slot.
4586 unsigned CurArgOffset;
4587 Align Alignment;
4588 auto ComputeArgOffset = [&]() {
4589 /* Respect alignment of argument on the stack. */
4590 Alignment =
4591 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4592 ArgOffset = alignTo(ArgOffset, Alignment);
4593 CurArgOffset = ArgOffset;
4594 };
4595
4596 if (CallConv != CallingConv::Fast) {
4597 ComputeArgOffset();
4598
4599 /* Compute GPR index associated with argument offset. */
4600 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4601 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4602 }
4603
4604 // FIXME the codegen can be much improved in some cases.
4605 // We do not have to keep everything in memory.
4606 if (Flags.isByVal()) {
4607 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4608
4609 if (CallConv == CallingConv::Fast)
4610 ComputeArgOffset();
4611
4612 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4613 ObjSize = Flags.getByValSize();
4614 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4615 // Empty aggregate parameters do not take up registers. Examples:
4616 // struct { } a;
4617 // union { } b;
4618 // int c[0];
4619 // etc. However, we have to provide a place-holder in InVals, so
4620 // pretend we have an 8-byte item at the current address for that
4621 // purpose.
4622 if (!ObjSize) {
4623 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4624 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4625 InVals.push_back(FIN);
4626 continue;
4627 }
4628
4629 // Create a stack object covering all stack doublewords occupied
4630 // by the argument. If the argument is (fully or partially) on
4631 // the stack, or if the argument is fully in registers but the
4632 // caller has allocated the parameter save anyway, we can refer
4633 // directly to the caller's stack frame. Otherwise, create a
4634 // local copy in our own frame.
4635 int FI;
4636 if (HasParameterArea ||
4637 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4638 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4639 else
4640 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4641 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4642
4643 // Handle aggregates smaller than 8 bytes.
4644 if (ObjSize < PtrByteSize) {
4645 // The value of the object is its address, which differs from the
4646 // address of the enclosing doubleword on big-endian systems.
4647 SDValue Arg = FIN;
4648 if (!isLittleEndian) {
4649 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4650 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4651 }
4652 InVals.push_back(Arg);
4653
4654 if (GPR_idx != Num_GPR_Regs) {
4655 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4656 FuncInfo->addLiveInAttr(VReg, Flags);
4657 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4658 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4659 SDValue Store =
4660 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4661 MachinePointerInfo(&*FuncArg), ObjType);
4662 MemOps.push_back(Store);
4663 }
4664 // Whether we copied from a register or not, advance the offset
4665 // into the parameter save area by a full doubleword.
4666 ArgOffset += PtrByteSize;
4667 continue;
4668 }
4669
4670 // The value of the object is its address, which is the address of
4671 // its first stack doubleword.
4672 InVals.push_back(FIN);
4673
4674 // Store whatever pieces of the object are in registers to memory.
4675 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4676 if (GPR_idx == Num_GPR_Regs)
4677 break;
4678
4679 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4680 FuncInfo->addLiveInAttr(VReg, Flags);
4681 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4682 SDValue Addr = FIN;
4683 if (j) {
4684 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4685 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4686 }
4687 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4688 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4689 SDValue Store =
4690 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4691 MachinePointerInfo(&*FuncArg, j), ObjType);
4692 MemOps.push_back(Store);
4693 ++GPR_idx;
4694 }
4695 ArgOffset += ArgSize;
4696 continue;
4697 }
4698
4699 switch (ObjectVT.getSimpleVT().SimpleTy) {
4700 default: llvm_unreachable("Unhandled argument type!");
4701 case MVT::i1:
4702 case MVT::i32:
4703 case MVT::i64:
4704 if (Flags.isNest()) {
4705 // The 'nest' parameter, if any, is passed in R11.
4706 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4707 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4708
4709 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4710 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4711
4712 break;
4713 }
4714
4715 // These can be scalar arguments or elements of an integer array type
4716 // passed directly. Clang may use those instead of "byval" aggregate
4717 // types to avoid forcing arguments to memory unnecessarily.
4718 if (GPR_idx != Num_GPR_Regs) {
4719 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4720 FuncInfo->addLiveInAttr(VReg, Flags);
4721 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4722
4723 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4724 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4725 // value to MVT::i64 and then truncate to the correct register size.
4726 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4727 } else {
4728 if (CallConv == CallingConv::Fast)
4729 ComputeArgOffset();
4730
4731 needsLoad = true;
4732 ArgSize = PtrByteSize;
4733 }
4734 if (CallConv != CallingConv::Fast || needsLoad)
4735 ArgOffset += 8;
4736 break;
4737
4738 case MVT::f32:
4739 case MVT::f64:
4740 // These can be scalar arguments or elements of a float array type
4741     // passed directly. The latter are used to implement ELFv2 homogeneous
4742 // float aggregates.
4743 if (FPR_idx != Num_FPR_Regs) {
4744 unsigned VReg;
4745
4746 if (ObjectVT == MVT::f32)
4747 VReg = MF.addLiveIn(FPR[FPR_idx],
4748 Subtarget.hasP8Vector()
4749 ? &PPC::VSSRCRegClass
4750 : &PPC::F4RCRegClass);
4751 else
4752 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4753 ? &PPC::VSFRCRegClass
4754 : &PPC::F8RCRegClass);
4755
4756 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4757 ++FPR_idx;
4758 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4759 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4760 // once we support fp <-> gpr moves.
4761
4762 // This can only ever happen in the presence of f32 array types,
4763 // since otherwise we never run out of FPRs before running out
4764 // of GPRs.
4765 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4766 FuncInfo->addLiveInAttr(VReg, Flags);
4767 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4768
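         // A lone f32 occupies only one 32-bit half of the doubleword slot;
         // select the correct half based on the slot offset and endianness
         // before bitcasting back to the floating-point type.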
4769 if (ObjectVT == MVT::f32) {
4770 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4771 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4772 DAG.getConstant(32, dl, MVT::i32));
4773 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4774 }
4775
4776 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4777 } else {
4778 if (CallConv == CallingConv::Fast)
4779 ComputeArgOffset();
4780
4781 needsLoad = true;
4782 }
4783
4784 // When passing an array of floats, the array occupies consecutive
4785 // space in the argument area; only round up to the next doubleword
4786 // at the end of the array. Otherwise, each float takes 8 bytes.
4787 if (CallConv != CallingConv::Fast || needsLoad) {
4788 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4789 ArgOffset += ArgSize;
4790 if (Flags.isInConsecutiveRegsLast())
4791 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4792 }
4793 break;
4794 case MVT::v4f32:
4795 case MVT::v4i32:
4796 case MVT::v8i16:
4797 case MVT::v16i8:
4798 case MVT::v2f64:
4799 case MVT::v2i64:
4800 case MVT::v1i128:
4801 case MVT::f128:
4802 // These can be scalar arguments or elements of a vector array type
4803     // passed directly. The latter are used to implement ELFv2 homogeneous
4804 // vector aggregates.
4805 if (VR_idx != Num_VR_Regs) {
4806 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4807 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4808 ++VR_idx;
4809 } else {
4810 if (CallConv == CallingConv::Fast)
4811 ComputeArgOffset();
4812 needsLoad = true;
4813 }
4814 if (CallConv != CallingConv::Fast || needsLoad)
4815 ArgOffset += 16;
4816 break;
4817 }
4818
4819 // We need to load the argument to a virtual register if we determined
4820 // above that we ran out of physical registers of the appropriate type.
4821 if (needsLoad) {
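       // On big-endian targets an argument smaller than its stack slot is
       // right-justified within the slot, so bias the load offset accordingly.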
4822 if (ObjSize < ArgSize && !isLittleEndian)
4823 CurArgOffset += ArgSize - ObjSize;
4824 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4825 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4826 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4827 }
4828
4829 InVals.push_back(ArgVal);
4830 }
4831
4832 // Area that is at least reserved in the caller of this function.
4833 unsigned MinReservedArea;
4834 if (HasParameterArea)
4835 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4836 else
4837 MinReservedArea = LinkageSize;
4838
4839 // Set the size that is at least reserved in caller of this function. Tail
4840 // call optimized functions' reserved stack space needs to be aligned so that
4841 // taking the difference between two stack areas will result in an aligned
4842 // stack.
4843 MinReservedArea =
4844 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4845 FuncInfo->setMinReservedArea(MinReservedArea);
4846
4847 // If the function takes variable number of arguments, make a frame index for
4848 // the start of the first vararg value... for expansion of llvm.va_start.
4849   // The ELFv2 ABI spec states:
4850 // C programs that are intended to be *portable* across different compilers
4851 // and architectures must use the header file <stdarg.h> to deal with variable
4852 // argument lists.
4853 if (isVarArg && MFI.hasVAStart()) {
4854 int Depth = ArgOffset;
4855
4856 FuncInfo->setVarArgsFrameIndex(
4857 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4858 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4859
4860 // If this function is vararg, store any remaining integer argument regs
4861 // to their spots on the stack so that they may be loaded by dereferencing
4862 // the result of va_next.
4863 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4864 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4865 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4866 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4867 SDValue Store =
4868 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4869 MemOps.push_back(Store);
4870 // Increment the address by four for the next argument to store
4871 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4872 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4873 }
4874 }
4875
4876 if (!MemOps.empty())
4877 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4878
4879 return Chain;
4880}
4881
4882/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4883/// adjusted to accommodate the arguments for the tailcall.
4884static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4885 unsigned ParamSize) {
4886
4887 if (!isTailCall) return 0;
4888
4890 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4891 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
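   // A negative SPDiff means the callee needs more argument space than the
   // caller has reserved, so the stack must be extended before the tail call.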
4892 // Remember only if the new adjustment is bigger.
4893 if (SPDiff < FI->getTailCallSPDelta())
4894 FI->setTailCallSPDelta(SPDiff);
4895
4896 return SPDiff;
4897}
4898
4899static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4900
4901static bool callsShareTOCBase(const Function *Caller,
4902 const GlobalValue *CalleeGV,
4903 const TargetMachine &TM) {
4904 // It does not make sense to call callsShareTOCBase() with a caller that
4905 // is PC Relative since PC Relative callers do not have a TOC.
4906#ifndef NDEBUG
4907 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4908 assert(!STICaller->isUsingPCRelativeCalls() &&
4909 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4910#endif
4911
4912 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4913 // don't have enough information to determine if the caller and callee share
4914 // the same TOC base, so we have to pessimistically assume they don't for
4915 // correctness.
4916 if (!CalleeGV)
4917 return false;
4918
4919 // If the callee is preemptable, then the static linker will use a plt-stub
4920 // which saves the toc to the stack, and needs a nop after the call
4921 // instruction to convert to a toc-restore.
4922 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4923 return false;
4924
4925 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4926 // We may need a TOC restore in the situation where the caller requires a
4927 // valid TOC but the callee is PC Relative and does not.
4928 const Function *F = dyn_cast<Function>(CalleeGV);
4929 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4930
4931 // If we have an Alias we can try to get the function from there.
4932 if (Alias) {
4933 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4934 F = dyn_cast<Function>(GlobalObj);
4935 }
4936
4937 // If we still have no valid function pointer we do not have enough
4938 // information to determine if the callee uses PC Relative calls so we must
4939 // assume that it does.
4940 if (!F)
4941 return false;
4942
4943 // If the callee uses PC Relative we cannot guarantee that the callee won't
4944 // clobber the TOC of the caller and so we must assume that the two
4945 // functions do not share a TOC base.
4946 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4947 if (STICallee->isUsingPCRelativeCalls())
4948 return false;
4949
4950 // If the GV is not a strong definition then we need to assume it can be
4951 // replaced by another function at link time. The function that replaces
4952 // it may not share the same TOC as the caller since the callee may be
4953 // replaced by a PC Relative version of the same function.
4954 if (!CalleeGV->isStrongDefinitionForLinker())
4955 return false;
4956
4957 // The medium and large code models are expected to provide a sufficiently
4958 // large TOC to provide all data addressing needs of a module with a
4959 // single TOC.
4960 if (CodeModel::Medium == TM.getCodeModel() ||
4961 CodeModel::Large == TM.getCodeModel())
4962 return true;
4963
4964 // Any explicitly-specified sections and section prefixes must also match.
4965 // Also, if we're using -ffunction-sections, then each function is always in
4966 // a different section (the same is true for COMDAT functions).
4967 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4968 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4969 return false;
4970 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4971 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4972 return false;
4973 }
4974
4975 return true;
4976}
4977
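/// Returns true if any outgoing argument of a 64-bit ELF call would have to
/// be passed (fully or partially) in the parameter save area on the stack
/// rather than entirely in registers.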
4978static bool
4979 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4980                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4981 assert(Subtarget.is64BitELFABI());
4982
4983 const unsigned PtrByteSize = 8;
4984 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4985
4986 static const MCPhysReg GPR[] = {
4987 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4988 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4989 };
4990 static const MCPhysReg VR[] = {
4991 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4992 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4993 };
4994
4995 const unsigned NumGPRs = std::size(GPR);
4996 const unsigned NumFPRs = 13;
4997 const unsigned NumVRs = std::size(VR);
4998 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4999
5000 unsigned NumBytes = LinkageSize;
5001 unsigned AvailableFPRs = NumFPRs;
5002 unsigned AvailableVRs = NumVRs;
5003
5004 for (const ISD::OutputArg& Param : Outs) {
5005 if (Param.Flags.isNest()) continue;
5006
5007 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5008 LinkageSize, ParamAreaSize, NumBytes,
5009 AvailableFPRs, AvailableVRs))
5010 return true;
5011 }
5012 return false;
5013}
5014
5015static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5016 if (CB.arg_size() != CallerFn->arg_size())
5017 return false;
5018
5019 auto CalleeArgIter = CB.arg_begin();
5020 auto CalleeArgEnd = CB.arg_end();
5021 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5022
5023 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5024 const Value* CalleeArg = *CalleeArgIter;
5025 const Value* CallerArg = &(*CallerArgIter);
5026 if (CalleeArg == CallerArg)
5027 continue;
5028
5029 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5030 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5031 // }
5032 // 1st argument of callee is undef and has the same type as caller.
5033 if (CalleeArg->getType() == CallerArg->getType() &&
5034 isa<UndefValue>(CalleeArg))
5035 continue;
5036
5037 return false;
5038 }
5039
5040 return true;
5041}
5042
5043// Returns true if TCO is possible between the callers and callees
5044// calling conventions.
5045static bool
5046 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5047                                     CallingConv::ID CalleeCC) {
5048 // Tail calls are possible with fastcc and ccc.
5049 auto isTailCallableCC = [] (CallingConv::ID CC){
5050 return CC == CallingConv::C || CC == CallingConv::Fast;
5051 };
5052 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5053 return false;
5054
5055 // We can safely tail call both fastcc and ccc callees from a c calling
5056 // convention caller. If the caller is fastcc, we may have less stack space
5057 // than a non-fastcc caller with the same signature so disable tail-calls in
5058 // that case.
5059 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5060}
5061
5062bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5063 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5064 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5065     const SmallVectorImpl<ISD::OutputArg> &Outs,
5066     const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5067 bool isCalleeExternalSymbol) const {
5068 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5069
5070 if (DisableSCO && !TailCallOpt) return false;
5071
5072 // Variadic argument functions are not supported.
5073 if (isVarArg) return false;
5074
5075 // Check that the calling conventions are compatible for tco.
5076 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5077 return false;
5078
5079   // A caller that contains any byval parameter is not supported.
5080 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5081 return false;
5082
5083   // A callee that contains any byval parameter is not supported either.
5084 // Note: This is a quick work around, because in some cases, e.g.
5085 // caller's stack size > callee's stack size, we are still able to apply
5086 // sibling call optimization. For example, gcc is able to do SCO for caller1
5087 // in the following example, but not for caller2.
5088 // struct test {
5089 // long int a;
5090 // char ary[56];
5091 // } gTest;
5092 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5093 // b->a = v.a;
5094 // return 0;
5095 // }
5096 // void caller1(struct test a, struct test c, struct test *b) {
5097 // callee(gTest, b); }
5098 // void caller2(struct test *b) { callee(gTest, b); }
5099 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5100 return false;
5101
5102 // If callee and caller use different calling conventions, we cannot pass
5103 // parameters on stack since offsets for the parameter area may be different.
5104 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5105 return false;
5106
5107 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5108 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5109 // callee potentially have different TOC bases then we cannot tail call since
5110 // we need to restore the TOC pointer after the call.
5111 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5112 // We cannot guarantee this for indirect calls or calls to external functions.
5113 // When PC-Relative addressing is used, the concept of the TOC is no longer
5114 // applicable so this check is not required.
5115 // Check first for indirect calls.
5116 if (!Subtarget.isUsingPCRelativeCalls() &&
5117 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5118 return false;
5119
5120 // Check if we share the TOC base.
5121 if (!Subtarget.isUsingPCRelativeCalls() &&
5122 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5123 return false;
5124
5125 // TCO allows altering callee ABI, so we don't have to check further.
5126 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5127 return true;
5128
5129 if (DisableSCO) return false;
5130
5131   // If the callee uses the same argument list that the caller is using, then
5132   // we can apply SCO in this case. If not, then we need to check if the callee
5133   // needs stack space for passing arguments.
5134 // PC Relative tail calls may not have a CallBase.
5135 // If there is no CallBase we cannot verify if we have the same argument
5136 // list so assume that we don't have the same argument list.
5137 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5138 needStackSlotPassParameters(Subtarget, Outs))
5139 return false;
5140 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5141 return false;
5142
5143 return true;
5144}
5145
5146/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5147/// for tail call optimization. Targets which want to do tail call
5148/// optimization should implement this function.
5149bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5150 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5151 CallingConv::ID CallerCC, bool isVarArg,
5152 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5153 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5154 return false;
5155
5156 // Variable argument functions are not supported.
5157 if (isVarArg)
5158 return false;
5159
5160 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5161 // Functions containing by val parameters are not supported.
5162 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5163 return false;
5164
5165 // Non-PIC/GOT tail calls are supported.
5166 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5167 return true;
5168
5169 // At the moment we can only do local tail calls (in same module, hidden
5170 // or protected) if we are generating PIC.
5171 if (CalleeGV)
5172 return CalleeGV->hasHiddenVisibility() ||
5173 CalleeGV->hasProtectedVisibility();
5174 }
5175
5176 return false;
5177}
5178
5179/// isBLACompatibleAddress - Return the immediate to use if the specified
5180/// 32-bit value is representable in the immediate field of a BxA instruction.
5181static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5182 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5183 if (!C) return nullptr;
5184
5185 int Addr = C->getZExtValue();
5186 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5187 SignExtend32<26>(Addr) != Addr)
5188 return nullptr; // Top 6 bits have to be sext of immediate.
5189
5190 return DAG
5191 .getConstant(
5192 (int)C->getZExtValue() >> 2, SDLoc(Op),
5194 .getNode();
5195}
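// Editor's note: the following is an illustrative sketch, not part of this
// file. It restates the BLA ("branch absolute") immediate check performed by
// isBLACompatibleAddress above as a standalone helper: the target must be
// word aligned and must survive a 26-bit sign extension, and the encoded
// immediate is the address shifted right by two.
//
//   #include <cstdint>
//   #include <optional>
//
//   std::optional<int32_t> encodeBLATarget(uint32_t Addr) {
//     if (Addr & 3)                              // low 2 bits must be zero
//       return std::nullopt;
//     int32_t SExt = (int32_t)(Addr << 6) >> 6;  // sign extend from bit 25
//     if ((uint32_t)SExt != Addr)                // must fit in 26 signed bits
//       return std::nullopt;
//     return (int32_t)Addr >> 2;                 // value for the immediate field
//   }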
5196
5197namespace {
5198
5199struct TailCallArgumentInfo {
5200 SDValue Arg;
5201 SDValue FrameIdxOp;
5202 int FrameIdx = 0;
5203
5204 TailCallArgumentInfo() = default;
5205};
5206
5207} // end anonymous namespace
5208
5209/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5210static void StoreTailCallArgumentsToStackSlot(
5211 SelectionDAG &DAG, SDValue Chain,
5212 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5213 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5214 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5215 SDValue Arg = TailCallArgs[i].Arg;
5216 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5217 int FI = TailCallArgs[i].FrameIdx;
5218 // Store relative to framepointer.
5219 MemOpChains.push_back(DAG.getStore(
5220 Chain, dl, Arg, FIN,
5222 }
5223}
5224
5225/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5226/// the appropriate stack slot for the tail call optimized function call.
5227static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5228 SDValue OldRetAddr, SDValue OldFP,
5229 int SPDiff, const SDLoc &dl) {
5230 if (SPDiff) {
5231 // Calculate the new stack slot for the return address.
5232 MachineFunction &MF = DAG.getMachineFunction();
5233 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5234 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5235 bool isPPC64 = Subtarget.isPPC64();
5236 int SlotSize = isPPC64 ? 8 : 4;
5237 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5238 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5239 NewRetAddrLoc, true);
5240 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5241 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5242 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5243 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5244 }
5245 return Chain;
5246}
5247
5248/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5249/// the position of the argument.
5250static void
5251CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5252 SDValue Arg, int SPDiff, unsigned ArgOffset,
5253 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5254 int Offset = ArgOffset + SPDiff;
5255 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5256 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5257 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5258 SDValue FIN = DAG.getFrameIndex(FI, VT);
5259 TailCallArgumentInfo Info;
5260 Info.Arg = Arg;
5261 Info.FrameIdxOp = FIN;
5262 Info.FrameIdx = FI;
5263 TailCallArguments.push_back(Info);
5264}
5265
5266/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5267/// address stack slots. Returns the chain as result and the loaded values in
5268/// LROpOut/FPOpOut. Used when tail calling.
5269SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5270 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5271 SDValue &FPOpOut, const SDLoc &dl) const {
5272 if (SPDiff) {
5273 // Load the LR and FP stack slot for later adjusting.
5274 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5275 LROpOut = getReturnAddrFrameIndex(DAG);
5276 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5277 Chain = SDValue(LROpOut.getNode(), 1);
5278 }
5279 return Chain;
5280}
5281
5282/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5283/// by "Src" to address "Dst" of size "Size". Alignment information is
5284/// specified by the specific parameter attribute. The copy will be passed as
5285/// a byval function parameter.
5286/// Sometimes what we are copying is the end of a larger object, the part that
5287/// does not fit in registers.
5288static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5289 SDValue Chain, ISD::ArgFlagsTy Flags,
5290 SelectionDAG &DAG, const SDLoc &dl) {
5291 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5292 return DAG.getMemcpy(
5293 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5294 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5295}
5296
5297/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5298/// tail calls.
5299static void LowerMemOpCallTo(
5300 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5301 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5302 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5303 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5305 if (!isTailCall) {
5306 if (isVector) {
5307 SDValue StackPtr;
5308 if (isPPC64)
5309 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5310 else
5311 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5312 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5313 DAG.getConstant(ArgOffset, dl, PtrVT));
5314 }
5315 MemOpChains.push_back(
5316 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5317 // Calculate and remember argument location.
5318 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5319 TailCallArguments);
5320}
5321
5322static void
5323PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5324 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5325 SDValue FPOp,
5326 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5327 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5328 // might overwrite each other in case of tail call optimization.
5329 SmallVector<SDValue, 8> MemOpChains2;
5330 // Do not flag preceding copytoreg stuff together with the following stuff.
5331 InGlue = SDValue();
5332 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5333 MemOpChains2, dl);
5334 if (!MemOpChains2.empty())
5335 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5336
5337 // Store the return address to the appropriate stack slot.
5338 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5339
5340 // Emit callseq_end just before tailcall node.
5341 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5342 InGlue = Chain.getValue(1);
5343}
5344
5345// Is this global address that of a function that can be called by name? (as
5346// opposed to something that must hold a descriptor for an indirect call).
5347static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5348 if (GV) {
5349 if (GV->isThreadLocal())
5350 return false;
5351
5352 return GV->getValueType()->isFunctionTy();
5353 }
5354
5355 return false;
5356}
5357
5358SDValue PPCTargetLowering::LowerCallResult(
5359 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5360 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5361 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5362 SmallVector<CCValAssign, 16> RVLocs;
5363 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5364 *DAG.getContext());
5365
5366 CCRetInfo.AnalyzeCallResult(
5367 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5369 : RetCC_PPC);
5370
5371 // Copy all of the result registers out of their specified physreg.
5372 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5373 CCValAssign &VA = RVLocs[i];
5374 assert(VA.isRegLoc() && "Can only return in registers!");
5375
5376 SDValue Val;
5377
5378 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5379 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5380 InGlue);
5381 Chain = Lo.getValue(1);
5382 InGlue = Lo.getValue(2);
5383 VA = RVLocs[++i]; // skip ahead to next loc
5384 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5385 InGlue);
5386 Chain = Hi.getValue(1);
5387 InGlue = Hi.getValue(2);
5388 if (!Subtarget.isLittleEndian())
5389 std::swap (Lo, Hi);
5390 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5391 } else {
5392 Val = DAG.getCopyFromReg(Chain, dl,
5393 VA.getLocReg(), VA.getLocVT(), InGlue);
5394 Chain = Val.getValue(1);
5395 InGlue = Val.getValue(2);
5396 }
5397
5398 switch (VA.getLocInfo()) {
5399 default: llvm_unreachable("Unknown loc info!");
5400 case CCValAssign::Full: break;
5401 case CCValAssign::AExt:
5402 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5403 break;
5404 case CCValAssign::ZExt:
5405 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5406 DAG.getValueType(VA.getValVT()));
5407 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5408 break;
5409 case CCValAssign::SExt:
5410 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5411 DAG.getValueType(VA.getValVT()));
5412 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5413 break;
5414 }
5415
5416 InVals.push_back(Val);
5417 }
5418
5419 return Chain;
5420}
5421
5422static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5423 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5424 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5425 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5426
5427 // PatchPoint calls are not indirect.
5428 if (isPatchPoint)
5429 return false;
5430
5431 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5432 return false;
5433
5434 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5435 // because the immediate function pointer points to a descriptor instead of
5436 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5437 // pointer immediate points to the global entry point, while the BLA would
5438 // need to jump to the local entry point (see rL211174).
5439 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5440 isBLACompatibleAddress(Callee, DAG))
5441 return false;
5442
5443 return true;
5444}
5445
5446// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5447static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5448 return Subtarget.isAIXABI() ||
5449 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5450}
5451
5452static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5453 const Function &Caller, const SDValue &Callee,
5454 const PPCSubtarget &Subtarget,
5455 const TargetMachine &TM,
5456 bool IsStrictFPCall = false) {
5457 if (CFlags.IsTailCall)
5458 return PPCISD::TC_RETURN;
5459
5460 unsigned RetOpc = 0;
5461 // This is a call through a function pointer.
5462 if (CFlags.IsIndirect) {
5463 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5464 // indirect calls. The save of the caller's TOC pointer to the stack will be
5465 // inserted into the DAG as part of call lowering. The restore of the TOC
5466 // pointer is modeled by using a pseudo instruction for the call opcode that
5467 // represents the 2 instruction sequence of an indirect branch and link,
5468 // immediately followed by a load of the TOC pointer from the stack save
5469 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5470 // as it is not saved or used.
5472 : PPCISD::BCTRL;
5473 } else if (Subtarget.isUsingPCRelativeCalls()) {
5474 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5475 RetOpc = PPCISD::CALL_NOTOC;
5476 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5477 // The ABIs that maintain a TOC pointer across calls need to have a nop
5478 // immediately following the call instruction if the caller and callee may
5479 // have different TOC bases. At link time if the linker determines the calls
5480 // may not share a TOC base, the call is redirected to a trampoline inserted
5481 // by the linker. The trampoline will (among other things) save the caller's
5482 // TOC pointer at an ABI designated offset in the linkage area and the
5483 // linker will rewrite the nop to be a load of the TOC pointer from the
5484 // linkage area into gpr2.
5485 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5486 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5487 RetOpc =
5489 } else
5490 RetOpc = PPCISD::CALL;
5491 if (IsStrictFPCall) {
5492 switch (RetOpc) {
5493 default:
5494 llvm_unreachable("Unknown call opcode");
5497 break;
5498 case PPCISD::BCTRL:
5499 RetOpc = PPCISD::BCTRL_RM;
5500 break;
5501 case PPCISD::CALL_NOTOC:
5502 RetOpc = PPCISD::CALL_NOTOC_RM;
5503 break;
5504 case PPCISD::CALL:
5505 RetOpc = PPCISD::CALL_RM;
5506 break;
5507 case PPCISD::CALL_NOP:
5508 RetOpc = PPCISD::CALL_NOP_RM;
5509 break;
5510 }
5511 }
5512 return RetOpc;
5513}
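// Editor's note: a summary, not part of this file, of the opcode selection
// made above (before the strict-FP "_RM" rewrite):
//   - tail call                                      -> TC_RETURN
//   - indirect call, TOC save/restore required       -> TOC-restoring BCTRL pseudo
//   - indirect call, otherwise                       -> BCTRL
//   - direct call with PC-relative addressing        -> CALL_NOTOC
//   - direct call, AIX/64-bit ELF, TOC bases shared  -> CALL
//   - direct call, AIX/64-bit ELF, TOC may differ    -> CALL_NOP (nop may be patched by the linker)
//   - any other direct call                          -> CALL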
5514
5515static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5516 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5517 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5518 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5519 return SDValue(Dest, 0);
5520
5521 // Returns true if the callee is local, and false otherwise.
5522 auto isLocalCallee = [&]() {
5523 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5524 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5525
5526 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5527 !isa_and_nonnull<GlobalIFunc>(GV);
5528 };
5529
5530 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5531 // a static relocation model causes some versions of GNU LD (2.17.50, at
5532 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5533 // built with secure-PLT.
5534 bool UsePlt =
5535 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5537
5538 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5539 const TargetMachine &TM = Subtarget.getTargetMachine();
5540 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5541 MCSymbolXCOFF *S =
5542 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5543
5545 return DAG.getMCSymbol(S, PtrVT);
5546 };
5547
5548 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5549 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5550 if (isFunctionGlobalAddress(GV)) {
5551 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5552
5553 if (Subtarget.isAIXABI()) {
5554 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5555 return getAIXFuncEntryPointSymbolSDNode(GV);
5556 }
5557 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5558 UsePlt ? PPCII::MO_PLT : 0);
5559 }
5560
5561 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5562 const char *SymName = S->getSymbol();
5563 if (Subtarget.isAIXABI()) {
5564 // If there exists a user-declared function whose name is the same as the
5565 // ExternalSymbol's, then we pick up the user-declared version.
5567 if (const Function *F =
5568 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5569 return getAIXFuncEntryPointSymbolSDNode(F);
5570
5571 // On AIX, direct function calls reference the symbol for the function's
5572 // entry point, which is named by prepending a "." before the function's
5573 // C-linkage name. A Qualname is returned here because an external
5574 // function entry point is a csect with XTY_ER property.
5575 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5576 auto &Context = DAG.getMachineFunction().getContext();
5577 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5578 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5580 return Sec->getQualNameSymbol();
5581 };
5582
5583 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5584 }
5585 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5586 UsePlt ? PPCII::MO_PLT : 0);
5587 }
5588
5589 // No transformation needed.
5590 assert(Callee.getNode() && "What no callee?");
5591 return Callee;
5592}
5593
5594static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5595 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5596 "Expected a CALLSEQ_STARTSDNode.");
5597
5598 // The last operand is the chain, except when the node has glue. If the node
5599 // has glue, then the last operand is the glue, and the chain is the second
5600 // last operand.
5601 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5602 if (LastValue.getValueType() != MVT::Glue)
5603 return LastValue;
5604
5605 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5606}
5607
5608// Creates the node that moves a function's address into the count register
5609// to prepare for an indirect call instruction.
5610static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5611 SDValue &Glue, SDValue &Chain,
5612 const SDLoc &dl) {
5613 SDValue MTCTROps[] = {Chain, Callee, Glue};
5614 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5615 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5616 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5617 // The glue is the second value produced.
5618 Glue = Chain.getValue(1);
5619}
5620
5621static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5622 SDValue &Glue, SDValue &Chain,
5623 SDValue CallSeqStart,
5624 const CallBase *CB, const SDLoc &dl,
5625 bool hasNest,
5626 const PPCSubtarget &Subtarget) {
5627 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5628 // entry point, but to the function descriptor (the function entry point
5629 // address is part of the function descriptor though).
5630 // The function descriptor is a three doubleword structure with the
5631 // following fields: function entry point, TOC base address and
5632 // environment pointer.
5633 // Thus for a call through a function pointer, the following actions need
5634 // to be performed:
5635 // 1. Save the TOC of the caller in the TOC save area of its stack
5636 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5637 // 2. Load the address of the function entry point from the function
5638 // descriptor.
5639 // 3. Load the TOC of the callee from the function descriptor into r2.
5640 // 4. Load the environment pointer from the function descriptor into
5641 // r11.
5642 // 5. Branch to the function entry point address.
5643 // 6. On return of the callee, the TOC of the caller needs to be
5644 // restored (this is done in FinishCall()).
5645 //
5646 // The loads are scheduled at the beginning of the call sequence, and the
5647 // register copies are flagged together to ensure that no other
5648 // operations can be scheduled in between. E.g. without flagging the
5649 // copies together, a TOC access in the caller could be scheduled between
5650 // the assignment of the callee TOC and the branch to the callee, which leads
5651 // to incorrect code.
5652
5653 // Start by loading the function address from the descriptor.
5654 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5655 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5659
5660 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5661
5662 // Registers used in building the DAG.
5663 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5664 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5665
5666 // Offsets of descriptor members.
5667 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5668 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5669
5670 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5671 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5672
5673 // One load for the function's entry point address.
5674 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5675 Alignment, MMOFlags);
5676
5677 // One for loading the TOC anchor for the module that contains the called
5678 // function.
5679 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5680 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5681 SDValue TOCPtr =
5682 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5683 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5684
5685 // One for loading the environment pointer.
5686 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5687 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5688 SDValue LoadEnvPtr =
5689 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5690 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5691
5692
5693 // Then copy the newly loaded TOC anchor to the TOC pointer.
5694 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5695 Chain = TOCVal.getValue(0);
5696 Glue = TOCVal.getValue(1);
5697
5698 // If the function call has an explicit 'nest' parameter, it takes the
5699 // place of the environment pointer.
5700 assert((!hasNest || !Subtarget.isAIXABI()) &&
5701 "Nest parameter is not supported on AIX.");
5702 if (!hasNest) {
5703 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5704 Chain = EnvVal.getValue(0);
5705 Glue = EnvVal.getValue(1);
5706 }
5707
5708 // The rest of the indirect call sequence is the same as the non-descriptor
5709 // DAG.
5710 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5711}
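// Editor's note: an illustrative sketch, not part of this file, of the
// three-doubleword function descriptor that the indirect-call sequence above
// walks. The struct and field names are invented for exposition; on a 64-bit
// target the offsets correspond to descriptorTOCAnchorOffset() and
// descriptorEnvironmentPointerOffset().
//
//   struct FunctionDescriptor {
//     uint64_t EntryPoint; // offset 0:  loaded and moved into CTR
//     uint64_t TOCBase;    // offset 8:  copied into the TOC pointer register
//     uint64_t EnvPtr;     // offset 16: copied into the environment pointer
//                          //            register unless 'nest' is used
//   };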
5712
5713static void
5714buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5715 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5716 SelectionDAG &DAG,
5717 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5718 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5719 const PPCSubtarget &Subtarget) {
5720 const bool IsPPC64 = Subtarget.isPPC64();
5721 // MVT for a general purpose register.
5722 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5723
5724 // First operand is always the chain.
5725 Ops.push_back(Chain);
5726
5727 // If it's a direct call pass the callee as the second operand.
5728 if (!CFlags.IsIndirect)
5729 Ops.push_back(Callee);
5730 else {
5731 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5732
5733 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5734 // on the stack (this would have been done in `LowerCall_64SVR4` or
5735 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5736 // represents both the indirect branch and a load that restores the TOC
5737 // pointer from the linkage area. The operand for the TOC restore is an add
5738 // of the TOC save offset to the stack pointer. This must be the second
5739 // operand: after the chain input but before any other variadic arguments.
5740 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5741 // saved or used.
5742 if (isTOCSaveRestoreRequired(Subtarget)) {
5743 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5744
5745 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5746 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5747 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5748 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5749 Ops.push_back(AddTOC);
5750 }
5751
5752 // Add the register used for the environment pointer.
5753 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5755 RegVT));
5756
5757
5758 // Add CTR register as callee so a bctr can be emitted later.
5759 if (CFlags.IsTailCall)
5760 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5761 }
5762
5763 // If this is a tail call add stack pointer delta.
5764 if (CFlags.IsTailCall)
5765 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5766
5767 // Add argument registers to the end of the list so that they are known live
5768 // into the call.
5769 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5770 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5771 RegsToPass[i].second.getValueType()));
5772
5773 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5774 // no way to mark dependencies as implicit here.
5775 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5776 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5777 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5778 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5779
5780 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5781 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5782 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5783
5784 // Add a register mask operand representing the call-preserved registers.
5785 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5786 const uint32_t *Mask =
5787 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5788 assert(Mask && "Missing call preserved mask for calling convention");
5789 Ops.push_back(DAG.getRegisterMask(Mask));
5790
5791 // If the glue is valid, it is the last operand.
5792 if (Glue.getNode())
5793 Ops.push_back(Glue);
5794}
5795
5796SDValue PPCTargetLowering::FinishCall(
5797 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5798 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5799 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5800 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5801 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5802
5803 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5804 Subtarget.isAIXABI())
5805 setUsesTOCBasePtr(DAG);
5806
5807 unsigned CallOpc =
5808 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5809 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5810
5811 if (!CFlags.IsIndirect)
5812 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5813 else if (Subtarget.usesFunctionDescriptors())
5814 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5815 dl, CFlags.HasNest, Subtarget);
5816 else
5817 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5818
5819 // Build the operand list for the call instruction.
5821 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5822 SPDiff, Subtarget);
5823
5824 // Emit tail call.
5825 if (CFlags.IsTailCall) {
5826 // Indirect tail calls when using PC Relative calls do not have the same
5827 // constraints.
5828 assert(((Callee.getOpcode() == ISD::Register &&
5829 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5830 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5831 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5832 isa<ConstantSDNode>(Callee) ||
5833 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5834 "Expecting a global address, external symbol, absolute value, "
5835 "register or an indirect tail call when PC Relative calls are "
5836 "used.");
5837 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5838 assert(CallOpc == PPCISD::TC_RETURN &&
5839 "Unexpected call opcode for a tail call.");
5841 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5842 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5843 return Ret;
5844 }
5845
5846 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5847 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5848 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5849 Glue = Chain.getValue(1);
5850
5851 // When performing tail call optimization the callee pops its arguments off
5852 // the stack. Account for this here so these bytes can be pushed back on in
5853 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5854 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5856 ? NumBytes
5857 : 0;
5858
5859 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5860 Glue = Chain.getValue(1);
5861
5862 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5863 DAG, InVals);
5864}
5865
5866bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5867 CallingConv::ID CalleeCC = CB->getCallingConv();
5868 const Function *CallerFunc = CB->getCaller();
5869 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5870 const Function *CalleeFunc = CB->getCalledFunction();
5871 if (!CalleeFunc)
5872 return false;
5873 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5874
5877
5878 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5879 CalleeFunc->getAttributes(), Outs, *this,
5880 CalleeFunc->getDataLayout());
5881
5882 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5883 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5884 false /*isCalleeExternalSymbol*/);
5885}
5886
5887bool PPCTargetLowering::isEligibleForTCO(
5888 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5889 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5891 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5892 bool isCalleeExternalSymbol) const {
5893 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5894 return false;
5895
5896 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5897 return IsEligibleForTailCallOptimization_64SVR4(
5898 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5899 isCalleeExternalSymbol);
5900 else
5901 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5902 isVarArg, Ins);
5903}
5904
5905SDValue
5906PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5907 SmallVectorImpl<SDValue> &InVals) const {
5908 SelectionDAG &DAG = CLI.DAG;
5909 SDLoc &dl = CLI.DL;
5911 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5913 SDValue Chain = CLI.Chain;
5914 SDValue Callee = CLI.Callee;
5915 bool &isTailCall = CLI.IsTailCall;
5916 CallingConv::ID CallConv = CLI.CallConv;
5917 bool isVarArg = CLI.IsVarArg;
5918 bool isPatchPoint = CLI.IsPatchPoint;
5919 const CallBase *CB = CLI.CB;
5920
5921 if (isTailCall) {
5923 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5924 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5925 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5926 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5927
5928 isTailCall =
5929 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5930 &(MF.getFunction()), IsCalleeExternalSymbol);
5931 if (isTailCall) {
5932 ++NumTailCalls;
5933 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5934 ++NumSiblingCalls;
5935
5936 // PC Relative calls no longer guarantee that the callee is a Global
5937 // Address Node. The callee could be an indirect tail call in which
5938 // case the SDValue for the callee could be a load (to load the address
5939 // of a function pointer) or it may be a register copy (to move the
5940 // address of the callee from a function parameter into a virtual
5941 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5942 assert((Subtarget.isUsingPCRelativeCalls() ||
5943 isa<GlobalAddressSDNode>(Callee)) &&
5944 "Callee should be an llvm::Function object.");
5945
5946 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5947 << "\nTCO callee: ");
5948 LLVM_DEBUG(Callee.dump());
5949 }
5950 }
5951
5952 if (!isTailCall && CB && CB->isMustTailCall())
5953 report_fatal_error("failed to perform tail call elimination on a call "
5954 "site marked musttail");
5955
5956 // When long calls (i.e. indirect calls) are always used, calls are always
5957 // made via function pointer. If we have a function name, first translate it
5958 // into a pointer.
5959 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5960 !isTailCall)
5961 Callee = LowerGlobalAddress(Callee, DAG);
5962
5963 CallFlags CFlags(
5964 CallConv, isTailCall, isVarArg, isPatchPoint,
5965 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5966 // hasNest
5967 Subtarget.is64BitELFABI() &&
5968 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5969 CLI.NoMerge);
5970
5971 if (Subtarget.isAIXABI())
5972 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5973 InVals, CB);
5974
5975 assert(Subtarget.isSVR4ABI());
5976 if (Subtarget.isPPC64())
5977 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5978 InVals, CB);
5979 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5980 InVals, CB);
5981}
5982
5983SDValue PPCTargetLowering::LowerCall_32SVR4(
5984 SDValue Chain, SDValue Callee, CallFlags CFlags,
5986 const SmallVectorImpl<SDValue> &OutVals,
5987 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5989 const CallBase *CB) const {
5990 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5991 // of the 32-bit SVR4 ABI stack frame layout.
5992
5993 const CallingConv::ID CallConv = CFlags.CallConv;
5994 const bool IsVarArg = CFlags.IsVarArg;
5995 const bool IsTailCall = CFlags.IsTailCall;
5996
5997 assert((CallConv == CallingConv::C ||
5998 CallConv == CallingConv::Cold ||
5999 CallConv == CallingConv::Fast) && "Unknown calling convention!");
6000
6001 const Align PtrAlign(4);
6002
6004
6005 // Mark this function as potentially containing a function that contains a
6006 // tail call. As a consequence, the frame pointer will be used for dynamic
6007 // stack allocation and for restoring the caller's stack pointer in this
6008 // function's epilogue. This is done because, by tail calling, the called
6009 // function might overwrite the value in this function's (MF) stack pointer slot 0(SP).
6010 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6011 CallConv == CallingConv::Fast)
6012 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6013
6014 // Count how many bytes are to be pushed on the stack, including the linkage
6015 // area, parameter list area and the part of the local variable space which
6016 // contains copies of aggregates which are passed by value.
6017
6018 // Assign locations to all of the outgoing arguments.
6019 SmallVector<CCValAssign, 16> ArgLocs;
6020 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6021
6022 // Reserve space for the linkage area on the stack.
6023 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6024 PtrAlign);
6025 if (useSoftFloat())
6026 CCInfo.PreAnalyzeCallOperands(Outs);
6027
6028 if (IsVarArg) {
6029 // Handle fixed and variable vector arguments differently.
6030 // Fixed vector arguments go into registers as long as registers are
6031 // available. Variable vector arguments always go into memory.
6032 unsigned NumArgs = Outs.size();
6033
6034 for (unsigned i = 0; i != NumArgs; ++i) {
6035 MVT ArgVT = Outs[i].VT;
6036 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6037 bool Result;
6038
6039 if (Outs[i].IsFixed) {
6040 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6041 CCInfo);
6042 } else {
6044 ArgFlags, CCInfo);
6045 }
6046
6047 if (Result) {
6048#ifndef NDEBUG
6049 errs() << "Call operand #" << i << " has unhandled type "
6050 << ArgVT << "\n";
6051#endif
6052 llvm_unreachable(nullptr);
6053 }
6054 }
6055 } else {
6056 // All arguments are treated the same.
6057 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6058 }
6059 CCInfo.clearWasPPCF128();
6060
6061 // Assign locations to all of the outgoing aggregate by value arguments.
6062 SmallVector<CCValAssign, 16> ByValArgLocs;
6063 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6064
6065 // Reserve stack space for the allocations in CCInfo.
6066 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6067
6068 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6069
6070 // Size of the linkage area, parameter list area, and the part of the local
6071 // variable space where copies of aggregates which are passed by value are
6072 // stored.
6073 unsigned NumBytes = CCByValInfo.getStackSize();
6074
6075 // Calculate by how many bytes the stack has to be adjusted in case of tail
6076 // call optimization.
6077 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6078
6079 // Adjust the stack pointer for the new arguments...
6080 // These operations are automatically eliminated by the prolog/epilog pass
6081 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6082 SDValue CallSeqStart = Chain;
6083
6084 // Load the return address and frame pointer so they can be moved somewhere else
6085 // later.
6086 SDValue LROp, FPOp;
6087 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6088
6089 // Set up a copy of the stack pointer for use loading and storing any
6090 // arguments that may not fit in the registers available for argument
6091 // passing.
6092 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6093
6095 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6096 SmallVector<SDValue, 8> MemOpChains;
6097
6098 bool seenFloatArg = false;
6099 // Walk the register/memloc assignments, inserting copies/loads.
6100 // i - Tracks the index into the list of registers allocated for the call
6101 // RealArgIdx - Tracks the index into the list of actual function arguments
6102 // j - Tracks the index into the list of byval arguments
6103 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6104 i != e;
6105 ++i, ++RealArgIdx) {
6106 CCValAssign &VA = ArgLocs[i];
6107 SDValue Arg = OutVals[RealArgIdx];
6108 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6109
6110 if (Flags.isByVal()) {
6111 // Argument is an aggregate which is passed by value, thus we need to
6112 // create a copy of it in the local variable space of the current stack
6113 // frame (which is the stack frame of the caller) and pass the address of
6114 // this copy to the callee.
6115 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6116 CCValAssign &ByValVA = ByValArgLocs[j++];
6117 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6118
6119 // Memory reserved in the local variable space of the caller's stack frame.
6120 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6121
6122 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6123 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6124 StackPtr, PtrOff);
6125
6126 // Create a copy of the argument in the local area of the current
6127 // stack frame.
6128 SDValue MemcpyCall =
6129 CreateCopyOfByValArgument(Arg, PtrOff,
6130 CallSeqStart.getNode()->getOperand(0),
6131 Flags, DAG, dl);
6132
6133 // This must go outside the CALLSEQ_START..END.
6134 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6135 SDLoc(MemcpyCall));
6136 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6137 NewCallSeqStart.getNode());
6138 Chain = CallSeqStart = NewCallSeqStart;
6139
6140 // Pass the address of the aggregate copy on the stack either in a
6141 // physical register or in the parameter list area of the current stack
6142 // frame to the callee.
6143 Arg = PtrOff;
6144 }
6145
6146 // When useCRBits() is true, there can be i1 arguments.
6147 // It is because getRegisterType(MVT::i1) => MVT::i1,
6148 // and for other integer types getRegisterType() => MVT::i32.
6149 // Extend i1 and ensure callee will get i32.
6150 if (Arg.getValueType() == MVT::i1)
6151 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6152 dl, MVT::i32, Arg);
6153
6154 if (VA.isRegLoc()) {
6155 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6156 // Put argument in a physical register.
6157 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6158 bool IsLE = Subtarget.isLittleEndian();
6159 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6160 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6161 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6162 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6163 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6164 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6165 SVal.getValue(0)));
6166 } else
6167 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6168 } else {
6169 // Put argument in the parameter list area of the current stack frame.
6170 assert(VA.isMemLoc());
6171 unsigned LocMemOffset = VA.getLocMemOffset();
6172
6173 if (!IsTailCall) {
6174 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6175 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6176 StackPtr, PtrOff);
6177
6178 MemOpChains.push_back(
6179 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6180 } else {
6181 // Calculate and remember argument location.
6182 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6183 TailCallArguments);
6184 }
6185 }
6186 }
6187
6188 if (!MemOpChains.empty())
6189 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6190
6191 // Build a sequence of copy-to-reg nodes chained together with token chain
6192 // and flag operands which copy the outgoing args into the appropriate regs.
6193 SDValue InGlue;
6194 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6195 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6196 RegsToPass[i].second, InGlue);
6197 InGlue = Chain.getValue(1);
6198 }
6199
6200 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6201 // registers.
6202 if (IsVarArg) {
6203 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6204 SDValue Ops[] = { Chain, InGlue };
6205
6206 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6207 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6208
6209 InGlue = Chain.getValue(1);
6210 }
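// Editor's note, not part of this file: in the 32-bit SVR4 ABI, CR bit 6
// tells a varargs callee whether any floating-point arguments arrived in
// registers, so its va_start prologue knows whether the FP argument
// registers need to be spilled. The CR6SET/CR6UNSET nodes above model the
// instructions that set or clear that bit just before the call.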
6211
6212 if (IsTailCall)
6213 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6214 TailCallArguments);
6215
6216 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6217 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6218}
6219
6220// Copy an argument into memory, being careful to do this outside the
6221// call sequence for the call to which the argument belongs.
6222SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6223 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6224 SelectionDAG &DAG, const SDLoc &dl) const {
6225 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6226 CallSeqStart.getNode()->getOperand(0),
6227 Flags, DAG, dl);
6228 // The MEMCPY must go outside the CALLSEQ_START..END.
6229 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6230 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6231 SDLoc(MemcpyCall));
6232 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6233 NewCallSeqStart.getNode());
6234 return NewCallSeqStart;
6235}
6236
6237SDValue PPCTargetLowering::LowerCall_64SVR4(
6238 SDValue Chain, SDValue Callee, CallFlags CFlags,
6240 const SmallVectorImpl<SDValue> &OutVals,
6241 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6243 const CallBase *CB) const {
6244 bool isELFv2ABI = Subtarget.isELFv2ABI();
6245 bool isLittleEndian = Subtarget.isLittleEndian();
6246 unsigned NumOps = Outs.size();
6247 bool IsSibCall = false;
6248 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6249
6250 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6251 unsigned PtrByteSize = 8;
6252
6254
6255 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6256 IsSibCall = true;
6257
6258 // Mark this function as potentially containing a function that contains a
6259 // tail call. As a consequence, the frame pointer will be used for dynamic
6260 // stack allocation and for restoring the caller's stack pointer in this
6261 // function's epilogue. This is done because, by tail calling, the called
6262 // function might overwrite the value in this function's (MF) stack pointer slot 0(SP).
6263 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6264 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6265
6266 assert(!(IsFastCall && CFlags.IsVarArg) &&
6267 "fastcc not supported on varargs functions");
6268
6269 // Count how many bytes are to be pushed on the stack, including the linkage
6270 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6271 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6272 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
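// Editor's note: an illustrative byte map, not part of this file, of the
// fixed linkage area described above (doubleword slots, offsets from the
// stack pointer at the call site):
//
//   ELFv1 (48 bytes): 0 [SP backchain] 8 [CR save] 16 [LR save]
//                     24 [reserved] 32 [reserved] 40 [TOC save]
//   ELFv2 (32 bytes): 0 [SP backchain] 8 [CR save] 16 [LR save] 24 [TOC save]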
6273 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6274 unsigned NumBytes = LinkageSize;
6275 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6276
6277 static const MCPhysReg GPR[] = {
6278 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6279 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6280 };
6281 static const MCPhysReg VR[] = {
6282 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6283 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6284 };
6285
6286 const unsigned NumGPRs = std::size(GPR);
6287 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6288 const unsigned NumVRs = std::size(VR);
6289
6290 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6291 // can be passed to the callee in registers.
6292 // For the fast calling convention, there is another check below.
6293 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6294 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6295 if (!HasParameterArea) {
6296 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6297 unsigned AvailableFPRs = NumFPRs;
6298 unsigned AvailableVRs = NumVRs;
6299 unsigned NumBytesTmp = NumBytes;
6300 for (unsigned i = 0; i != NumOps; ++i) {
6301 if (Outs[i].Flags.isNest()) continue;
6302 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6303 PtrByteSize, LinkageSize, ParamAreaSize,
6304 NumBytesTmp, AvailableFPRs, AvailableVRs))
6305 HasParameterArea = true;
6306 }
6307 }
6308
6309 // When using the fast calling convention, we don't provide backing for
6310 // arguments that will be in registers.
6311 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6312
6313 // Avoid allocating parameter area for fastcc functions if all the arguments
6314 // can be passed in the registers.
6315 if (IsFastCall)
6316 HasParameterArea = false;
6317
6318 // Add up all the space actually used.
6319 for (unsigned i = 0; i != NumOps; ++i) {
6320 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6321 EVT ArgVT = Outs[i].VT;
6322 EVT OrigVT = Outs[i].ArgVT;
6323
6324 if (Flags.isNest())
6325 continue;
6326
6327 if (IsFastCall) {
6328 if (Flags.isByVal()) {
6329 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6330 if (NumGPRsUsed > NumGPRs)
6331 HasParameterArea = true;
6332 } else {
6333 switch (ArgVT.getSimpleVT().SimpleTy) {
6334 default: llvm_unreachable("Unexpected ValueType for argument!");
6335 case MVT::i1:
6336 case MVT::i32:
6337 case MVT::i64:
6338 if (++NumGPRsUsed <= NumGPRs)
6339 continue;
6340 break;
6341 case MVT::v4i32:
6342 case MVT::v8i16:
6343 case MVT::v16i8:
6344 case MVT::v2f64:
6345 case MVT::v2i64:
6346 case MVT::v1i128:
6347 case MVT::f128:
6348 if (++NumVRsUsed <= NumVRs)
6349 continue;
6350 break;
6351 case MVT::v4f32:
6352 if (++NumVRsUsed <= NumVRs)
6353 continue;
6354 break;
6355 case MVT::f32:
6356 case MVT::f64:
6357 if (++NumFPRsUsed <= NumFPRs)
6358 continue;
6359 break;
6360 }
6361 HasParameterArea = true;
6362 }
6363 }
6364
6365 /* Respect alignment of argument on the stack. */
6366 auto Alignment =
6367 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6368 NumBytes = alignTo(NumBytes, Alignment);
6369
6370 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6371 if (Flags.isInConsecutiveRegsLast())
6372 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6373 }
6374
6375 unsigned NumBytesActuallyUsed = NumBytes;
6376
6377 // In the old ELFv1 ABI,
6378 // the prolog code of the callee may store up to 8 GPR argument registers to
6379 // the stack, allowing va_start to index over them in memory if it is varargs.
6380 // Because we cannot tell if this is needed on the caller side, we have to
6381 // conservatively assume that it is needed. As such, make sure we have at
6382 // least enough stack space for the caller to store the 8 GPRs.
6383 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6384 // really requires memory operands, e.g. a vararg function.
6385 if (HasParameterArea)
6386 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6387 else
6388 NumBytes = LinkageSize;
6389
6390 // Tail call needs the stack to be aligned.
6391 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6392 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6393
6394 int SPDiff = 0;
6395
6396 // Calculate by how many bytes the stack has to be adjusted in case of tail
6397 // call optimization.
6398 if (!IsSibCall)
6399 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6400
6401 // To protect arguments on the stack from being clobbered in a tail call,
6402 // force all the loads to happen before doing any other lowering.
6403 if (CFlags.IsTailCall)
6404 Chain = DAG.getStackArgumentTokenFactor(Chain);
6405
6406 // Adjust the stack pointer for the new arguments...
6407 // These operations are automatically eliminated by the prolog/epilog pass
6408 if (!IsSibCall)
6409 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6410 SDValue CallSeqStart = Chain;
6411
6412 // Load the return address and frame pointer so they can be moved somewhere else
6413 // later.
6414 SDValue LROp, FPOp;
6415 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6416
6417 // Set up a copy of the stack pointer for use loading and storing any
6418 // arguments that may not fit in the registers available for argument
6419 // passing.
6420 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6421
6422 // Figure out which arguments are going to go in registers, and which in
6423 // memory. Also, if this is a vararg function, floating point operations
6424 // must be stored to our stack, and loaded into integer regs as well, if
6425 // any integer regs are available for argument passing.
6426 unsigned ArgOffset = LinkageSize;
6427
6428 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6429 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6430
6431 SmallVector<SDValue, 8> MemOpChains;
6432 for (unsigned i = 0; i != NumOps; ++i) {
6433 SDValue Arg = OutVals[i];
6434 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6435 EVT ArgVT = Outs[i].VT;
6436 EVT OrigVT = Outs[i].ArgVT;
6437
6438 // PtrOff will be used to store the current argument to the stack if a
6439 // register cannot be found for it.
6440 SDValue PtrOff;
6441
6442 // We re-align the argument offset for each argument, except when using the
6443 // fast calling convention, when we need to make sure we do that only when
6444 // we'll actually use a stack slot.
6445 auto ComputePtrOff = [&]() {
6446 /* Respect alignment of argument on the stack. */
6447 auto Alignment =
6448 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6449 ArgOffset = alignTo(ArgOffset, Alignment);
6450
6451 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6452
6453 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6454 };
6455
6456 if (!IsFastCall) {
6457 ComputePtrOff();
6458
6459 /* Compute GPR index associated with argument offset. */
6460 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6461 GPR_idx = std::min(GPR_idx, NumGPRs);
6462 }
6463
6464 // Promote integers to 64-bit values.
6465 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6466 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6467 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6468 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6469 }
6470
6471 // FIXME memcpy is used way more than necessary. Correctness first.
6472 // Note: "by value" is code for passing a structure by value, not
6473 // basic types.
6474 if (Flags.isByVal()) {
6475 // Note: Size includes alignment padding, so
6476 // struct x { short a; char b; }
6477 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6478 // These are the proper values we need for right-justifying the
6479 // aggregate in a parameter register.
6480 unsigned Size = Flags.getByValSize();
6481
6482 // An empty aggregate parameter takes up no storage and no
6483 // registers.
6484 if (Size == 0)
6485 continue;
6486
6487 if (IsFastCall)
6488 ComputePtrOff();
6489
6490 // All aggregates smaller than 8 bytes must be passed right-justified.
6491 if (Size==1 || Size==2 || Size==4) {
6492 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6493 if (GPR_idx != NumGPRs) {
6494 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6495 MachinePointerInfo(), VT);
6496 MemOpChains.push_back(Load.getValue(1));
6497 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6498
6499 ArgOffset += PtrByteSize;
6500 continue;
6501 }
6502 }
6503
6504 if (GPR_idx == NumGPRs && Size < 8) {
6505 SDValue AddPtr = PtrOff;
6506 if (!isLittleEndian) {
6507 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6508 PtrOff.getValueType());
6509 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6510 }
6511 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6512 CallSeqStart,
6513 Flags, DAG, dl);
6514 ArgOffset += PtrByteSize;
6515 continue;
6516 }
6517 // Copy the object to the parameter save area if it cannot be entirely passed
6518 // by registers.
6519 // FIXME: we only need to copy the parts which need to be passed in
6520 // parameter save area. For the parts passed by registers, we don't need
6521 // to copy them to the stack although we need to allocate space for them
6522 // in parameter save area.
6523 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6524 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6525 CallSeqStart,
6526 Flags, DAG, dl);
6527
6528 // When a register is available, pass a small aggregate right-justified.
6529 if (Size < 8 && GPR_idx != NumGPRs) {
6530 // The easiest way to get this right-justified in a register
6531 // is to copy the structure into the rightmost portion of a
6532 // local variable slot, then load the whole slot into the
6533 // register.
6534 // FIXME: The memcpy seems to produce pretty awful code for
6535 // small aggregates, particularly for packed ones.
6536 // FIXME: It would be preferable to use the slot in the
6537 // parameter save area instead of a new local variable.
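// Editor's note: a worked example, not part of this file, of the
// right-justification above. On a big-endian 64-bit target with a 3-byte
// aggregate, the bytes are memcpy'd to offset 8 - 3 = 5 of a doubleword slot:
//
//   slot byte:   0  1  2  3  4  5  6  7
//   contents:    0  0  0  0  0  a  b  c     // struct { char a, b, c; }
//
// so loading the full doubleword leaves the aggregate in the low-order
// (rightmost) bytes of the GPR, which is where the 64-bit ELF ABI expects a
// small aggregate passed in a register.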
6538 SDValue AddPtr = PtrOff;
6539 if (!isLittleEndian) {
6540 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6541 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6542 }
6543 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6544 CallSeqStart,
6545 Flags, DAG, dl);
6546
6547 // Load the slot into the register.
6548 SDValue Load =
6549 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6550 MemOpChains.push_back(Load.getValue(1));
6551 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6552
6553 // Done with this argument.
6554 ArgOffset += PtrByteSize;
6555 continue;
6556 }
6557
6558 // For aggregates larger than PtrByteSize, copy the pieces of the
6559 // object that fit into registers from the parameter save area.
6560 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6561 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6562 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6563 if (GPR_idx != NumGPRs) {
6564 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6565 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6566 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6567 MachinePointerInfo(), ObjType);
6568
6569 MemOpChains.push_back(Load.getValue(1));
6570 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6571 ArgOffset += PtrByteSize;
6572 } else {
6573 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6574 break;
6575 }
6576 }
6577 continue;
6578 }
6579
6580 switch (Arg.getSimpleValueType().SimpleTy) {
6581 default: llvm_unreachable("Unexpected ValueType for argument!");
6582 case MVT::i1:
6583 case MVT::i32:
6584 case MVT::i64:
6585 if (Flags.isNest()) {
6586 // The 'nest' parameter, if any, is passed in R11.
6587 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6588 break;
6589 }
6590
6591 // These can be scalar arguments or elements of an integer array type
6592 // passed directly. Clang may use those instead of "byval" aggregate
6593 // types to avoid forcing arguments to memory unnecessarily.
6594 if (GPR_idx != NumGPRs) {
6595 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6596 } else {
6597 if (IsFastCall)
6598 ComputePtrOff();
6599
6600 assert(HasParameterArea &&
6601 "Parameter area must exist to pass an argument in memory.");
6602 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6603 true, CFlags.IsTailCall, false, MemOpChains,
6604 TailCallArguments, dl);
6605 if (IsFastCall)
6606 ArgOffset += PtrByteSize;
6607 }
6608 if (!IsFastCall)
6609 ArgOffset += PtrByteSize;
6610 break;
6611 case MVT::f32:
6612 case MVT::f64: {
6613 // These can be scalar arguments or elements of a float array type
6614 // passed directly. The latter are used to implement ELFv2 homogeneous
6615 // float aggregates.
6616
6617 // Named arguments go into FPRs first, and once they overflow, the
6618 // remaining arguments go into GPRs and then the parameter save area.
6619 // Unnamed arguments for vararg functions always go to GPRs and
6620 // then the parameter save area. For now, put all arguments to vararg
6621 // routines always in both locations (FPR *and* GPR or stack slot).
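      // For example, a homogeneous aggregate of f32 members can exhaust the 13
      // argument FPRs while doublewords of the parameter save area (and the
      // GPRs shadowing them) are still available; that is the situation the
      // packing logic below handles.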
6622 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6623 bool NeededLoad = false;
6624
6625 // First load the argument into the next available FPR.
6626 if (FPR_idx != NumFPRs)
6627 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6628
6629 // Next, load the argument into GPR or stack slot if needed.
6630 if (!NeedGPROrStack)
6631 ;
6632 else if (GPR_idx != NumGPRs && !IsFastCall) {
6633 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6634 // once we support fp <-> gpr moves.
6635
6636 // In the non-vararg case, this can only ever happen in the
6637 // presence of f32 array types, since otherwise we never run
6638 // out of FPRs before running out of GPRs.
6639 SDValue ArgVal;
6640
6641 // Double values are always passed in a single GPR.
6642 if (Arg.getValueType() != MVT::f32) {
6643 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6644
6645 // Non-array float values are extended and passed in a GPR.
6646 } else if (!Flags.isInConsecutiveRegs()) {
6647 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6648 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6649
6650 // If we have an array of floats, we collect every odd element
6651 // together with its predecessor into one GPR.
6652 } else if (ArgOffset % PtrByteSize != 0) {
6653 SDValue Lo, Hi;
6654 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6655 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6656 if (!isLittleEndian)
6657 std::swap(Lo, Hi);
6658 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6659
6660 // The final element, if even, goes into the first half of a GPR.
6661 } else if (Flags.isInConsecutiveRegsLast()) {
6662 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6663 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6664 if (!isLittleEndian)
6665 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6666 DAG.getConstant(32, dl, MVT::i32));
6667
6668 // Non-final even elements are skipped; they will be handled
6669 // together with the subsequent argument on the next go-around.
6670 } else
6671 ArgVal = SDValue();
6672
6673 if (ArgVal.getNode())
6674 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6675 } else {
6676 if (IsFastCall)
6677 ComputePtrOff();
6678
6679 // Single-precision floating-point values are mapped to the
6680 // second (rightmost) word of the stack doubleword.
6681 if (Arg.getValueType() == MVT::f32 &&
6682 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6683 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6684 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6685 }
6686
6687 assert(HasParameterArea &&
6688 "Parameter area must exist to pass an argument in memory.");
6689 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6690 true, CFlags.IsTailCall, false, MemOpChains,
6691 TailCallArguments, dl);
6692
6693 NeededLoad = true;
6694 }
6695 // When passing an array of floats, the array occupies consecutive
6696 // space in the argument area; only round up to the next doubleword
6697 // at the end of the array. Otherwise, each float takes 8 bytes.
6698 if (!IsFastCall || NeededLoad) {
6699 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6700 Flags.isInConsecutiveRegs()) ? 4 : 8;
6701 if (Flags.isInConsecutiveRegsLast())
6702 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6703 }
6704 break;
6705 }
6706 case MVT::v4f32:
6707 case MVT::v4i32:
6708 case MVT::v8i16:
6709 case MVT::v16i8:
6710 case MVT::v2f64:
6711 case MVT::v2i64:
6712 case MVT::v1i128:
6713 case MVT::f128:
6714 // These can be scalar arguments or elements of a vector array type
6715 // passed directly. The latter are used to implement ELFv2 homogeneous
6716 // vector aggregates.
6717
6718 // For a varargs call, named arguments go into VRs or on the stack as
6719 // usual; unnamed arguments always go to the stack or the corresponding
6720 // GPRs when within range. For now, we always put the value in both
6721 // locations (or even all three).
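      // For example, a v4i32 argument to a varargs callee is stored to its
      // 16-byte slot below, reloaded into the next free VR (if any), and also
      // reloaded one pointer-sized piece at a time into any remaining GPRs.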
6722 if (CFlags.IsVarArg) {
6723 assert(HasParameterArea &&
6724 "Parameter area must exist if we have a varargs call.");
6725 // We could elide this store in the case where the object fits
6726 // entirely in R registers. Maybe later.
6727 SDValue Store =
6728 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6729 MemOpChains.push_back(Store);
6730 if (VR_idx != NumVRs) {
6731 SDValue Load =
6732 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6733 MemOpChains.push_back(Load.getValue(1));
6734 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6735 }
6736 ArgOffset += 16;
6737 for (unsigned i=0; i<16; i+=PtrByteSize) {
6738 if (GPR_idx == NumGPRs)
6739 break;
6740 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6741 DAG.getConstant(i, dl, PtrVT));
6742 SDValue Load =
6743 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6744 MemOpChains.push_back(Load.getValue(1));
6745 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6746 }
6747 break;
6748 }
6749
6750 // Non-varargs Altivec params go into VRs or on the stack.
6751 if (VR_idx != NumVRs) {
6752 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6753 } else {
6754 if (IsFastCall)
6755 ComputePtrOff();
6756
6757 assert(HasParameterArea &&
6758 "Parameter area must exist to pass an argument in memory.");
6759 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6760 true, CFlags.IsTailCall, true, MemOpChains,
6761 TailCallArguments, dl);
6762 if (IsFastCall)
6763 ArgOffset += 16;
6764 }
6765
6766 if (!IsFastCall)
6767 ArgOffset += 16;
6768 break;
6769 }
6770 }
6771
6772 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6773 "mismatch in size of parameter area");
6774 (void)NumBytesActuallyUsed;
6775
6776 if (!MemOpChains.empty())
6777 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6778
6779 // Check if this is an indirect call (MTCTR/BCTRL).
6780 // See prepareDescriptorIndirectCall and buildCallOperands for more
6781 // information about calls through function pointers in the 64-bit SVR4 ABI.
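  // For example, on ELFv2 the caller's TOC pointer (r2) is stored to the
  // linkage-area slot returned by getTOCSaveOffset() (typically SP + 24) so
  // it can be restored after the bctrl returns.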
6782 if (CFlags.IsIndirect) {
6783 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6784 // caller in the TOC save area.
6785 if (isTOCSaveRestoreRequired(Subtarget)) {
6786 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6787 // Load r2 into a virtual register and store it to the TOC save area.
6788 setUsesTOCBasePtr(DAG);
6789 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6790 // TOC save area offset.
6791 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6792 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6793 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6794 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6795                                MachinePointerInfo::getStack(
6796 DAG.getMachineFunction(), TOCSaveOffset));
6797 }
6798 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6799 // This does not mean the MTCTR instruction must use R12; it's easier
6800 // to model this as an extra parameter, so do that.
6801 if (isELFv2ABI && !CFlags.IsPatchPoint)
6802 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6803 }
6804
6805 // Build a sequence of copy-to-reg nodes chained together with token chain
6806 // and flag operands which copy the outgoing args into the appropriate regs.
6807 SDValue InGlue;
6808 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6809 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6810 RegsToPass[i].second, InGlue);
6811 InGlue = Chain.getValue(1);
6812 }
6813
6814 if (CFlags.IsTailCall && !IsSibCall)
6815 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6816 TailCallArguments);
6817
6818 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6819 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6820}
6821
6822// Returns true when the shadow of a general purpose argument register
6823// in the parameter save area is aligned to at least 'RequiredAlign'.
6824static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6825 assert(RequiredAlign.value() <= 16 &&
6826 "Required alignment greater than stack alignment.");
6827 switch (Reg) {
6828 default:
6829 report_fatal_error("called on invalid register.");
6830 case PPC::R5:
6831 case PPC::R9:
6832 case PPC::X3:
6833 case PPC::X5:
6834 case PPC::X7:
6835 case PPC::X9:
6836 // These registers are 16 byte aligned, which is the strictest alignment
6837 // we can support.
6838 return true;
6839 case PPC::R3:
6840 case PPC::R7:
6841 case PPC::X4:
6842 case PPC::X6:
6843 case PPC::X8:
6844 case PPC::X10:
6845 // The shadow of these registers in the PSA is 8 byte aligned.
6846 return RequiredAlign <= 8;
6847 case PPC::R4:
6848 case PPC::R6:
6849 case PPC::R8:
6850 case PPC::R10:
6851 return RequiredAlign <= 4;
6852 }
6853}
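// For example, in the 64-bit AIX ABI the parameter save area begins right
// after the 48-byte linkage area, so X3's shadow (offset 48) is 16-byte
// aligned while X4's shadow (offset 56) is only 8-byte aligned; the switch
// above encodes exactly that pattern.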
6854
6855static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6856 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6857 CCState &S) {
6858 AIXCCState &State = static_cast<AIXCCState &>(S);
6859 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6860      State.getMachineFunction().getSubtarget());
6861 const bool IsPPC64 = Subtarget.isPPC64();
6862 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6863 const Align PtrAlign(PtrSize);
6864 const Align StackAlign(16);
6865 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6866
6867 if (ValVT == MVT::f128)
6868 report_fatal_error("f128 is unimplemented on AIX.");
6869
6870 if (ArgFlags.isNest())
6871 report_fatal_error("Nest arguments are unimplemented.");
6872
6873 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6874 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6875 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6876 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6877 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6878 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6879
6880 static const MCPhysReg VR[] = {// Vector registers.
6881 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6882 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6883 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6884
6885 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6886
6887 if (ArgFlags.isByVal()) {
6888 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6889 if (ByValAlign > StackAlign)
6890 report_fatal_error("Pass-by-value arguments with alignment greater than "
6891 "16 are not supported.");
6892
6893 const unsigned ByValSize = ArgFlags.getByValSize();
6894 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6895
6896 // An empty aggregate parameter takes up no storage and no registers,
6897 // but needs a MemLoc for a stack slot for the formal arguments side.
6898 if (ByValSize == 0) {
6899      State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6900 State.getStackSize(), RegVT, LocInfo));
6901 return false;
6902 }
6903
6904 // Shadow allocate any registers whose shadow in the PSA is not sufficiently aligned.
6905 unsigned NextReg = State.getFirstUnallocated(GPRs);
6906 while (NextReg != GPRs.size() &&
6907 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6908 // Shadow allocate the next register since its alignment is not strict enough.
6909 unsigned Reg = State.AllocateReg(GPRs);
6910 // Allocate the stack space shadowed by said register.
6911 State.AllocateStack(PtrSize, PtrAlign);
6912 assert(Reg && "Allocating register unexpectedly failed.");
6913 (void)Reg;
6914 NextReg = State.getFirstUnallocated(GPRs);
6915 }
6916
6917 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6918 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6919 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6920 if (unsigned Reg = State.AllocateReg(GPRs))
6921 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6922 else {
6923        State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6924                                         Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6925 LocInfo));
6926 break;
6927 }
6928 }
6929 return false;
6930 }
6931
6932 // Arguments always reserve space in the parameter save area.
6933 switch (ValVT.SimpleTy) {
6934 default:
6935 report_fatal_error("Unhandled value type for argument.");
6936 case MVT::i64:
6937 // i64 arguments should have been split to i32 for PPC32.
6938 assert(IsPPC64 && "PPC32 should have split i64 values.");
6939 [[fallthrough]];
6940 case MVT::i1:
6941 case MVT::i32: {
6942 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6943 // AIX integer arguments are always passed in register width.
6944 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6945 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6946                                  : CCValAssign::LocInfo::ZExt;
6947 if (unsigned Reg = State.AllocateReg(GPRs))
6948 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6949 else
6950 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6951
6952 return false;
6953 }
6954 case MVT::f32:
6955 case MVT::f64: {
6956 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6957 const unsigned StoreSize = LocVT.getStoreSize();
6958 // Floats are always 4-byte aligned in the PSA on AIX.
6959 // This includes f64 in 64-bit mode for ABI compatibility.
6960 const unsigned Offset =
6961 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6962 unsigned FReg = State.AllocateReg(FPR);
6963 if (FReg)
6964 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6965
6966 // Reserve and initialize GPRs or initialize the PSA as required.
6967 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6968 if (unsigned Reg = State.AllocateReg(GPRs)) {
6969 assert(FReg && "An FPR should be available when a GPR is reserved.");
6970 if (State.isVarArg()) {
6971 // Successfully reserved GPRs are only initialized for vararg calls.
6972 // Custom handling is required for:
6973 // f64 in PPC32 needs to be split into 2 GPRs.
6974 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6975 State.addLoc(
6976 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6977 }
6978 } else {
6979 // If there are insufficient GPRs, the PSA needs to be initialized.
6980 // Initialization occurs even if an FPR was initialized for
6981 // compatibility with the AIX XL compiler. The full memory for the
6982 // argument will be initialized even if a prior word is saved in GPR.
6983 // A custom memLoc is used when the argument also passes in FPR so
6984 // that the callee handling can skip over it easily.
6985 State.addLoc(
6986 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6987 LocInfo)
6988 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6989 break;
6990 }
6991 }
6992
6993 return false;
6994 }
6995 case MVT::v4f32:
6996 case MVT::v4i32:
6997 case MVT::v8i16:
6998 case MVT::v16i8:
6999 case MVT::v2i64:
7000 case MVT::v2f64:
7001 case MVT::v1i128: {
7002 const unsigned VecSize = 16;
7003 const Align VecAlign(VecSize);
7004
7005 if (!State.isVarArg()) {
7006 // If there are vector registers remaining we don't consume any stack
7007 // space.
7008 if (unsigned VReg = State.AllocateReg(VR)) {
7009 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7010 return false;
7011 }
7012 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7013 // might be allocated in the portion of the PSA that is shadowed by the
7014 // GPRs.
7015 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7016 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7017 return false;
7018 }
7019
7020 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7021 // Burn any underaligned registers and their shadowed stack space until
7022 // we reach the required alignment.
7023 while (NextRegIndex != GPRs.size() &&
7024 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7025 // Shadow allocate register and its stack shadow.
7026 unsigned Reg = State.AllocateReg(GPRs);
7027 State.AllocateStack(PtrSize, PtrAlign);
7028 assert(Reg && "Allocating register unexpectedly failed.");
7029 (void)Reg;
7030 NextRegIndex = State.getFirstUnallocated(GPRs);
7031 }
7032
7033 // Vectors that are passed as fixed arguments are handled differently.
7034 // They are passed in VRs if any are available (unlike arguments passed
7035 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
7036 // functions).
7037 if (State.isFixed(ValNo)) {
7038 if (unsigned VReg = State.AllocateReg(VR)) {
7039 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7040 // Shadow allocate GPRs and stack space even though we pass in a VR.
7041 for (unsigned I = 0; I != VecSize; I += PtrSize)
7042 State.AllocateReg(GPRs);
7043 State.AllocateStack(VecSize, VecAlign);
7044 return false;
7045 }
7046 // No vector registers remain so pass on the stack.
7047 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7048 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7049 return false;
7050 }
7051
7052 // If all GPRs are consumed then we pass the argument fully on the stack.
7053 if (NextRegIndex == GPRs.size()) {
7054 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7055 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7056 return false;
7057 }
7058
7059 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7060 // half of the argument, and then need to pass the remaining half on the
7061 // stack.
7062 if (GPRs[NextRegIndex] == PPC::R9) {
7063 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7064 State.addLoc(
7065 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7066
7067 const unsigned FirstReg = State.AllocateReg(PPC::R9);
7068 const unsigned SecondReg = State.AllocateReg(PPC::R10);
7069 assert(FirstReg && SecondReg &&
7070 "Allocating R9 or R10 unexpectedly failed.");
7071 State.addLoc(
7072 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7073 State.addLoc(
7074 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7075 return false;
7076 }
7077
7078 // We have enough GPRs to fully pass the vector argument, and we have
7079 // already consumed any underaligned registers. Start with the custom
7080 // MemLoc and then the custom RegLocs.
7081 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7082 State.addLoc(
7083 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7084 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7085 const unsigned Reg = State.AllocateReg(GPRs);
7086 assert(Reg && "Failed to allocate register for vararg vector argument");
7087 State.addLoc(
7088 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7089 }
7090 return false;
7091 }
7092 }
7093 return true;
7094}
7095
7096// So far, this function is only used by LowerFormalArguments_AIX()
7097static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7098 bool IsPPC64,
7099 bool HasP8Vector,
7100 bool HasVSX) {
7101 assert((IsPPC64 || SVT != MVT::i64) &&
7102 "i64 should have been split for 32-bit codegen.");
7103
7104 switch (SVT) {
7105 default:
7106 report_fatal_error("Unexpected value type for formal argument");
7107 case MVT::i1:
7108 case MVT::i32:
7109 case MVT::i64:
7110 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7111 case MVT::f32:
7112 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7113 case MVT::f64:
7114 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7115 case MVT::v4f32:
7116 case MVT::v4i32:
7117 case MVT::v8i16:
7118 case MVT::v16i8:
7119 case MVT::v2i64:
7120 case MVT::v2f64:
7121 case MVT::v1i128:
7122 return &PPC::VRRCRegClass;
7123 }
7124}
7125
7126static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7127 SelectionDAG &DAG, SDValue ArgValue,
7128 MVT LocVT, const SDLoc &dl) {
7129 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7130 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7131
7132 if (Flags.isSExt())
7133 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7134 DAG.getValueType(ValVT));
7135 else if (Flags.isZExt())
7136 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7137 DAG.getValueType(ValVT));
7138
7139 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7140}
7141
7142static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7143 const unsigned LASize = FL->getLinkageSize();
7144
7145 if (PPC::GPRCRegClass.contains(Reg)) {
7146 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7147 "Reg must be a valid argument register!");
7148 return LASize + 4 * (Reg - PPC::R3);
7149 }
7150
7151 if (PPC::G8RCRegClass.contains(Reg)) {
7152 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7153 "Reg must be a valid argument register!");
7154 return LASize + 8 * (Reg - PPC::X3);
7155 }
7156
7157 llvm_unreachable("Only general purpose registers expected.");
7158}
7159
7160// AIX ABI Stack Frame Layout:
7161//
7162// Low Memory +--------------------------------------------+
7163// SP +---> | Back chain | ---+
7164// | +--------------------------------------------+ |
7165// | | Saved Condition Register | |
7166// | +--------------------------------------------+ |
7167// | | Saved Linkage Register | |
7168// | +--------------------------------------------+ | Linkage Area
7169// | | Reserved for compilers | |
7170// | +--------------------------------------------+ |
7171// | | Reserved for binders | |
7172// | +--------------------------------------------+ |
7173// | | Saved TOC pointer | ---+
7174// | +--------------------------------------------+
7175// | | Parameter save area |
7176// | +--------------------------------------------+
7177// | | Alloca space |
7178// | +--------------------------------------------+
7179// | | Local variable space |
7180// | +--------------------------------------------+
7181// | | Float/int conversion temporary |
7182// | +--------------------------------------------+
7183// | | Save area for AltiVec registers |
7184// | +--------------------------------------------+
7185// | | AltiVec alignment padding |
7186// | +--------------------------------------------+
7187// | | Save area for VRSAVE register |
7188// | +--------------------------------------------+
7189// | | Save area for General Purpose registers |
7190// | +--------------------------------------------+
7191// | | Save area for Floating Point registers |
7192// | +--------------------------------------------+
7193// +---- | Back chain |
7194// High Memory +--------------------------------------------+
7195//
7196// Specifications:
7197// AIX 7.2 Assembler Language Reference
7198// Subroutine linkage convention
7199
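// As a small worked example of this layout (illustrative only): for a callee
// such as 'void f(int a, double b)' on 64-bit AIX, the caller reserves the
// 48-byte linkage area plus at least 8 doublewords of parameter save area;
// 'a' travels in X3 and shadows the first PSA doubleword, while 'b' travels
// in FPR f1 and shadows the second.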
7200SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7201 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7202 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7203 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7204
7205 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7206 CallConv == CallingConv::Fast) &&
7207 "Unexpected calling convention!");
7208
7209 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7210 report_fatal_error("Tail call support is unimplemented on AIX.");
7211
7212 if (useSoftFloat())
7213 report_fatal_error("Soft float support is unimplemented on AIX.");
7214
7215 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7216
7217 const bool IsPPC64 = Subtarget.isPPC64();
7218 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7219
7220 // Assign locations to all of the incoming arguments.
7221  SmallVector<CCValAssign, 16> ArgLocs;
7222  MachineFunction &MF = DAG.getMachineFunction();
7223 MachineFrameInfo &MFI = MF.getFrameInfo();
7224 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7225 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7226
7227 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7228 // Reserve space for the linkage area on the stack.
7229 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7230 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7231 uint64_t SaveStackPos = CCInfo.getStackSize();
7232 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7233 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7234
7235  SmallVector<SDValue, 8> MemOps;
7236
7237 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7238 CCValAssign &VA = ArgLocs[I++];
7239 MVT LocVT = VA.getLocVT();
7240 MVT ValVT = VA.getValVT();
7241 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7242 // For compatibility with the AIX XL compiler, the float args in the
7243 // parameter save area are initialized even if the argument is available
7244 // in register. The caller is required to initialize both the register
7245 // and memory, however, the callee can choose to expect it in either.
7246 // The memloc is dismissed here because the argument is retrieved from
7247 // the register.
7248 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7249 continue;
7250
7251 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7252 const TargetRegisterClass *RegClass = getRegClassForSVT(
7253 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7254 // On PPC64, the debugger assumes extended 8-byte values are stored from a GPR.
7255 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7256 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7257 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7258 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7259 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7260 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7261 MachinePointerInfo(), Align(PtrByteSize));
7262 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7263 MemOps.push_back(StoreReg);
7264 }
7265
7266 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7267 unsigned StoreSize =
7268 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7269 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7270 }
7271
7272 auto HandleMemLoc = [&]() {
7273 const unsigned LocSize = LocVT.getStoreSize();
7274 const unsigned ValSize = ValVT.getStoreSize();
7275 assert((ValSize <= LocSize) &&
7276 "Object size is larger than size of MemLoc");
7277 int CurArgOffset = VA.getLocMemOffset();
7278 // Objects are right-justified because AIX is big-endian.
7279 if (LocSize > ValSize)
7280 CurArgOffset += LocSize - ValSize;
7281 // Potential tail calls could cause overwriting of argument stack slots.
7282 const bool IsImmutable =
7283          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7284 (CallConv == CallingConv::Fast));
7285 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7286 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7287 SDValue ArgValue =
7288 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7289 InVals.push_back(ArgValue);
7290 };
7291
7292 // Vector arguments to VaArg functions are passed both on the stack, and
7293 // in any available GPRs. Load the value from the stack and add the GPRs
7294 // as live ins.
7295 if (VA.isMemLoc() && VA.needsCustom()) {
7296 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7297 assert(isVarArg && "Only use custom memloc for vararg.");
7298 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7299 // matching custom RegLocs.
7300 const unsigned OriginalValNo = VA.getValNo();
7301 (void)OriginalValNo;
7302
7303 auto HandleCustomVecRegLoc = [&]() {
7304 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7305 "Missing custom RegLoc.");
7306 VA = ArgLocs[I++];
7307 assert(VA.getValVT().isVector() &&
7308 "Unexpected Val type for custom RegLoc.");
7309 assert(VA.getValNo() == OriginalValNo &&
7310 "ValNo mismatch between custom MemLoc and RegLoc.");
7312 MF.addLiveIn(VA.getLocReg(),
7313 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7314 Subtarget.hasVSX()));
7315 };
7316
7317 HandleMemLoc();
7318 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7319 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7320 // R10.
7321 HandleCustomVecRegLoc();
7322 HandleCustomVecRegLoc();
7323
7324 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7325 // we passed the vector in R5, R6, R7 and R8.
7326 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7327 assert(!IsPPC64 &&
7328 "Only 2 custom RegLocs expected for 64-bit codegen.");
7329 HandleCustomVecRegLoc();
7330 HandleCustomVecRegLoc();
7331 }
7332
7333 continue;
7334 }
7335
7336 if (VA.isRegLoc()) {
7337 if (VA.getValVT().isScalarInteger())
7338        FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7339 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7340 switch (VA.getValVT().SimpleTy) {
7341 default:
7342 report_fatal_error("Unhandled value type for argument.");
7343 case MVT::f32:
7344          FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7345 break;
7346 case MVT::f64:
7347          FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7348 break;
7349 }
7350 } else if (VA.getValVT().isVector()) {
7351 switch (VA.getValVT().SimpleTy) {
7352 default:
7353 report_fatal_error("Unhandled value type for argument.");
7354 case MVT::v16i8:
7355          FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7356 break;
7357 case MVT::v8i16:
7358          FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7359 break;
7360 case MVT::v4i32:
7361 case MVT::v2i64:
7362 case MVT::v1i128:
7363          FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7364 break;
7365 case MVT::v4f32:
7366 case MVT::v2f64:
7367          FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7368 break;
7369 }
7370 }
7371 }
7372
7373 if (Flags.isByVal() && VA.isMemLoc()) {
7374 const unsigned Size =
7375 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7376 PtrByteSize);
7377 const int FI = MF.getFrameInfo().CreateFixedObject(
7378 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7379 /* IsAliased */ true);
7380 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7381 InVals.push_back(FIN);
7382
7383 continue;
7384 }
7385
7386 if (Flags.isByVal()) {
7387 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7388
7389 const MCPhysReg ArgReg = VA.getLocReg();
7390 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7391
7392 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7393 const int FI = MF.getFrameInfo().CreateFixedObject(
7394 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7395 /* IsAliased */ true);
7396 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7397 InVals.push_back(FIN);
7398
7399 // Add live ins for all the RegLocs for the same ByVal.
7400 const TargetRegisterClass *RegClass =
7401 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7402
7403 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7404 unsigned Offset) {
7405 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7406 // Since the caller's side has left-justified the aggregate in the
7407 // register, we can simply store the entire register into the stack
7408 // slot.
7409 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7410 // The store to the fixed-stack object is needed because accessing a
7411 // field of the ByVal will use a gep and load. Ideally we will optimize
7412 // to extracting the value from the register directly, and elide the
7413 // stores when the argument's address is not taken, but that will need to
7414 // be future work.
7415 SDValue Store = DAG.getStore(
7416 CopyFrom.getValue(1), dl, CopyFrom,
7417            DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7418            MachinePointerInfo::getFixedStack(MF, FI, Offset));
7419
7420 MemOps.push_back(Store);
7421 };
7422
7423 unsigned Offset = 0;
7424 HandleRegLoc(VA.getLocReg(), Offset);
7425 Offset += PtrByteSize;
7426 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7427 Offset += PtrByteSize) {
7428 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7429 "RegLocs should be for ByVal argument.");
7430
7431 const CCValAssign RL = ArgLocs[I++];
7432 HandleRegLoc(RL.getLocReg(), Offset);
7434 }
7435
7436 if (Offset != StackSize) {
7437 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7438 "Expected MemLoc for remaining bytes.");
7439 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7440 // Consume the MemLoc. The InVal has already been emitted, so nothing
7441 // more needs to be done.
7442 ++I;
7443 }
7444
7445 continue;
7446 }
7447
7448 if (VA.isRegLoc() && !VA.needsCustom()) {
7449 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7450 Register VReg =
7451 MF.addLiveIn(VA.getLocReg(),
7452 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7453 Subtarget.hasVSX()));
7454 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7455 if (ValVT.isScalarInteger() &&
7456 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7457 ArgValue =
7458 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7459 }
7460 InVals.push_back(ArgValue);
7461 continue;
7462 }
7463 if (VA.isMemLoc()) {
7464 HandleMemLoc();
7465 continue;
7466 }
7467 }
7468
7469 // On AIX a minimum of 8 words is saved to the parameter save area.
7470 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7471 // Area that is at least reserved in the caller of this function.
7472 unsigned CallerReservedArea = std::max<unsigned>(
7473 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7474
7475 // Set the size that is at least reserved in caller of this function. Tail
7476 // call optimized function's reserved stack space needs to be aligned so
7477 // that taking the difference between two stack areas will result in an
7478 // aligned stack.
7479 CallerReservedArea =
7480 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7481 FuncInfo->setMinReservedArea(CallerReservedArea);
7482
7483 if (isVarArg) {
7484 FuncInfo->setVarArgsFrameIndex(
7485 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7486 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7487
7488 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7489 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7490
7491 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7492 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7493 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7494
7495 // The fixed integer arguments of a variadic function are stored to the
7496 // VarArgsFrameIndex on the stack so that they may be loaded by
7497 // dereferencing the result of va_next.
7498 for (unsigned GPRIndex =
7499 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7500 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7501
7502 const Register VReg =
7503 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7504 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7505
7506 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7507 SDValue Store =
7508 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7509 MemOps.push_back(Store);
7510 // Increment the address for the next argument to store.
7511 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7512 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7513 }
7514 }
7515
7516 if (!MemOps.empty())
7517 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7518
7519 return Chain;
7520}
7521
7522SDValue PPCTargetLowering::LowerCall_AIX(
7523 SDValue Chain, SDValue Callee, CallFlags CFlags,
7524    const SmallVectorImpl<ISD::OutputArg> &Outs,
7525 const SmallVectorImpl<SDValue> &OutVals,
7526 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7527    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7528 const CallBase *CB) const {
7529 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7530 // AIX ABI stack frame layout.
7531
7532 assert((CFlags.CallConv == CallingConv::C ||
7533 CFlags.CallConv == CallingConv::Cold ||
7534 CFlags.CallConv == CallingConv::Fast) &&
7535 "Unexpected calling convention!");
7536
7537 if (CFlags.IsPatchPoint)
7538 report_fatal_error("This call type is unimplemented on AIX.");
7539
7540 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7541
7542  MachineFunction &MF = DAG.getMachineFunction();
7543  SmallVector<CCValAssign, 16> ArgLocs;
7544 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7545 *DAG.getContext());
7546
7547 // Reserve space for the linkage save area (LSA) on the stack.
7548 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7549 // [SP][CR][LR][2 x reserved][TOC].
7550 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7551 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7552 const bool IsPPC64 = Subtarget.isPPC64();
7553 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7554 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7555 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7556 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7557
7558 // The prolog code of the callee may store up to 8 GPR argument registers to
7559 // the stack, allowing va_start to index over them in memory if the callee
7560 // is variadic.
7561 // Because we cannot tell if this is needed on the caller side, we have to
7562 // conservatively assume that it is needed. As such, make sure we have at
7563 // least enough stack space for the caller to store the 8 GPRs.
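  // For example, with the 48-byte 64-bit linkage area this means NumBytes
  // below is never smaller than 48 + 8 * 8 = 112 bytes, even for a callee
  // that takes no arguments.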
7564 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7565 const unsigned NumBytes = std::max<unsigned>(
7566 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7567
7568 // Adjust the stack pointer for the new arguments...
7569 // These operations are automatically eliminated by the prolog/epilog pass.
7570 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7571 SDValue CallSeqStart = Chain;
7572
7573  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7574 SmallVector<SDValue, 8> MemOpChains;
7575
7576 // Set up a copy of the stack pointer for loading and storing any
7577 // arguments that may not fit in the registers available for argument
7578 // passing.
7579 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7580 : DAG.getRegister(PPC::R1, MVT::i32);
7581
7582 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7583 const unsigned ValNo = ArgLocs[I].getValNo();
7584 SDValue Arg = OutVals[ValNo];
7585 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7586
7587 if (Flags.isByVal()) {
7588 const unsigned ByValSize = Flags.getByValSize();
7589
7590 // Nothing to do for zero-sized ByVals on the caller side.
7591 if (!ByValSize) {
7592 ++I;
7593 continue;
7594 }
7595
7596 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7597 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7598 (LoadOffset != 0)
7599 ? DAG.getObjectPtrOffset(
7600 dl, Arg, TypeSize::getFixed(LoadOffset))
7601 : Arg,
7602 MachinePointerInfo(), VT);
7603 };
7604
7605 unsigned LoadOffset = 0;
7606
7607 // Initialize registers, which are fully occupied by the by-val argument.
7608 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7609 SDValue Load = GetLoad(PtrVT, LoadOffset);
7610 MemOpChains.push_back(Load.getValue(1));
7611 LoadOffset += PtrByteSize;
7612 const CCValAssign &ByValVA = ArgLocs[I++];
7613 assert(ByValVA.getValNo() == ValNo &&
7614 "Unexpected location for pass-by-value argument.");
7615 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7616 }
7617
7618 if (LoadOffset == ByValSize)
7619 continue;
7620
7621 // There must be one more loc to handle the remainder.
7622 assert(ArgLocs[I].getValNo() == ValNo &&
7623 "Expected additional location for by-value argument.");
7624
7625 if (ArgLocs[I].isMemLoc()) {
7626 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7627 const CCValAssign &ByValVA = ArgLocs[I++];
7628 ISD::ArgFlagsTy MemcpyFlags = Flags;
7629 // Only memcpy the bytes that don't pass in register.
7630 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7631 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7632 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7633 dl, Arg, TypeSize::getFixed(LoadOffset))
7634 : Arg,
7635            DAG.getObjectPtrOffset(
7636 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7637 CallSeqStart, MemcpyFlags, DAG, dl);
7638 continue;
7639 }
7640
7641 // Initialize the final register residue.
7642 // Any residue that occupies the final by-val arg register must be
7643 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7644 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7645 // 2 and 1 byte loads.
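      // For instance, with a 7-byte residue on 64-bit (PtrByteSize == 8) the
      // i32, i16 and i8 loads below are shifted left by 32, 16 and 8 bits
      // respectively and OR'ed together, leaving the 7 bytes left-justified in
      // the GPR with the low byte zero.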
7646 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7647 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7648 "Unexpected register residue for by-value argument.");
7649 SDValue ResidueVal;
7650 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7651 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7652 const MVT VT =
7653 N == 1 ? MVT::i8
7654 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7655 SDValue Load = GetLoad(VT, LoadOffset);
7656 MemOpChains.push_back(Load.getValue(1));
7657 LoadOffset += N;
7658 Bytes += N;
7659
7660 // By-val arguments are passed left-justified in a register.
7661 // Every load here needs to be shifted, otherwise a full register load
7662 // should have been used.
7663 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7664 "Unexpected load emitted during handling of pass-by-value "
7665 "argument.");
7666 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7667 EVT ShiftAmountTy =
7668 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7669 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7670 SDValue ShiftedLoad =
7671 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7672 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7673 ShiftedLoad)
7674 : ShiftedLoad;
7675 }
7676
7677 const CCValAssign &ByValVA = ArgLocs[I++];
7678 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7679 continue;
7680 }
7681
7682 CCValAssign &VA = ArgLocs[I++];
7683 const MVT LocVT = VA.getLocVT();
7684 const MVT ValVT = VA.getValVT();
7685
7686 switch (VA.getLocInfo()) {
7687 default:
7688 report_fatal_error("Unexpected argument extension type.");
7689 case CCValAssign::Full:
7690 break;
7691 case CCValAssign::ZExt:
7692 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7693 break;
7694 case CCValAssign::SExt:
7695 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7696 break;
7697 }
7698
7699 if (VA.isRegLoc() && !VA.needsCustom()) {
7700 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7701 continue;
7702 }
7703
7704 // Vector arguments passed to VarArg functions need custom handling when
7705 // they are passed (at least partially) in GPRs.
7706 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7707 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7708 // Store value to its stack slot.
7709 SDValue PtrOff =
7710 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7711 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7712 SDValue Store =
7713 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7714 MemOpChains.push_back(Store);
7715 const unsigned OriginalValNo = VA.getValNo();
7716 // Then load the GPRs from the stack
7717 unsigned LoadOffset = 0;
7718 auto HandleCustomVecRegLoc = [&]() {
7719 assert(I != E && "Unexpected end of CCvalAssigns.");
7720 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7721 "Expected custom RegLoc.");
7722 CCValAssign RegVA = ArgLocs[I++];
7723 assert(RegVA.getValNo() == OriginalValNo &&
7724 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7725 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7726 DAG.getConstant(LoadOffset, dl, PtrVT));
7727 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7728 MemOpChains.push_back(Load.getValue(1));
7729 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7730 LoadOffset += PtrByteSize;
7731 };
7732
7733 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7734 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7735 // R10.
7736 HandleCustomVecRegLoc();
7737 HandleCustomVecRegLoc();
7738
7739 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7740 ArgLocs[I].getValNo() == OriginalValNo) {
7741 assert(!IsPPC64 &&
7742 "Only 2 custom RegLocs expected for 64-bit codegen.");
7743 HandleCustomVecRegLoc();
7744 HandleCustomVecRegLoc();
7745 }
7746
7747 continue;
7748 }
7749
7750 if (VA.isMemLoc()) {
7751 SDValue PtrOff =
7752 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7753 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7754 MemOpChains.push_back(
7755 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7756
7757 continue;
7758 }
7759
7760 if (!ValVT.isFloatingPoint())
7761      report_fatal_error(
7762 "Unexpected register handling for calling convention.");
7763
7764 // Custom handling is used for GPR initializations for vararg float
7765 // arguments.
7766 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7767 LocVT.isInteger() &&
7768 "Custom register handling only expected for VarArg.");
7769
7770 SDValue ArgAsInt =
7771 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7772
7773 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7774 // f32 in 32-bit GPR
7775 // f64 in 64-bit GPR
7776 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7777 else if (Arg.getValueType().getFixedSizeInBits() <
7778 LocVT.getFixedSizeInBits())
7779 // f32 in 64-bit GPR.
7780 RegsToPass.push_back(std::make_pair(
7781 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7782 else {
7783 // f64 in two 32-bit GPRs
7784 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7785 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7786 "Unexpected custom register for argument!");
7787 CCValAssign &GPR1 = VA;
7788 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7789 DAG.getConstant(32, dl, MVT::i8));
7790 RegsToPass.push_back(std::make_pair(
7791 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7792
7793 if (I != E) {
7794 // If only 1 GPR was available, there will only be one custom GPR and
7795 // the argument will also pass in memory.
7796 CCValAssign &PeekArg = ArgLocs[I];
7797 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7798 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7799 CCValAssign &GPR2 = ArgLocs[I++];
7800 RegsToPass.push_back(std::make_pair(
7801 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7802 }
7803 }
7804 }
7805 }
7806
7807 if (!MemOpChains.empty())
7808 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7809
7810 // For indirect calls, we need to save the TOC base to the stack for
7811 // restoration after the call.
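  // For example, on 64-bit AIX getTOCSaveOffset() designates the TOC slot of
  // the linkage area (typically SP + 40), so the store below keeps the
  // caller's TOC reachable across the bctrl.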
7812 if (CFlags.IsIndirect) {
7813 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7814 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7815 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7816 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7817 const unsigned TOCSaveOffset =
7818 Subtarget.getFrameLowering()->getTOCSaveOffset();
7819
7820 setUsesTOCBasePtr(DAG);
7821 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7822 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7823 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7824 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7825 Chain = DAG.getStore(
7826 Val.getValue(1), dl, Val, AddPtr,
7827 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7828 }
7829
7830 // Build a sequence of copy-to-reg nodes chained together with token chain
7831 // and flag operands which copy the outgoing args into the appropriate regs.
7832 SDValue InGlue;
7833 for (auto Reg : RegsToPass) {
7834 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7835 InGlue = Chain.getValue(1);
7836 }
7837
7838 const int SPDiff = 0;
7839 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7840 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7841}
7842
7843bool
7844PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7845 MachineFunction &MF, bool isVarArg,
7846                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
7847 LLVMContext &Context) const {
7848  SmallVector<CCValAssign, 16> RVLocs;
7849 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7850 return CCInfo.CheckReturn(
7851 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7852                ? RetCC_PPC_Cold
7853 : RetCC_PPC);
7854}
7855
7856SDValue
7857PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7858 bool isVarArg,
7859                               const SmallVectorImpl<ISD::OutputArg> &Outs,
7860 const SmallVectorImpl<SDValue> &OutVals,
7861 const SDLoc &dl, SelectionDAG &DAG) const {
7862  SmallVector<CCValAssign, 16> RVLocs;
7863 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7864 *DAG.getContext());
7865 CCInfo.AnalyzeReturn(Outs,
7866 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7867                           ? RetCC_PPC_Cold
7868 : RetCC_PPC);
7869
7870 SDValue Glue;
7871 SmallVector<SDValue, 4> RetOps(1, Chain);
7872
7873 // Copy the result values into the output registers.
7874 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7875 CCValAssign &VA = RVLocs[i];
7876 assert(VA.isRegLoc() && "Can only return in registers!");
7877
7878 SDValue Arg = OutVals[RealResIdx];
7879
7880 switch (VA.getLocInfo()) {
7881 default: llvm_unreachable("Unknown loc info!");
7882 case CCValAssign::Full: break;
7883 case CCValAssign::AExt:
7884 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7885 break;
7886 case CCValAssign::ZExt:
7887 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7888 break;
7889 case CCValAssign::SExt:
7890 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7891 break;
7892 }
7893 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7894 bool isLittleEndian = Subtarget.isLittleEndian();
7895 // Legalize ret f64 -> ret 2 x i32.
7896 SDValue SVal =
7897 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7898 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7899 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7900 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7901 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7902 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7903 Glue = Chain.getValue(1);
7904 VA = RVLocs[++i]; // skip ahead to next loc
7905 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7906 } else
7907 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7908 Glue = Chain.getValue(1);
7909 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7910 }
7911
7912 RetOps[0] = Chain; // Update chain.
7913
7914 // Add the glue if we have it.
7915 if (Glue.getNode())
7916 RetOps.push_back(Glue);
7917
7918 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7919}
7920
7921SDValue
7922PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7923 SelectionDAG &DAG) const {
7924 SDLoc dl(Op);
7925
7926 // Get the correct type for integers.
7927 EVT IntVT = Op.getValueType();
7928
7929 // Get the inputs.
7930 SDValue Chain = Op.getOperand(0);
7931 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7932 // Build a DYNAREAOFFSET node.
7933 SDValue Ops[2] = {Chain, FPSIdx};
7934 SDVTList VTs = DAG.getVTList(IntVT);
7935 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7936}
7937
7938SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7939 SelectionDAG &DAG) const {
7940 // When we pop the dynamic allocation we need to restore the SP link.
7941 SDLoc dl(Op);
7942
7943 // Get the correct type for pointers.
7944 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7945
7946 // Construct the stack pointer operand.
7947 bool isPPC64 = Subtarget.isPPC64();
7948 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7949 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7950
7951 // Get the operands for the STACKRESTORE.
7952 SDValue Chain = Op.getOperand(0);
7953 SDValue SaveSP = Op.getOperand(1);
7954
7955 // Load the old link SP.
7956 SDValue LoadLinkSP =
7957 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7958
7959 // Restore the stack pointer.
7960 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7961
7962 // Store the old link SP.
7963 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7964}
7965
7966SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7967  MachineFunction &MF = DAG.getMachineFunction();
7968 bool isPPC64 = Subtarget.isPPC64();
7969 EVT PtrVT = getPointerTy(MF.getDataLayout());
7970
7971 // Get the current return address save index. Its primary user is the
7972 // lowering of RETURNADDR.
7973  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7974 int RASI = FI->getReturnAddrSaveIndex();
7975
7976 // If the return address save index hasn't been defined yet.
7977 if (!RASI) {
7978 // Find out the fixed offset of the return address save area.
7979 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7980 // Allocate the frame index for the return address save area.
7981 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7982 // Save the result.
7983 FI->setReturnAddrSaveIndex(RASI);
7984 }
7985 return DAG.getFrameIndex(RASI, PtrVT);
7986}
7987
7988SDValue
7989PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7990  MachineFunction &MF = DAG.getMachineFunction();
7991 bool isPPC64 = Subtarget.isPPC64();
7992 EVT PtrVT = getPointerTy(MF.getDataLayout());
7993
7994 // Get current frame pointer save index. The users of this index will be
7995 // primarily DYNALLOC instructions.
7996  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7997 int FPSI = FI->getFramePointerSaveIndex();
7998
7999 // If the frame pointer save index hasn't been defined yet.
8000 if (!FPSI) {
8001 // Find out the fixed offset of the frame pointer save area.
8002 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8003 // Allocate the frame index for frame pointer save area.
8004 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8005 // Save the result.
8006 FI->setFramePointerSaveIndex(FPSI);
8007 }
8008 return DAG.getFrameIndex(FPSI, PtrVT);
8009}
8010
8011SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8012 SelectionDAG &DAG) const {
8013  MachineFunction &MF = DAG.getMachineFunction();
8014 // Get the inputs.
8015 SDValue Chain = Op.getOperand(0);
8016 SDValue Size = Op.getOperand(1);
8017 SDLoc dl(Op);
8018
8019 // Get the correct type for pointers.
8020 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8021 // Negate the size.
8022 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8023 DAG.getConstant(0, dl, PtrVT), Size);
8024 // Construct a node for the frame pointer save index.
8025 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8026 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8027 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8028 if (hasInlineStackProbe(MF))
8029 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8030 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8031}
8032
8033SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8034 SelectionDAG &DAG) const {
8035  MachineFunction &MF = DAG.getMachineFunction();
8036
8037 bool isPPC64 = Subtarget.isPPC64();
8038 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8039
8040 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8041 return DAG.getFrameIndex(FI, PtrVT);
8042}
8043
8044SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8045 SelectionDAG &DAG) const {
8046 SDLoc DL(Op);
8047 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8048 DAG.getVTList(MVT::i32, MVT::Other),
8049 Op.getOperand(0), Op.getOperand(1));
8050}
8051
8052SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8053 SelectionDAG &DAG) const {
8054 SDLoc DL(Op);
8055 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8056 Op.getOperand(0), Op.getOperand(1));
8057}
8058
8059SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8060 if (Op.getValueType().isVector())
8061 return LowerVectorLoad(Op, DAG);
8062
8063 assert(Op.getValueType() == MVT::i1 &&
8064 "Custom lowering only for i1 loads");
8065
8066 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8067
8068 SDLoc dl(Op);
8069 LoadSDNode *LD = cast<LoadSDNode>(Op);
8070
8071 SDValue Chain = LD->getChain();
8072 SDValue BasePtr = LD->getBasePtr();
8073 MachineMemOperand *MMO = LD->getMemOperand();
8074
8075 SDValue NewLD =
8076 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8077 BasePtr, MVT::i8, MMO);
8078 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8079
8080 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8081 return DAG.getMergeValues(Ops, dl);
8082}
8083
8084SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8085 if (Op.getOperand(1).getValueType().isVector())
8086 return LowerVectorStore(Op, DAG);
8087
8088 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8089 "Custom lowering only for i1 stores");
8090
8091 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8092
8093 SDLoc dl(Op);
8094 StoreSDNode *ST = cast<StoreSDNode>(Op);
8095
8096 SDValue Chain = ST->getChain();
8097 SDValue BasePtr = ST->getBasePtr();
8098 SDValue Value = ST->getValue();
8099 MachineMemOperand *MMO = ST->getMemOperand();
8100
8101  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8102 Value);
8103 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8104}
8105
8106// FIXME: Remove this once the ANDI glue bug is fixed:
8107SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8108 assert(Op.getValueType() == MVT::i1 &&
8109 "Custom lowering only for i1 results");
8110
8111 SDLoc DL(Op);
8112 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8113}
8114
8115SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8116 SelectionDAG &DAG) const {
8117
8118 // Implements a vector truncate that fits in a vector register as a shuffle.
8119 // We want to legalize vector truncates down to where the source fits in
8120 // a vector register (and target is therefore smaller than vector register
8121 // size). At that point legalization will try to custom lower the sub-legal
8122 // result and get here - where we can contain the truncate as a single target
8123 // operation.
8124
8125 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8126 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8127 //
8128 // We will implement it for big-endian ordering as this (where x denotes
8129 // undefined):
8130 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8131 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8132 //
8133 // The same operation in little-endian ordering will be:
8134 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8135 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
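  //
  // For the <2 x i16> -> <2 x i8> example above, SizeMult below is 2, so the
  // shuffle mask becomes {0, 2, ...} on little-endian (keeping the low byte of
  // each halfword) and {1, 3, ...} on big-endian; the remaining lanes are
  // filler whose contents are never used.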
8136
8137 EVT TrgVT = Op.getValueType();
8138 assert(TrgVT.isVector() && "Vector type expected.");
8139 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8140 EVT EltVT = TrgVT.getVectorElementType();
8141 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8142 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8143 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8144 return SDValue();
8145
8146 SDValue N1 = Op.getOperand(0);
8147 EVT SrcVT = N1.getValueType();
8148 unsigned SrcSize = SrcVT.getSizeInBits();
8149 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8150 !llvm::has_single_bit<uint32_t>(
8151          SrcVT.getVectorElementType().getSizeInBits()))
8152    return SDValue();
8153 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8154 return SDValue();
8155
8156 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8157 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8158
8159 SDLoc DL(Op);
8160 SDValue Op1, Op2;
8161 if (SrcSize == 256) {
8162 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8163 EVT SplitVT =
8164        N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
8165    unsigned SplitNumElts = SplitVT.getVectorNumElements();
8166 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8167 DAG.getConstant(0, DL, VecIdxTy));
8168 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8169 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8170 }
8171 else {
8172 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8173 Op2 = DAG.getUNDEF(WideVT);
8174 }
8175
8176 // First list the elements we want to keep.
8177 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8178 SmallVector<int, 16> ShuffV;
8179 if (Subtarget.isLittleEndian())
8180 for (unsigned i = 0; i < TrgNumElts; ++i)
8181 ShuffV.push_back(i * SizeMult);
8182 else
8183 for (unsigned i = 1; i <= TrgNumElts; ++i)
8184 ShuffV.push_back(i * SizeMult - 1);
8185
8186 // Populate the remaining elements with undefs.
8187 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8188 // ShuffV.push_back(i + WideNumElts);
8189 ShuffV.push_back(WideNumElts + 1);
8190
8191 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8192 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8193 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8194}
8195
8196/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8197/// possible.
8198SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8199 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8200 EVT ResVT = Op.getValueType();
8201 EVT CmpVT = Op.getOperand(0).getValueType();
8202 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8203 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
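  // For example, an i32 fshl with Z = 40 is first reduced to Z = 40 & 31 = 8
  // and produces (X << 8) | (Y >> 24). When Z % BW == 0 the second shift
  // amount is a full BW, which the PPC shift nodes define to produce zero, so
  // no extra masking is needed.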
8204 SDLoc dl(Op);
8205
8206  // Without power9-vector, there is no native instruction for f128 comparison.
8207  // The following transformation to a libcall is needed for setcc:
8208 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8209 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8210 SDValue Z = DAG.getSetCC(
8211 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8212 LHS, RHS, CC);
8213 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8214 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8215 }
8216
8217 // Not FP, or using SPE? Not a fsel.
8218 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8219 Subtarget.hasSPE())
8220 return Op;
8221
8222 SDNodeFlags Flags = Op.getNode()->getFlags();
8223
8224 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8225 // presence of infinities.
8226 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8227 switch (CC) {
8228 default:
8229 break;
8230 case ISD::SETOGT:
8231 case ISD::SETGT:
8232 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8233 case ISD::SETOLT:
8234 case ISD::SETLT:
8235 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8236 }
8237 }
8238
8239 // We might be able to do better than this under some circumstances, but in
8240 // general, fsel-based lowering of select is a finite-math-only optimization.
8241 // For more information, see section F.3 of the 2.06 ISA specification.
8242 // With ISA 3.0
8243 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8244 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8245 ResVT == MVT::f128)
8246 return Op;
8247
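  // Note that PPCISD::FSEL picks its second operand when its first operand is
  // greater than or equal to +0.0 (NaNs select the third operand), which is
  // why the cases below either use LHS directly (when RHS is +0.0) or form
  // LHS - RHS / RHS - LHS and test the sign of that difference.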
8248 // If the RHS of the comparison is a 0.0, we don't need to do the
8249 // subtraction at all.
8250 SDValue Sel1;
8251 if (isFloatingPointZero(RHS))
8252 switch (CC) {
8253 default: break; // SETUO etc aren't handled by fsel.
8254 case ISD::SETNE:
8255 std::swap(TV, FV);
8256 [[fallthrough]];
8257 case ISD::SETEQ:
8258 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8259 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8260 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8261 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8262 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8263 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8264 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8265 case ISD::SETULT:
8266 case ISD::SETLT:
8267 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8268 [[fallthrough]];
8269 case ISD::SETOGE:
8270 case ISD::SETGE:
8271 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8272 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8273 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8274 case ISD::SETUGT:
8275 case ISD::SETGT:
8276 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8277 [[fallthrough]];
8278 case ISD::SETOLE:
8279 case ISD::SETLE:
8280 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8281 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8282 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8283 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8284 }
8285
8286 SDValue Cmp;
8287 switch (CC) {
8288 default: break; // SETUO etc aren't handled by fsel.
8289 case ISD::SETNE:
8290 std::swap(TV, FV);
8291 [[fallthrough]];
8292 case ISD::SETEQ:
8293 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8294 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8295 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8296 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8297 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8298 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8299 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8300 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8301 case ISD::SETULT:
8302 case ISD::SETLT:
8303 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8304 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8305 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8306 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8307 case ISD::SETOGE:
8308 case ISD::SETGE:
8309 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8310 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8311 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8312 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8313 case ISD::SETUGT:
8314 case ISD::SETGT:
8315 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8316 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8317 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8318 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8319 case ISD::SETOLE:
8320 case ISD::SETLE:
8321 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8322 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8323 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8324 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8325 }
8326 return Op;
8327}
8328
8329static unsigned getPPCStrictOpcode(unsigned Opc) {
8330 switch (Opc) {
8331 default:
8332 llvm_unreachable("No strict version of this opcode!");
8333 case PPCISD::FCTIDZ:
8334 return PPCISD::STRICT_FCTIDZ;
8335 case PPCISD::FCTIWZ:
8336 return PPCISD::STRICT_FCTIWZ;
8337 case PPCISD::FCTIDUZ:
8338    return PPCISD::STRICT_FCTIDUZ;
8339  case PPCISD::FCTIWUZ:
8340    return PPCISD::STRICT_FCTIWUZ;
8341 case PPCISD::FCFID:
8342 return PPCISD::STRICT_FCFID;
8343 case PPCISD::FCFIDU:
8344 return PPCISD::STRICT_FCFIDU;
8345 case PPCISD::FCFIDS:
8346 return PPCISD::STRICT_FCFIDS;
8347 case PPCISD::FCFIDUS:
8348    return PPCISD::STRICT_FCFIDUS;
8349  }
8350}
8351
8352static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8353                              const PPCSubtarget &Subtarget) {
8354 SDLoc dl(Op);
8355 bool IsStrict = Op->isStrictFPOpcode();
8356 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8357 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8358
8359 // TODO: Any other flags to propagate?
8360 SDNodeFlags Flags;
8361 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8362
8363 // For strict nodes, source is the second operand.
8364 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8365 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8366 MVT DestTy = Op.getSimpleValueType();
8367 assert(Src.getValueType().isFloatingPoint() &&
8368 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8369 DestTy == MVT::i64) &&
8370 "Invalid FP_TO_INT types");
8371 if (Src.getValueType() == MVT::f32) {
8372 if (IsStrict) {
8373 Src =
8374          DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8375                      DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8376 Chain = Src.getValue(1);
8377 } else
8378 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8379 }
8380 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8381 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8382 unsigned Opc = ISD::DELETED_NODE;
8383 switch (DestTy.SimpleTy) {
8384 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8385 case MVT::i32:
8386 Opc = IsSigned ? PPCISD::FCTIWZ
8387 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8388 break;
8389 case MVT::i64:
8390 assert((IsSigned || Subtarget.hasFPCVT()) &&
8391 "i64 FP_TO_UINT is supported only with FPCVT");
8392 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8393 }
8394 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8395 SDValue Conv;
8396 if (IsStrict) {
8397 Opc = getPPCStrictOpcode(Opc);
8398 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8399 Flags);
8400 } else {
8401 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8402 }
8403 return Conv;
8404}
8405
8406void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8407 SelectionDAG &DAG,
8408 const SDLoc &dl) const {
8409 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8410 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8411 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8412 bool IsStrict = Op->isStrictFPOpcode();
8413
8414 // Convert the FP value to an int value through memory.
8415 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8416 (IsSigned || Subtarget.hasFPCVT());
8417 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8418 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8419 MachinePointerInfo MPI =
8420      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8421
8422 // Emit a store to the stack slot.
8423 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8424 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8425 if (i32Stack) {
8426    MachineFunction &MF = DAG.getMachineFunction();
8427    Alignment = Align(4);
8428 MachineMemOperand *MMO =
8429 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8430 SDValue Ops[] = { Chain, Tmp, FIPtr };
8431 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8432 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8433 } else
8434 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8435
8436 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8437 // add in a bias on big endian.
8438 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8439 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8440 DAG.getConstant(4, dl, FIPtr.getValueType()));
8441 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8442 }
8443
8444 RLI.Chain = Chain;
8445 RLI.Ptr = FIPtr;
8446 RLI.MPI = MPI;
8447 RLI.Alignment = Alignment;
8448}
8449
8450/// Custom lowers floating point to integer conversions to use
8451/// the direct move instructions available in ISA 2.07 to avoid the
8452/// need for load/store combinations.
8453SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8454 SelectionDAG &DAG,
8455 const SDLoc &dl) const {
8456 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8457 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8458 if (Op->isStrictFPOpcode())
8459 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8460 else
8461 return Mov;
8462}
8463
8464SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8465 const SDLoc &dl) const {
8466 bool IsStrict = Op->isStrictFPOpcode();
8467 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8468 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8469 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8470 EVT SrcVT = Src.getValueType();
8471 EVT DstVT = Op.getValueType();
8472
8473 // FP to INT conversions are legal for f128.
8474 if (SrcVT == MVT::f128)
8475 return Subtarget.hasP9Vector() ? Op : SDValue();
8476
8477 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8478 // PPC (the libcall is not available).
8479 if (SrcVT == MVT::ppcf128) {
8480 if (DstVT == MVT::i32) {
8481 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8482 // set other fast-math flags to FP operations in both strict and
8483 // non-strict cases. (FP_TO_SINT, FSUB)
8484      SDNodeFlags Flags;
8485      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8486
8487 if (IsSigned) {
8488 SDValue Lo, Hi;
8489 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8490
8491 // Add the two halves of the long double in round-to-zero mode, and use
8492 // a smaller FP_TO_SINT.
8493 if (IsStrict) {
8494          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8495                                    DAG.getVTList(MVT::f64, MVT::Other),
8496 {Op.getOperand(0), Lo, Hi}, Flags);
8497 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8498 DAG.getVTList(MVT::i32, MVT::Other),
8499 {Res.getValue(1), Res}, Flags);
8500 } else {
8501 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8502 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8503 }
8504 } else {
8505 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8506 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8507 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8508 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8509 if (IsStrict) {
8510 // Sel = Src < 0x80000000
8511 // FltOfs = select Sel, 0.0, 0x80000000
8512 // IntOfs = select Sel, 0, 0x80000000
8513 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8514 SDValue Chain = Op.getOperand(0);
8515 EVT SetCCVT =
8516 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8517 EVT DstSetCCVT =
8518 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8519 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8520 Chain, true);
8521 Chain = Sel.getValue(1);
8522
8523 SDValue FltOfs = DAG.getSelect(
8524 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8525 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8526
8527 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8528 DAG.getVTList(SrcVT, MVT::Other),
8529 {Chain, Src, FltOfs}, Flags);
8530 Chain = Val.getValue(1);
8531 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8532 DAG.getVTList(DstVT, MVT::Other),
8533 {Chain, Val}, Flags);
8534 Chain = SInt.getValue(1);
8535 SDValue IntOfs = DAG.getSelect(
8536 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8537 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8538 return DAG.getMergeValues({Result, Chain}, dl);
8539 } else {
8540 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8541 // FIXME: generated code sucks.
8542 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8543 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8544 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8545 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8546 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8547 }
8548 }
8549 }
8550
8551 return SDValue();
8552 }
8553
8554 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8555 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8556
8557 ReuseLoadInfo RLI;
8558 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8559
8560 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8561 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8562}
8563
8564// We're trying to insert a regular store, S, and then a load, L. If the
8565// incoming value, O, is a load, we might just be able to have our load use the
8566// address used by O. However, we don't know if anything else will store to
8567// that address before we can load from it. To prevent this situation, we need
8568// to insert our load, L, into the chain as a peer of O. To do this, we give L
8569// the same chain operand as O, we create a token factor from the chain results
8570// of O and L, and we replace all uses of O's chain result with that token
8571// factor (see spliceIntoChain below for this last part).
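// Concretely, if O is (load ch, addr) and we create L = (load ch, addr), the
// token factor TF = TokenFactor(O's out-chain, L's out-chain) replaces every
// use of O's out-chain, so anything ordered after O is also ordered after L.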
8572bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8573 ReuseLoadInfo &RLI,
8574 SelectionDAG &DAG,
8575 ISD::LoadExtType ET) const {
8576 // Conservatively skip reusing for constrained FP nodes.
8577 if (Op->isStrictFPOpcode())
8578 return false;
8579
8580 SDLoc dl(Op);
8581 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8582 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8583 if (ET == ISD::NON_EXTLOAD &&
8584 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8585 isOperationLegalOrCustom(Op.getOpcode(),
8586 Op.getOperand(0).getValueType())) {
8587
8588 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8589 return true;
8590 }
8591
8592 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8593 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8594 LD->isNonTemporal())
8595 return false;
8596 if (LD->getMemoryVT() != MemVT)
8597 return false;
8598
8599 // If the result of the load is an illegal type, then we can't build a
8600 // valid chain for reuse since the legalised loads and token factor node that
8601  // ties the legalised loads together uses a different output chain than the
8602 // illegal load.
8603 if (!isTypeLegal(LD->getValueType(0)))
8604 return false;
8605
8606 RLI.Ptr = LD->getBasePtr();
8607 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8608 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8609 "Non-pre-inc AM on PPC?");
8610 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8611 LD->getOffset());
8612 }
8613
8614 RLI.Chain = LD->getChain();
8615 RLI.MPI = LD->getPointerInfo();
8616 RLI.IsDereferenceable = LD->isDereferenceable();
8617 RLI.IsInvariant = LD->isInvariant();
8618 RLI.Alignment = LD->getAlign();
8619 RLI.AAInfo = LD->getAAInfo();
8620 RLI.Ranges = LD->getRanges();
8621
8622 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8623 return true;
8624}
8625
8626// Given the head of the old chain, ResChain, insert a token factor containing
8627// it and NewResChain, and make users of ResChain now be users of that token
8628// factor.
8629// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8630void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8631 SDValue NewResChain,
8632 SelectionDAG &DAG) const {
8633 if (!ResChain)
8634 return;
8635
8636 SDLoc dl(NewResChain);
8637
8638 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8639 NewResChain, DAG.getUNDEF(MVT::Other));
8640 assert(TF.getNode() != NewResChain.getNode() &&
8641 "A new TF really is required here");
8642
8643 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8644 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8645}
8646
8647/// Analyze the profitability of a direct move.
8648/// Prefer a float load over an int load plus direct move
8649/// when there is no integer use of the int load.
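/// For example, an i32 load that feeds only a SINT_TO_FP is better served by a
/// floating-point-side load (e.g. lfiwax) followed by the conversion than by an
/// integer load plus a GPR-to-VSR direct move, so this helper returns false in
/// that case.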
8650bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8651 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8652 if (Origin->getOpcode() != ISD::LOAD)
8653 return true;
8654
8655 // If there is no LXSIBZX/LXSIHZX, like Power8,
8656 // prefer direct move if the memory size is 1 or 2 bytes.
8657 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8658 if (!Subtarget.hasP9Vector() &&
8659 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8660 return true;
8661
8662 for (SDNode::use_iterator UI = Origin->use_begin(),
8663 UE = Origin->use_end();
8664 UI != UE; ++UI) {
8665
8666 // Only look at the users of the loaded value.
8667 if (UI.getUse().get().getResNo() != 0)
8668 continue;
8669
8670 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8671 UI->getOpcode() != ISD::UINT_TO_FP &&
8672 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8673 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8674 return true;
8675 }
8676
8677 return false;
8678}
8679
8680static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8681                              const PPCSubtarget &Subtarget,
8682 SDValue Chain = SDValue()) {
8683 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8684 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8685 SDLoc dl(Op);
8686
8687 // TODO: Any other flags to propagate?
8688 SDNodeFlags Flags;
8689 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8690
8691 // If we have FCFIDS, then use it when converting to single-precision.
8692 // Otherwise, convert to double-precision and then round.
8693 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8694 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8695 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8696 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8697 if (Op->isStrictFPOpcode()) {
8698 if (!Chain)
8699 Chain = Op.getOperand(0);
8700 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8701 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8702 } else
8703 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8704}
8705
8706/// Custom lowers integer to floating point conversions to use
8707/// the direct move instructions available in ISA 2.07 to avoid the
8708/// need for load/store combinations.
8709SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8710 SelectionDAG &DAG,
8711 const SDLoc &dl) const {
8712 assert((Op.getValueType() == MVT::f32 ||
8713 Op.getValueType() == MVT::f64) &&
8714 "Invalid floating point type as target of conversion");
8715 assert(Subtarget.hasFPCVT() &&
8716 "Int to FP conversions with direct moves require FPCVT");
8717 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8718 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8719 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8720 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8721 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8722 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8723 return convertIntToFP(Op, Mov, DAG, Subtarget);
8724}
8725
8726static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8727
8728 EVT VecVT = Vec.getValueType();
8729 assert(VecVT.isVector() && "Expected a vector type.");
8730 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8731
8732 EVT EltVT = VecVT.getVectorElementType();
8733 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8734 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8735
8736 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8737 SmallVector<SDValue, 16> Ops(NumConcat);
8738 Ops[0] = Vec;
8739 SDValue UndefVec = DAG.getUNDEF(VecVT);
8740 for (unsigned i = 1; i < NumConcat; ++i)
8741 Ops[i] = UndefVec;
8742
8743 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8744}
8745
8746SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8747 const SDLoc &dl) const {
8748 bool IsStrict = Op->isStrictFPOpcode();
8749 unsigned Opc = Op.getOpcode();
8750 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8751 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8752          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8753         "Unexpected conversion type");
8754 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8755 "Supports conversions to v2f64/v4f32 only.");
8756
8757 // TODO: Any other flags to propagate?
8758  SDNodeFlags Flags;
8759  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8760
8761 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8762 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8763
8764 SDValue Wide = widenVec(DAG, Src, dl);
8765 EVT WideVT = Wide.getValueType();
8766 unsigned WideNumElts = WideVT.getVectorNumElements();
8767 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8768
8769 SmallVector<int, 16> ShuffV;
8770 for (unsigned i = 0; i < WideNumElts; ++i)
8771 ShuffV.push_back(i + WideNumElts);
8772
8773 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8774 int SaveElts = FourEltRes ? 4 : 2;
8775 if (Subtarget.isLittleEndian())
8776 for (int i = 0; i < SaveElts; i++)
8777 ShuffV[i * Stride] = i;
8778 else
8779 for (int i = 1; i <= SaveElts; i++)
8780 ShuffV[i * Stride - 1] = i - 1;
8781
8782 SDValue ShuffleSrc2 =
8783 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8784 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8785
8786 SDValue Extend;
8787 if (SignedConv) {
8788 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8789 EVT ExtVT = Src.getValueType();
8790 if (Subtarget.hasP9Altivec())
8791 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8792 IntermediateVT.getVectorNumElements());
8793
8794 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8795 DAG.getValueType(ExtVT));
8796 } else
8797 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8798
8799 if (IsStrict)
8800 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8801 {Op.getOperand(0), Extend}, Flags);
8802
8803 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8804}
8805
8806SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8807 SelectionDAG &DAG) const {
8808 SDLoc dl(Op);
8809 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8810 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8811 bool IsStrict = Op->isStrictFPOpcode();
8812 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8813 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8814
8815 // TODO: Any other flags to propagate?
8816  SDNodeFlags Flags;
8817  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8818
8819 EVT InVT = Src.getValueType();
8820 EVT OutVT = Op.getValueType();
8821 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8822 isOperationCustom(Op.getOpcode(), InVT))
8823 return LowerINT_TO_FPVector(Op, DAG, dl);
8824
8825 // Conversions to f128 are legal.
8826 if (Op.getValueType() == MVT::f128)
8827 return Subtarget.hasP9Vector() ? Op : SDValue();
8828
8829 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8830 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8831 return SDValue();
8832
8833 if (Src.getValueType() == MVT::i1) {
8834 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8835 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8836 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8837 if (IsStrict)
8838 return DAG.getMergeValues({Sel, Chain}, dl);
8839 else
8840 return Sel;
8841 }
8842
8843  // If we have direct moves, we can do all the conversion and skip the
8844  // store/load; however, without FPCVT we can't do most conversions.
8845 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8846 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8847 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8848
8849 assert((IsSigned || Subtarget.hasFPCVT()) &&
8850 "UINT_TO_FP is supported only with FPCVT");
8851
8852 if (Src.getValueType() == MVT::i64) {
8853 SDValue SINT = Src;
8854 // When converting to single-precision, we actually need to convert
8855 // to double-precision first and then round to single-precision.
8856 // To avoid double-rounding effects during that operation, we have
8857 // to prepare the input operand. Bits that might be truncated when
8858 // converting to double-precision are replaced by a bit that won't
8859 // be lost at this stage, but is below the single-precision rounding
8860 // position.
8861 //
8862 // However, if -enable-unsafe-fp-math is in effect, accept double
8863 // rounding to avoid the extra overhead.
8864 if (Op.getValueType() == MVT::f32 &&
8865 !Subtarget.hasFPCVT() &&
8866        !DAG.getTarget().Options.UnsafeFPMath) {
8867
8868 // Twiddle input to make sure the low 11 bits are zero. (If this
8869 // is the case, we are guaranteed the value will fit into the 53 bit
8870 // mantissa of an IEEE double-precision value without rounding.)
8871 // If any of those low 11 bits were not zero originally, make sure
8872 // bit 12 (value 2048) is set instead, so that the final rounding
8873 // to single-precision gets the correct result.
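      // For example, if the low 11 bits are 0x001, (0x001 + 2047) = 0x800
      // carries into bit 11; after the OR and mask below, the low 11 bits are
      // cleared and bit 11 is set. If the low 11 bits are already zero, no
      // carry occurs and the value is left unchanged.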
8874 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8875 SINT, DAG.getConstant(2047, dl, MVT::i64));
8876 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8877 Round, DAG.getConstant(2047, dl, MVT::i64));
8878 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8879 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8880 Round, DAG.getConstant(-2048, dl, MVT::i64));
8881
8882 // However, we cannot use that value unconditionally: if the magnitude
8883 // of the input value is small, the bit-twiddling we did above might
8884 // end up visibly changing the output. Fortunately, in that case, we
8885 // don't need to twiddle bits since the original input will convert
8886 // exactly to double-precision floating-point already. Therefore,
8887 // construct a conditional to use the original value if the top 11
8888 // bits are all sign-bit copies, and use the rounded value computed
8889 // above otherwise.
8890 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8891 SINT, DAG.getConstant(53, dl, MVT::i32));
8892 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8893 Cond, DAG.getConstant(1, dl, MVT::i64));
8894 Cond = DAG.getSetCC(
8895 dl,
8896 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8897 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8898
8899 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8900 }
8901
8902 ReuseLoadInfo RLI;
8903 SDValue Bits;
8904
8905    MachineFunction &MF = DAG.getMachineFunction();
8906    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8907 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8908 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8909 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8910 } else if (Subtarget.hasLFIWAX() &&
8911 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8912 MachineMemOperand *MMO =
8913          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8914                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8915 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8916      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8917                                     DAG.getVTList(MVT::f64, MVT::Other),
8918 Ops, MVT::i32, MMO);
8919 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8920 } else if (Subtarget.hasFPCVT() &&
8921 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8922 MachineMemOperand *MMO =
8923          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8924                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8925 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8926      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8927                                     DAG.getVTList(MVT::f64, MVT::Other),
8928 Ops, MVT::i32, MMO);
8929 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8930 } else if (((Subtarget.hasLFIWAX() &&
8931 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8932 (Subtarget.hasFPCVT() &&
8933 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8934 SINT.getOperand(0).getValueType() == MVT::i32) {
8935 MachineFrameInfo &MFI = MF.getFrameInfo();
8936 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8937
8938 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8939 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8940
8941 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8942                                 MachinePointerInfo::getFixedStack(
8943                                     DAG.getMachineFunction(), FrameIdx));
8944 Chain = Store;
8945
8946 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8947 "Expected an i32 store");
8948
8949 RLI.Ptr = FIdx;
8950 RLI.Chain = Chain;
8951 RLI.MPI =
8952          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8953      RLI.Alignment = Align(4);
8954
8955 MachineMemOperand *MMO =
8956          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8957                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8958 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8959      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8960                                         PPCISD::LFIWZX : PPCISD::LFIWAX,
8961                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
8962 Ops, MVT::i32, MMO);
8963 Chain = Bits.getValue(1);
8964 } else
8965 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8966
8967 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8968 if (IsStrict)
8969 Chain = FP.getValue(1);
8970
8971 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8972 if (IsStrict)
8973        FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8974                         DAG.getVTList(MVT::f32, MVT::Other),
8975 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8976 else
8977 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8978 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8979 }
8980 return FP;
8981 }
8982
8983 assert(Src.getValueType() == MVT::i32 &&
8984 "Unhandled INT_TO_FP type in custom expander!");
8985 // Since we only generate this in 64-bit mode, we can take advantage of
8986 // 64-bit registers. In particular, sign extend the input value into the
8987 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8988 // then lfd it and fcfid it.
8989  MachineFunction &MF = DAG.getMachineFunction();
8990  MachineFrameInfo &MFI = MF.getFrameInfo();
8991 EVT PtrVT = getPointerTy(MF.getDataLayout());
8992
8993 SDValue Ld;
8994 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8995 ReuseLoadInfo RLI;
8996 bool ReusingLoad;
8997 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8998 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8999 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9000
9001 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9002                                 MachinePointerInfo::getFixedStack(
9003                                     DAG.getMachineFunction(), FrameIdx));
9004 Chain = Store;
9005
9006 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9007 "Expected an i32 store");
9008
9009 RLI.Ptr = FIdx;
9010 RLI.Chain = Chain;
9011 RLI.MPI =
9012          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9013      RLI.Alignment = Align(4);
9014 }
9015
9016 MachineMemOperand *MMO =
9017        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9018                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9019 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9020 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9021 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9022 MVT::i32, MMO);
9023 Chain = Ld.getValue(1);
9024 if (ReusingLoad)
9025 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
9026 } else {
9027 assert(Subtarget.isPPC64() &&
9028 "i32->FP without LFIWAX supported only on PPC64");
9029
9030 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9031 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9032
9033 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9034
9035 // STD the extended value into the stack slot.
9036 SDValue Store = DAG.getStore(
9037 Chain, dl, Ext64, FIdx,
9038        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9039    Chain = Store;
9040
9041 // Load the value as a double.
9042 Ld = DAG.getLoad(
9043 MVT::f64, dl, Chain, FIdx,
9044        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9045    Chain = Ld.getValue(1);
9046 }
9047
9048 // FCFID it and return it.
9049 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9050 if (IsStrict)
9051 Chain = FP.getValue(1);
9052 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9053 if (IsStrict)
9054      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
9055                       DAG.getVTList(MVT::f32, MVT::Other),
9056 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
9057 else
9058 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9059 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9060 }
9061 return FP;
9062}
9063
9064SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9065 SelectionDAG &DAG) const {
9066 SDLoc dl(Op);
9067 /*
9068  The rounding mode is in bits 30:31 of FPSCR, and has the following
9069 settings:
9070 00 Round to nearest
9071 01 Round to 0
9072 10 Round to +inf
9073 11 Round to -inf
9074
9075 GET_ROUNDING, on the other hand, expects the following:
9076 -1 Undefined
9077 0 Round to 0
9078 1 Round to nearest
9079 2 Round to +inf
9080 3 Round to -inf
9081
9082 To perform the conversion, we do:
9083 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9084 */
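  // As a quick check of the formula: FPSCR bits 00 -> 0 ^ 1 = 1 (nearest),
  // 01 -> 1 ^ 1 = 0 (toward zero), 10 -> 2 ^ 0 = 2 (+inf), 11 -> 3 ^ 0 = 3
  // (-inf), matching the GET_ROUNDING encoding above.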
9085
9086  MachineFunction &MF = DAG.getMachineFunction();
9087  EVT VT = Op.getValueType();
9088 EVT PtrVT = getPointerTy(MF.getDataLayout());
9089
9090 // Save FP Control Word to register
9091 SDValue Chain = Op.getOperand(0);
9092 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9093 Chain = MFFS.getValue(1);
9094
9095 SDValue CWD;
9096 if (isTypeLegal(MVT::i64)) {
9097 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9098 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9099 } else {
9100 // Save FP register to stack slot
9101 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9102 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9103 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9104
9105 // Load FP Control Word from low 32 bits of stack slot.
9106    assert(!Subtarget.isLittleEndian() &&
9107           "Stack slot adjustment is valid only on big endian subtargets!");
9108 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9109 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9110 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9111 Chain = CWD.getValue(1);
9112 }
9113
9114 // Transform as necessary
9115 SDValue CWD1 =
9116 DAG.getNode(ISD::AND, dl, MVT::i32,
9117 CWD, DAG.getConstant(3, dl, MVT::i32));
9118 SDValue CWD2 =
9119 DAG.getNode(ISD::SRL, dl, MVT::i32,
9120 DAG.getNode(ISD::AND, dl, MVT::i32,
9121 DAG.getNode(ISD::XOR, dl, MVT::i32,
9122 CWD, DAG.getConstant(3, dl, MVT::i32)),
9123 DAG.getConstant(3, dl, MVT::i32)),
9124 DAG.getConstant(1, dl, MVT::i32));
9125
9126 SDValue RetVal =
9127 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9128
9129 RetVal =
9130      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9131                  dl, VT, RetVal);
9132
9133 return DAG.getMergeValues({RetVal, Chain}, dl);
9134}
9135
9136SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9137 EVT VT = Op.getValueType();
9138 unsigned BitWidth = VT.getSizeInBits();
9139 SDLoc dl(Op);
9140 assert(Op.getNumOperands() == 3 &&
9141 VT == Op.getOperand(1).getValueType() &&
9142 "Unexpected SHL!");
9143
9144 // Expand into a bunch of logical ops. Note that these ops
9145 // depend on the PPC behavior for oversized shift amounts.
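  // Concretely: OutLo = Lo << Amt and OutHi = (Hi << Amt) | (Lo >> (BW - Amt))
  // | (Lo << (Amt - BW)); for Amt < BW the last term is an oversized shift and
  // yields zero, while for Amt >= BW the first two terms are zero instead.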
9146 SDValue Lo = Op.getOperand(0);
9147 SDValue Hi = Op.getOperand(1);
9148 SDValue Amt = Op.getOperand(2);
9149 EVT AmtVT = Amt.getValueType();
9150
9151 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9152 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9153 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9154 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9155 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9156 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9157 DAG.getConstant(-BitWidth, dl, AmtVT));
9158 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9159 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9160 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9161 SDValue OutOps[] = { OutLo, OutHi };
9162 return DAG.getMergeValues(OutOps, dl);
9163}
9164
9165SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9166 EVT VT = Op.getValueType();
9167 SDLoc dl(Op);
9168 unsigned BitWidth = VT.getSizeInBits();
9169 assert(Op.getNumOperands() == 3 &&
9170 VT == Op.getOperand(1).getValueType() &&
9171 "Unexpected SRL!");
9172
9173 // Expand into a bunch of logical ops. Note that these ops
9174 // depend on the PPC behavior for oversized shift amounts.
9175 SDValue Lo = Op.getOperand(0);
9176 SDValue Hi = Op.getOperand(1);
9177 SDValue Amt = Op.getOperand(2);
9178 EVT AmtVT = Amt.getValueType();
9179
9180 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9181 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9182 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9183 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9184 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9185 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9186 DAG.getConstant(-BitWidth, dl, AmtVT));
9187 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9188 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9189 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9190 SDValue OutOps[] = { OutLo, OutHi };
9191 return DAG.getMergeValues(OutOps, dl);
9192}
9193
9194SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9195 SDLoc dl(Op);
9196 EVT VT = Op.getValueType();
9197 unsigned BitWidth = VT.getSizeInBits();
9198 assert(Op.getNumOperands() == 3 &&
9199 VT == Op.getOperand(1).getValueType() &&
9200 "Unexpected SRA!");
9201
9202 // Expand into a bunch of logical ops, followed by a select_cc.
9203 SDValue Lo = Op.getOperand(0);
9204 SDValue Hi = Op.getOperand(1);
9205 SDValue Amt = Op.getOperand(2);
9206 EVT AmtVT = Amt.getValueType();
9207
9208 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9209 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9210 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9211 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9212 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9213 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9214 DAG.getConstant(-BitWidth, dl, AmtVT));
9215 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9216 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9217 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9218 Tmp4, Tmp6, ISD::SETLE);
9219 SDValue OutOps[] = { OutLo, OutHi };
9220 return DAG.getMergeValues(OutOps, dl);
9221}
9222
9223SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9224 SelectionDAG &DAG) const {
9225 SDLoc dl(Op);
9226 EVT VT = Op.getValueType();
9227 unsigned BitWidth = VT.getSizeInBits();
9228
9229 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9230 SDValue X = Op.getOperand(0);
9231 SDValue Y = Op.getOperand(1);
9232 SDValue Z = Op.getOperand(2);
9233 EVT AmtVT = Z.getValueType();
9234
9235 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9236 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9237 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9238 // on PowerPC shift by BW being well defined.
9239 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9240 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9241 SDValue SubZ =
9242 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9243 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9244 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9245 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9246}
9247
9248//===----------------------------------------------------------------------===//
9249// Vector related lowering.
9250//
9251
9252/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9253/// element size of SplatSize. Cast the result to VT.
9254static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9255 SelectionDAG &DAG, const SDLoc &dl) {
9256 static const MVT VTys[] = { // canonical VT to use for each size.
9257 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9258 };
9259
9260 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9261
9262 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
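  // (For example, a v8i16 splat of 0xFFFF is emitted as a v16i8 splat of 0xFF,
  // since the all-ones bit pattern is the same for every element size.)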
9263 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9264 SplatSize = 1;
9265 Val = 0xFF;
9266 }
9267
9268 EVT CanonicalVT = VTys[SplatSize-1];
9269
9270 // Build a canonical splat for this value.
9271 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9272}
9273
9274/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9275/// specified intrinsic ID.
9277 const SDLoc &dl, EVT DestVT = MVT::Other) {
9278 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9279 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9280 DAG.getConstant(IID, dl, MVT::i32), Op);
9281}
9282
9283/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9284/// specified intrinsic ID.
9285static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9286 SelectionDAG &DAG, const SDLoc &dl,
9287 EVT DestVT = MVT::Other) {
9288 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9289 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9290 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9291}
9292
9293/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9294/// specified intrinsic ID.
9295static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9296 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9297 EVT DestVT = MVT::Other) {
9298 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9299 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9300 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9301}
9302
9303/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9304/// amount. The result has the specified value type.
9305static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9306 SelectionDAG &DAG, const SDLoc &dl) {
9307 // Force LHS/RHS to be the right type.
9308 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9309 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9310
9311 int Ops[16];
9312 for (unsigned i = 0; i != 16; ++i)
9313 Ops[i] = i + Amt;
9314 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9315 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9316}
9317
9318/// Do we have an efficient pattern in a .td file for this node?
9319///
9320/// \param V - pointer to the BuildVectorSDNode being matched
9321/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9322///
9323/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9324/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9325/// the opposite is true (expansion is beneficial) are:
9326/// - The node builds a vector out of integers that are not 32 or 64-bits
9327/// - The node builds a vector out of constants
9328/// - The node is a "load-and-splat"
9329/// In all other cases, we will choose to keep the BUILD_VECTOR.
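/// For example, a v4i32 BUILD_VECTOR whose operands are all the same load is a
/// load-and-splat, so this helper returns false for it and the node is
/// expanded rather than kept as a BUILD_VECTOR.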
9330static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9331                                            bool HasDirectMove,
9332 bool HasP8Vector) {
9333 EVT VecVT = V->getValueType(0);
9334 bool RightType = VecVT == MVT::v2f64 ||
9335 (HasP8Vector && VecVT == MVT::v4f32) ||
9336 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9337 if (!RightType)
9338 return false;
9339
9340 bool IsSplat = true;
9341 bool IsLoad = false;
9342 SDValue Op0 = V->getOperand(0);
9343
9344 // This function is called in a block that confirms the node is not a constant
9345 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9346 // different constants.
9347 if (V->isConstant())
9348 return false;
9349 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9350 if (V->getOperand(i).isUndef())
9351 return false;
9352 // We want to expand nodes that represent load-and-splat even if the
9353 // loaded value is a floating point truncation or conversion to int.
9354 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9355 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9356 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9357 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9358 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9359 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9360 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9361 IsLoad = true;
9362 // If the operands are different or the input is not a load and has more
9363 // uses than just this BV node, then it isn't a splat.
9364 if (V->getOperand(i) != Op0 ||
9365 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9366 IsSplat = false;
9367 }
9368 return !(IsSplat && IsLoad);
9369}
9370
9371// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9372SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9373
9374 SDLoc dl(Op);
9375 SDValue Op0 = Op->getOperand(0);
9376
9377 SDValue Lo = Op0.getOperand(0);
9378 SDValue Hi = Op0.getOperand(1);
9379
9380 if ((Op.getValueType() != MVT::f128) ||
9381 (Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) ||
9382 (Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64())
9383 return SDValue();
9384
9385 if (!Subtarget.isLittleEndian())
9386 std::swap(Lo, Hi);
9387
9388 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9389}
9390
9391static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9392 const SDValue *InputLoad = &Op;
9393 while (InputLoad->getOpcode() == ISD::BITCAST)
9394 InputLoad = &InputLoad->getOperand(0);
9395 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9396      InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9397    IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9398 InputLoad = &InputLoad->getOperand(0);
9399 }
9400 if (InputLoad->getOpcode() != ISD::LOAD)
9401 return nullptr;
9402 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9403 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9404}
9405
9406// Convert the argument APFloat to a single precision APFloat if there is no
9407// loss in information during the conversion to single precision APFloat and the
9408// resulting number is not a denormal number. Return true if successful.
9409bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9410  APFloat APFloatToConvert = ArgAPFloat;
9411 bool LosesInfo = true;
9412  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9413                           &LosesInfo);
9414 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9415 if (Success)
9416 ArgAPFloat = APFloatToConvert;
9417 return Success;
9418}
9419
9420// Bitcast the argument APInt to a double and convert it to a single precision
9421// APFloat, bitcast the APFloat to an APInt and assign it to the original
9422// argument if there is no loss in information during the conversion from
9423// double to single precision APFloat and the resulting number is not a denormal
9424// number. Return true if successful.
9425bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9426  double DpValue = ArgAPInt.bitsToDouble();
9427 APFloat APFloatDp(DpValue);
9428 bool Success = convertToNonDenormSingle(APFloatDp);
9429 if (Success)
9430 ArgAPInt = APFloatDp.bitcastToAPInt();
9431 return Success;
9432}
9433
9434// Nondestructive check for convertToNonDenormSingle.
9435bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9436  // Only convert if it loses info, since XXSPLTIDP should
9437 // handle the other case.
9438 APFloat APFloatToConvert = ArgAPFloat;
9439 bool LosesInfo = true;
9440  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9441                           &LosesInfo);
9442
9443 return (!LosesInfo && !APFloatToConvert.isDenormal());
9444}
9445
9446static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9447 unsigned &Opcode) {
9448 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9449 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9450 return false;
9451
9452 EVT Ty = Op->getValueType(0);
9453 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9454 // as we cannot handle extending loads for these types.
9455 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9456 ISD::isNON_EXTLoad(InputNode))
9457 return true;
9458
9459 EVT MemVT = InputNode->getMemoryVT();
9460 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9461 // memory VT is the same vector element VT type.
9462 // The loads feeding into the v8i16 and v16i8 types will be extending because
9463 // scalar i8/i16 are not legal types.
9464 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9465 (MemVT == Ty.getVectorElementType()))
9466 return true;
9467
9468 if (Ty == MVT::v2i64) {
9469 // Check the extend type, when the input type is i32, and the output vector
9470 // type is v2i64.
9471 if (MemVT == MVT::i32) {
9472 if (ISD::isZEXTLoad(InputNode))
9473 Opcode = PPCISD::ZEXT_LD_SPLAT;
9474 if (ISD::isSEXTLoad(InputNode))
9475 Opcode = PPCISD::SEXT_LD_SPLAT;
9476 }
9477 return true;
9478 }
9479 return false;
9480}
9481
9482// If this is a case we can't handle, return null and let the default
9483// expansion code take care of it. If we CAN select this case, and if it
9484// selects to a single instruction, return Op. Otherwise, if we can codegen
9485// this case more efficiently than a constant pool load, lower it to the
9486// sequence of ops that should be used.
9487SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9488 SelectionDAG &DAG) const {
9489 SDLoc dl(Op);
9490 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9491 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9492
9493 // Check if this is a splat of a constant value.
9494 APInt APSplatBits, APSplatUndef;
9495 unsigned SplatBitSize;
9496 bool HasAnyUndefs;
9497 bool BVNIsConstantSplat =
9498 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9499 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9500
9501 // If it is a splat of a double, check if we can shrink it to a 32 bit
9502 // non-denormal float which when converted back to double gives us the same
9503 // double. This is to exploit the XXSPLTIDP instruction.
9504 // If we lose precision, we use XXSPLTI32DX.
9505 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9506 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9507 // Check the type first to short-circuit so we don't modify APSplatBits if
9508 // this block isn't executed.
9509 if ((Op->getValueType(0) == MVT::v2f64) &&
9510 convertToNonDenormSingle(APSplatBits)) {
9511 SDValue SplatNode = DAG.getNode(
9512 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9513 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9514 return DAG.getBitcast(Op.getValueType(), SplatNode);
9515 } else {
9516 // We may lose precision, so we have to use XXSPLTI32DX.
9517
9518 uint32_t Hi =
9519 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9520 uint32_t Lo =
9521 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9522 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9523
9524 if (!Hi || !Lo)
9525 // If either load is 0, then we should generate XXLXOR to set to 0.
9526 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9527
9528 if (Hi)
9529 SplatNode = DAG.getNode(
9530 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9531 DAG.getTargetConstant(0, dl, MVT::i32),
9532 DAG.getTargetConstant(Hi, dl, MVT::i32));
9533
9534 if (Lo)
9535 SplatNode =
9536 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9537 DAG.getTargetConstant(1, dl, MVT::i32),
9538 DAG.getTargetConstant(Lo, dl, MVT::i32));
9539
9540 return DAG.getBitcast(Op.getValueType(), SplatNode);
9541 }
9542 }
9543
9544 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9545 unsigned NewOpcode = PPCISD::LD_SPLAT;
9546
9547 // Handle load-and-splat patterns as we have instructions that will do this
9548 // in one go.
9549 if (DAG.isSplatValue(Op, true) &&
9550 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9551 const SDValue *InputLoad = &Op.getOperand(0);
9552 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9553
9554 // If the input load is an extending load, it will be an i32 -> i64
9555 // extending load and isValidSplatLoad() will update NewOpcode.
9556 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9557 unsigned ElementSize =
9558 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9559
9560 assert(((ElementSize == 2 * MemorySize)
9561 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9562 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9563 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9564 "Unmatched element size and opcode!\n");
9565
9566 // Checking for a single use of this load, we have to check for vector
9567 // width (128 bits) / ElementSize uses (since each operand of the
9568      // BUILD_VECTOR is a separate use of the value).
9569 unsigned NumUsesOfInputLD = 128 / ElementSize;
9570 for (SDValue BVInOp : Op->ops())
9571 if (BVInOp.isUndef())
9572 NumUsesOfInputLD--;
9573
9574 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9575 // the cases below should, in principle, also apply to "lfiwzx/lfiwax +
9576 // LE target + index 1", "lxvrhx + BE target + index 7" and "lxvrbx +
9577 // BE target + index 15", but isValidSplatLoad() currently only returns
9578 // true when the splat load feeds element 0, so we never run into
9579 // trouble for those cases.
9580 //
9581 // case 1 - lfiwzx/lfiwax
9582 // 1.1: load result is i32 and is sign/zero extended to i64;
9583 // 1.2: build a v2i64 vector type with above loaded value;
9584 // 1.3: the vector has only one value at index 0, others are all undef;
9585 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9586 if (NumUsesOfInputLD == 1 &&
9587 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9588 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9589 Subtarget.hasLFIWAX()))
9590 return SDValue();
9591
9592 // case 2 - lxvr[hb]x
9593 // 2.1: load result is at most i16;
9594 // 2.2: build a vector with above loaded value;
9595 // 2.3: the vector has only one value at index 0, others are all undef;
9596 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9597 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9598 Subtarget.isISA3_1() && ElementSize <= 16)
9599 return SDValue();
9600
9601 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9602 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9603 Subtarget.hasVSX()) {
9604 SDValue Ops[] = {
9605 LD->getChain(), // Chain
9606 LD->getBasePtr(), // Ptr
9607 DAG.getValueType(Op.getValueType()) // VT
9608 };
9609 SDValue LdSplt = DAG.getMemIntrinsicNode(
9610 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9611 LD->getMemoryVT(), LD->getMemOperand());
9612 // Replace all uses of the output chain of the original load with the
9613 // output chain of the new load.
9614 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9615 LdSplt.getValue(1));
9616 return LdSplt;
9617 }
9618 }
9619
9620 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9621 // 32-bits can be lowered to VSX instructions under certain conditions.
9622 // Without VSX, there is no pattern more efficient than expanding the node.
9623 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9624 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9625 Subtarget.hasP8Vector()))
9626 return Op;
9627 return SDValue();
9628 }
9629
9630 uint64_t SplatBits = APSplatBits.getZExtValue();
9631 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9632 unsigned SplatSize = SplatBitSize / 8;
9633
9634 // First, handle single instruction cases.
9635
9636 // All zeros?
9637 if (SplatBits == 0) {
9638 // Canonicalize all zero vectors to be v4i32.
9639 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9640 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9641 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9642 }
9643 return Op;
9644 }
9645
9646 // We have XXSPLTIW for constant splats four bytes wide.
9647 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9648 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9649 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9650 // turned into a 4-byte splat of 0xABABABAB.
9651 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9652 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9653 Op.getValueType(), DAG, dl);
9654
9655 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9656 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9657 dl);
9658
9659 // We have XXSPLTIB for constant splats one byte wide.
9660 if (Subtarget.hasP9Vector() && SplatSize == 1)
9661 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9662 dl);
9663
9664 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9665 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9666 (32-SplatBitSize));
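// Illustrative example (not from the original source): SplatBits == 0xFFF0
// with SplatBitSize == 16 becomes 0xFFF00000 after the left shift and -16
// after the arithmetic right shift, so it is covered by a single VSPLTIH.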
9667 if (SextVal >= -16 && SextVal <= 15)
9668 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9669 dl);
9670
9671 // Two instruction sequences.
9672
9673 // If this value is in the range [-32,30] and is even, use:
9674 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9675 // If this value is in the range [17,31] and is odd, use:
9676 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9677 // If this value is in the range [-31,-17] and is odd, use:
9678 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9679 // Note the last two are three-instruction sequences.
9680 if (SextVal >= -32 && SextVal <= 31) {
9681 // To avoid having these optimizations undone by constant folding,
9682 // we convert to a pseudo that will be expanded later into one of
9683 // the above forms.
9684 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9685 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9686 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9687 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9688 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9689 if (VT == Op.getValueType())
9690 return RetVal;
9691 else
9692 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9693 }
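// Illustrative examples (not from the original source) of how the pseudo is
// later expanded: SextVal == 26 becomes VSPLTI[bhw](13) + VSPLTI[bhw](13);
// SextVal == 27 becomes VSPLTI[bhw](11) - VSPLTI[bhw](-16); and
// SextVal == -27 becomes VSPLTI[bhw](-11) + VSPLTI[bhw](-16).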
9694
9695 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9696 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9697 // for fneg/fabs.
9698 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9699 // Make -1 and vspltisw -1:
9700 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9701
9702 // Make the VSLW intrinsic, computing 0x8000_0000.
9703 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9704 OnesV, DAG, dl);
9705
9706 // xor by OnesV to invert it.
9707 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9708 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9709 }
9710
9711 // Check to see if this is one of the wide variety of 'vsplti*, binop self' cases.
9712 static const signed char SplatCsts[] = {
9713 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9714 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9715 };
9716
9717 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9718 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9719 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9720 int i = SplatCsts[idx];
9721
9722 // Figure out what shift amount will be used by altivec if shifted by i in
9723 // this splat size.
9724 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9725
9726 // vsplti + shl self.
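// Illustrative example (not from the original source): a splat of the byte
// 0x80 has SextVal == -128. With i == -2, vspltisb(-2) puts 0xFE in every
// byte and TypeShiftAmt == (-2 & 7) == 6, so (int)((unsigned)-2 << 6) == -128
// matches SextVal; the emitted vspltisb(-2) + vslb(Res, Res) sequence shifts
// each 0xFE byte left by 6 to produce the 0x80 splat.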
9727 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9728 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9729 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9730 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9731 Intrinsic::ppc_altivec_vslw
9732 };
9733 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9734 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9735 }
9736
9737 // vsplti + srl self.
9738 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9739 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9740 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9741 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9742 Intrinsic::ppc_altivec_vsrw
9743 };
9744 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9745 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9746 }
9747
9748 // vsplti + rol self.
9749 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9750 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9751 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9752 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9753 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9754 Intrinsic::ppc_altivec_vrlw
9755 };
9756 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9757 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9758 }
9759
9760 // t = vsplti c, result = vsldoi t, t, 1
9761 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9762 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9763 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9764 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9765 }
9766 // t = vsplti c, result = vsldoi t, t, 2
9767 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9768 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9769 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9770 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9771 }
9772 // t = vsplti c, result = vsldoi t, t, 3
9773 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9774 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9775 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9776 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9777 }
9778 }
9779
9780 return SDValue();
9781}
9782
9783/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9784/// the specified operations to build the shuffle.
9785static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9786 SDValue RHS, SelectionDAG &DAG,
9787 const SDLoc &dl) {
9788 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9789 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9790 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
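// Illustrative note (not from the original source): each 13-bit ID encodes
// four result elements as base-9 digits, where digits 0-3 select words of
// LHS, 4-7 select words of RHS, and 8 means undef. The OP_COPY checks below
// rely on this: (1*9+2)*9+3 == 102 encodes <0,1,2,3> (LHS unchanged), and
// ((4*9+5)*9+6)*9+7 == 3382 encodes <4,5,6,7> (RHS unchanged).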
9791
9792 enum {
9793 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9794 OP_VMRGHW,
9795 OP_VMRGLW,
9796 OP_VSPLTISW0,
9797 OP_VSPLTISW1,
9798 OP_VSPLTISW2,
9799 OP_VSPLTISW3,
9800 OP_VSLDOI4,
9801 OP_VSLDOI8,
9802 OP_VSLDOI12
9803 };
9804
9805 if (OpNum == OP_COPY) {
9806 if (LHSID == (1*9+2)*9+3) return LHS;
9807 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9808 return RHS;
9809 }
9810
9811 SDValue OpLHS, OpRHS;
9812 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9813 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9814
9815 int ShufIdxs[16];
9816 switch (OpNum) {
9817 default: llvm_unreachable("Unknown i32 permute!");
9818 case OP_VMRGHW:
9819 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9820 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9821 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9822 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9823 break;
9824 case OP_VMRGLW:
9825 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9826 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9827 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9828 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9829 break;
9830 case OP_VSPLTISW0:
9831 for (unsigned i = 0; i != 16; ++i)
9832 ShufIdxs[i] = (i&3)+0;
9833 break;
9834 case OP_VSPLTISW1:
9835 for (unsigned i = 0; i != 16; ++i)
9836 ShufIdxs[i] = (i&3)+4;
9837 break;
9838 case OP_VSPLTISW2:
9839 for (unsigned i = 0; i != 16; ++i)
9840 ShufIdxs[i] = (i&3)+8;
9841 break;
9842 case OP_VSPLTISW3:
9843 for (unsigned i = 0; i != 16; ++i)
9844 ShufIdxs[i] = (i&3)+12;
9845 break;
9846 case OP_VSLDOI4:
9847 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9848 case OP_VSLDOI8:
9849 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9850 case OP_VSLDOI12:
9851 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9852 }
9853 EVT VT = OpLHS.getValueType();
9854 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9855 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9856 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9857 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9858}
9859
9860/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9861/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9862/// SDValue.
9863SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9864 SelectionDAG &DAG) const {
9865 const unsigned BytesInVector = 16;
9866 bool IsLE = Subtarget.isLittleEndian();
9867 SDLoc dl(N);
9868 SDValue V1 = N->getOperand(0);
9869 SDValue V2 = N->getOperand(1);
9870 unsigned ShiftElts = 0, InsertAtByte = 0;
9871 bool Swap = false;
9872
9873 // Shifts required to get the byte we want at element 7.
9874 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9875 0, 15, 14, 13, 12, 11, 10, 9};
9876 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9877 1, 2, 3, 4, 5, 6, 7, 8};
9878
9879 ArrayRef<int> Mask = N->getMask();
9880 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9881
9882 // For each mask element, find out if we're just inserting something
9883 // from V2 into V1 or vice versa.
9884 // Possible permutations inserting an element from V2 into V1:
9885 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9886 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9887 // ...
9888 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9889 // Inserting from V1 into V2 will be similar, except mask range will be
9890 // [16,31].
9891
9892 bool FoundCandidate = false;
9893 // If both vector operands for the shuffle are the same vector, the mask
9894 // will contain only elements from the first one and the second one will be
9895 // undef.
9896 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9897 // Go through the mask of bytes to find an element that's being moved
9898 // from one vector to the other.
9899 for (unsigned i = 0; i < BytesInVector; ++i) {
9900 unsigned CurrentElement = Mask[i];
9901 // If 2nd operand is undefined, we should only look for element 7 in the
9902 // Mask.
9903 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9904 continue;
9905
9906 bool OtherElementsInOrder = true;
9907 // Examine the other elements in the Mask to see if they're in original
9908 // order.
9909 for (unsigned j = 0; j < BytesInVector; ++j) {
9910 if (j == i)
9911 continue;
9912 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9913 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
9914 // in which case we assume we're always picking from the 1st operand.
9915 int MaskOffset =
9916 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9917 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9918 OtherElementsInOrder = false;
9919 break;
9920 }
9921 }
9922 // If other elements are in original order, we record the number of shifts
9923 // we need to get the element we want into element 7. Also record which byte
9924 // in the vector we should insert into.
9925 if (OtherElementsInOrder) {
9926 // If 2nd operand is undefined, we assume no shifts and no swapping.
9927 if (V2.isUndef()) {
9928 ShiftElts = 0;
9929 Swap = false;
9930 } else {
9931 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9932 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9933 : BigEndianShifts[CurrentElement & 0xF];
9934 Swap = CurrentElement < BytesInVector;
9935 }
9936 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9937 FoundCandidate = true;
9938 break;
9939 }
9940 }
9941
9942 if (!FoundCandidate)
9943 return SDValue();
9944
9945 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9946 // optionally with VECSHL if shift is required.
9947 if (Swap)
9948 std::swap(V1, V2);
9949 if (V2.isUndef())
9950 V2 = V1;
9951 if (ShiftElts) {
9952 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9953 DAG.getConstant(ShiftElts, dl, MVT::i32));
9954 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9955 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9956 }
9957 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9958 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9959}
9960
9961/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9962/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9963/// SDValue.
9964SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9965 SelectionDAG &DAG) const {
9966 const unsigned NumHalfWords = 8;
9967 const unsigned BytesInVector = NumHalfWords * 2;
9968 // Check that the shuffle is on half-words.
9969 if (!isNByteElemShuffleMask(N, 2, 1))
9970 return SDValue();
9971
9972 bool IsLE = Subtarget.isLittleEndian();
9973 SDLoc dl(N);
9974 SDValue V1 = N->getOperand(0);
9975 SDValue V2 = N->getOperand(1);
9976 unsigned ShiftElts = 0, InsertAtByte = 0;
9977 bool Swap = false;
9978
9979 // Shifts required to get the half-word we want at element 3.
9980 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9981 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9982
9983 uint32_t Mask = 0;
9984 uint32_t OriginalOrderLow = 0x1234567;
9985 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9986 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9987 // 32-bit space, only need 4-bit nibbles per element.
9988 for (unsigned i = 0; i < NumHalfWords; ++i) {
9989 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9990 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9991 }
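// Illustrative example (not from the original source): for the identity
// shuffle the half-word indices are 0..7, so Mask packs to 0x01234567 and
// matches OriginalOrderLow; a shuffle taking every element from V2 in order
// packs to 0x89ABCDEF and matches OriginalOrderHigh.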
9992
9993 // For each mask element, find out if we're just inserting something
9994 // from V2 into V1 or vice versa. Possible permutations inserting an element
9995 // from V2 into V1:
9996 // X, 1, 2, 3, 4, 5, 6, 7
9997 // 0, X, 2, 3, 4, 5, 6, 7
9998 // 0, 1, X, 3, 4, 5, 6, 7
9999 // 0, 1, 2, X, 4, 5, 6, 7
10000 // 0, 1, 2, 3, X, 5, 6, 7
10001 // 0, 1, 2, 3, 4, X, 6, 7
10002 // 0, 1, 2, 3, 4, 5, X, 7
10003 // 0, 1, 2, 3, 4, 5, 6, X
10004 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10005
10006 bool FoundCandidate = false;
10007 // Go through the mask of half-words to find an element that's being moved
10008 // from one vector to the other.
10009 for (unsigned i = 0; i < NumHalfWords; ++i) {
10010 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10011 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10012 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10013 uint32_t TargetOrder = 0x0;
10014
10015 // If both vector operands for the shuffle are the same vector, the mask
10016 // will contain only elements from the first one and the second one will be
10017 // undef.
10018 if (V2.isUndef()) {
10019 ShiftElts = 0;
10020 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10021 TargetOrder = OriginalOrderLow;
10022 Swap = false;
10023 // Skip if this is not the correct element, or the mask of the other
10024 // elements doesn't match our expected order.
10025 if (MaskOneElt == VINSERTHSrcElem &&
10026 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10027 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10028 FoundCandidate = true;
10029 break;
10030 }
10031 } else { // If both operands are defined.
10032 // Target order is [8,15] if the current mask is between [0,7].
10033 TargetOrder =
10034 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10035 // Skip if the mask of the other elements doesn't match our expected order.
10036 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10037 // We only need the last 3 bits for the number of shifts.
10038 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10039 : BigEndianShifts[MaskOneElt & 0x7];
10040 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10041 Swap = MaskOneElt < NumHalfWords;
10042 FoundCandidate = true;
10043 break;
10044 }
10045 }
10046 }
10047
10048 if (!FoundCandidate)
10049 return SDValue();
10050
10051 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10052 // optionally with VECSHL if shift is required.
10053 if (Swap)
10054 std::swap(V1, V2);
10055 if (V2.isUndef())
10056 V2 = V1;
10057 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10058 if (ShiftElts) {
10059 // Double ShiftElts because we're left shifting on v16i8 type.
10060 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10061 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10062 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10063 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10064 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10065 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10066 }
10067 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10068 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10069 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10070 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10071}
10072
10073/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10074/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10075/// return the default SDValue.
10076SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10077 SelectionDAG &DAG) const {
10078 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10079 // to v16i8. Peek through the bitcasts to get the actual operands.
10080 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10081 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10082
10083 auto ShuffleMask = SVN->getMask();
10084 SDValue VecShuffle(SVN, 0);
10085 SDLoc DL(SVN);
10086
10087 // Check that we have a four byte shuffle.
10088 if (!isNByteElemShuffleMask(SVN, 4, 1))
10089 return SDValue();
10090
10091 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10092 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10093 std::swap(LHS, RHS);
10094 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10095 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10096 if (!CommutedSV)
10097 return SDValue();
10098 ShuffleMask = CommutedSV->getMask();
10099 }
10100
10101 // Ensure that the RHS is a vector of constants.
10102 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10103 if (!BVN)
10104 return SDValue();
10105
10106 // Check if RHS is a splat of 4-bytes (or smaller).
10107 APInt APSplatValue, APSplatUndef;
10108 unsigned SplatBitSize;
10109 bool HasAnyUndefs;
10110 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10111 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10112 SplatBitSize > 32)
10113 return SDValue();
10114
10115 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10116 // The instruction splats a constant C into two words of the source vector
10117 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10118 // Thus we check that the shuffle mask is the equivalent of
10119 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10120 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10121 // within each word are consecutive, so we only need to check the first byte.
10122 SDValue Index;
10123 bool IsLE = Subtarget.isLittleEndian();
10124 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10125 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10126 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10127 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10128 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10129 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10130 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10131 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10132 else
10133 return SDValue();
10134
10135 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10136 // for XXSPLTI32DX.
10137 unsigned SplatVal = APSplatValue.getZExtValue();
10138 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10139 SplatVal |= (SplatVal << SplatBitSize);
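// Illustrative example (not from the original source): an 8-bit splat value
// of 0xAB widens to 0xABAB after the first iteration and to 0xABABABAB after
// the second, giving the 32-bit immediate XXSPLTI32DX expects.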
10140
10141 SDValue SplatNode = DAG.getNode(
10142 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10143 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10144 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10145}
10146
10147/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10148/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10149/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10150/// i.e (or (shl x, C1), (srl x, 128-C1)).
10151SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10152 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10153 assert(Op.getValueType() == MVT::v1i128 &&
10154 "Only set v1i128 as custom, other type shouldn't reach here!");
10155 SDLoc dl(Op);
10156 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10157 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10158 unsigned SHLAmt = N1.getConstantOperandVal(0);
10159 if (SHLAmt % 8 == 0) {
10160 std::array<int, 16> Mask;
10161 std::iota(Mask.begin(), Mask.end(), 0);
10162 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
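// Illustrative example (not from the original source): for SHLAmt == 16 the
// mask becomes <2, 3, ..., 15, 0, 1>, i.e. every result byte is taken two
// positions further along the source vector.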
10163 if (SDValue Shuffle =
10164 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10165 DAG.getUNDEF(MVT::v16i8), Mask))
10166 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10167 }
10168 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10169 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10170 DAG.getConstant(SHLAmt, dl, MVT::i32));
10171 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10172 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10173 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10174 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10175}
10176
10177/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10178/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10179/// return the code it can be lowered into. Worst case, it can always be
10180/// lowered into a vperm.
10181SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10182 SelectionDAG &DAG) const {
10183 SDLoc dl(Op);
10184 SDValue V1 = Op.getOperand(0);
10185 SDValue V2 = Op.getOperand(1);
10186 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10187
10188 // Any nodes that were combined in the target-independent combiner prior
10189 // to vector legalization will not be sent to the target combine. Try to
10190 // combine it here.
10191 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10192 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10193 return NewShuffle;
10194 Op = NewShuffle;
10195 SVOp = cast<ShuffleVectorSDNode>(Op);
10196 V1 = Op.getOperand(0);
10197 V2 = Op.getOperand(1);
10198 }
10199 EVT VT = Op.getValueType();
10200 bool isLittleEndian = Subtarget.isLittleEndian();
10201
10202 unsigned ShiftElts, InsertAtByte;
10203 bool Swap = false;
10204
10205 // If this is a load-and-splat, we can do that with a single instruction
10206 // in some cases. However if the load has multiple uses, we don't want to
10207 // combine it because that will just produce multiple loads.
10208 bool IsPermutedLoad = false;
10209 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10210 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10211 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10212 InputLoad->hasOneUse()) {
10213 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10214 int SplatIdx =
10215 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10216
10217 // The splat index for permuted loads will be in the left half of the vector
10218 // which is strictly wider than the loaded value by 8 bytes. So we need to
10219 // adjust the splat index to point to the correct address in memory.
10220 if (IsPermutedLoad) {
10221 assert((isLittleEndian || IsFourByte) &&
10222 "Unexpected size for permuted load on big endian target");
10223 SplatIdx += IsFourByte ? 2 : 1;
10224 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10225 "Splat of a value outside of the loaded memory");
10226 }
10227
10228 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10229 // For 4-byte load-and-splat, we need Power9.
10230 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10231 uint64_t Offset = 0;
10232 if (IsFourByte)
10233 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10234 else
10235 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
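// Illustrative example (not from the original source): an 8-byte splat of
// element 1 loads from BasePtr + 0 on little endian but from BasePtr + 8 on
// big endian.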
10236
10237 // If the width of the load is the same as the width of the splat,
10238 // loading with an offset would load the wrong memory.
10239 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10240 Offset = 0;
10241
10242 SDValue BasePtr = LD->getBasePtr();
10243 if (Offset != 0)
10244 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10245 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10246 SDValue Ops[] = {
10247 LD->getChain(), // Chain
10248 BasePtr, // BasePtr
10249 DAG.getValueType(Op.getValueType()) // VT
10250 };
10251 SDVTList VTL =
10252 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10253 SDValue LdSplt =
10254 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10255 Ops, LD->getMemoryVT(), LD->getMemOperand());
10256 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10257 if (LdSplt.getValueType() != SVOp->getValueType(0))
10258 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10259 return LdSplt;
10260 }
10261 }
10262
10263 // All v2i64 and v2f64 shuffles are legal
10264 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10265 return Op;
10266
10267 if (Subtarget.hasP9Vector() &&
10268 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10269 isLittleEndian)) {
10270 if (V2.isUndef())
10271 V2 = V1;
10272 else if (Swap)
10273 std::swap(V1, V2);
10274 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10275 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10276 if (ShiftElts) {
10277 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10278 DAG.getConstant(ShiftElts, dl, MVT::i32));
10279 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10280 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10281 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10282 }
10283 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10284 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10285 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10286 }
10287
10288 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10289 SDValue SplatInsertNode;
10290 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10291 return SplatInsertNode;
10292 }
10293
10294 if (Subtarget.hasP9Altivec()) {
10295 SDValue NewISDNode;
10296 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10297 return NewISDNode;
10298
10299 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10300 return NewISDNode;
10301 }
10302
10303 if (Subtarget.hasVSX() &&
10304 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10305 if (Swap)
10306 std::swap(V1, V2);
10307 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10308 SDValue Conv2 =
10309 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10310
10311 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10312 DAG.getConstant(ShiftElts, dl, MVT::i32));
10313 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10314 }
10315
10316 if (Subtarget.hasVSX() &&
10317 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10318 if (Swap)
10319 std::swap(V1, V2);
10320 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10321 SDValue Conv2 =
10322 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10323
10324 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10325 DAG.getConstant(ShiftElts, dl, MVT::i32));
10326 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10327 }
10328
10329 if (Subtarget.hasP9Vector()) {
10330 if (PPC::isXXBRHShuffleMask(SVOp)) {
10331 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10332 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10333 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10334 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10335 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10336 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10337 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10338 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10339 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10340 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10341 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10342 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10343 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10344 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10345 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10346 }
10347 }
10348
10349 if (Subtarget.hasVSX()) {
10350 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10351 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10352
10353 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10354 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10355 DAG.getConstant(SplatIdx, dl, MVT::i32));
10356 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10357 }
10358
10359 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10360 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10361 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10362 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10363 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10364 }
10365 }
10366
10367 // Cases that are handled by instructions that take permute immediates
10368 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10369 // selected by the instruction selector.
10370 if (V2.isUndef()) {
10371 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10372 PPC::isSplatShuffleMask(SVOp, 2) ||
10373 PPC::isSplatShuffleMask(SVOp, 4) ||
10374 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10375 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10376 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10377 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10378 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10379 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10380 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10381 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10382 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10383 (Subtarget.hasP8Altivec() && (
10384 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10385 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10386 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10387 return Op;
10388 }
10389 }
10390
10391 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10392 // and produce a fixed permutation. If any of these match, do not lower to
10393 // VPERM.
10394 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10395 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10396 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10397 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10398 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10399 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10400 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10401 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10402 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10403 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10404 (Subtarget.hasP8Altivec() && (
10405 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10406 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10407 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10408 return Op;
10409
10410 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10411 // perfect shuffle table to emit an optimal matching sequence.
10412 ArrayRef<int> PermMask = SVOp->getMask();
10413
10414 if (!DisablePerfectShuffle && !isLittleEndian) {
10415 unsigned PFIndexes[4];
10416 bool isFourElementShuffle = true;
10417 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10418 ++i) { // Element number
10419 unsigned EltNo = 8; // Start out undef.
10420 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10421 if (PermMask[i * 4 + j] < 0)
10422 continue; // Undef, ignore it.
10423
10424 unsigned ByteSource = PermMask[i * 4 + j];
10425 if ((ByteSource & 3) != j) {
10426 isFourElementShuffle = false;
10427 break;
10428 }
10429
10430 if (EltNo == 8) {
10431 EltNo = ByteSource / 4;
10432 } else if (EltNo != ByteSource / 4) {
10433 isFourElementShuffle = false;
10434 break;
10435 }
10436 }
10437 PFIndexes[i] = EltNo;
10438 }
10439
10440 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10441 // perfect shuffle vector to determine if it is cost effective to do this as
10442 // discrete instructions, or whether we should use a vperm.
10443 // For now, we skip this for little endian until such time as we have a
10444 // little-endian perfect shuffle table.
10445 if (isFourElementShuffle) {
10446 // Compute the index in the perfect shuffle table.
10447 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10448 PFIndexes[2] * 9 + PFIndexes[3];
10449
10450 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10451 unsigned Cost = (PFEntry >> 30);
10452
10453 // Determining when to avoid vperm is tricky. Many things affect the cost
10454 // of vperm, particularly how many times the perm mask needs to be
10455 // computed. For example, if the perm mask can be hoisted out of a loop or
10456 // is already used (perhaps because there are multiple permutes with the
10457 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10458 // permute mask out of the loop requires an extra register.
10459 //
10460 // As a compromise, we only emit discrete instructions if the shuffle can
10461 // be generated in 3 or fewer operations. When we have loop information
10462 // available, if this block is within a loop, we should avoid using vperm
10463 // for 3-operation perms and use a constant pool load instead.
10464 if (Cost < 3)
10465 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10466 }
10467 }
10468
10469 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10470 // vector that will get spilled to the constant pool.
10471 if (V2.isUndef()) V2 = V1;
10472
10473 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10474}
10475
10476SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10477 ArrayRef<int> PermMask, EVT VT,
10478 SDValue V1, SDValue V2) const {
10479 unsigned Opcode = PPCISD::VPERM;
10480 EVT ValType = V1.getValueType();
10481 SDLoc dl(Op);
10482 bool NeedSwap = false;
10483 bool isLittleEndian = Subtarget.isLittleEndian();
10484 bool isPPC64 = Subtarget.isPPC64();
10485
10486 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10487 (V1->hasOneUse() || V2->hasOneUse())) {
10488 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - "
10489 "using XXPERM instead\n");
10490 Opcode = PPCISD::XXPERM;
10491
10492 // The second input to XXPERM is also an output so if the second input has
10493 // multiple uses then copying is necessary, as a result we want the
10494 // single-use operand to be used as the second input to prevent copying.
10495 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10496 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10497 std::swap(V1, V2);
10498 NeedSwap = !NeedSwap;
10499 }
10500 }
10501
10502 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10503 // that it is in input element units, not in bytes. Convert now.
10504
10505 // For little endian, the order of the input vectors is reversed, and
10506 // the permutation mask is complemented with respect to 31. This is
10507 // necessary to produce proper semantics with the big-endian-based vperm
10508 // instruction.
10509 EVT EltVT = V1.getValueType().getVectorElementType();
10510 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10511
10512 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10513 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10514
10515 /*
10516 Vectors will be appended like so: [ V1 | V2 ]
10517 XXSWAPD on V1:
10518 [ A | B | C | D ] -> [ C | D | A | B ]
10519 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10520 i.e. index of A, B += 8, and index of C, D -= 8.
10521 XXSWAPD on V2:
10522 [ E | F | G | H ] -> [ G | H | E | F ]
10523 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10524 i.e. index of E, F += 8, index of G, H -= 8
10525 Swap V1 and V2:
10526 [ V1 | V2 ] -> [ V2 | V1 ]
10527 0-15 16-31 0-15 16-31
10528 i.e. index of V1 += 16, index of V2 -= 16
10529 */
10530
10531 SmallVector<SDValue, 16> ResultMask;
10532 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10533 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10534
10535 if (V1HasXXSWAPD) {
10536 if (SrcElt < 8)
10537 SrcElt += 8;
10538 else if (SrcElt < 16)
10539 SrcElt -= 8;
10540 }
10541 if (V2HasXXSWAPD) {
10542 if (SrcElt > 23)
10543 SrcElt -= 8;
10544 else if (SrcElt > 15)
10545 SrcElt += 8;
10546 }
10547 if (NeedSwap) {
10548 if (SrcElt < 16)
10549 SrcElt += 16;
10550 else
10551 SrcElt -= 16;
10552 }
10553 for (unsigned j = 0; j != BytesPerElement; ++j)
10554 if (isLittleEndian)
10555 ResultMask.push_back(
10556 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10557 else
10558 ResultMask.push_back(
10559 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10560 }
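// Illustrative example (not from the original source): with 4-byte elements
// on little endian, SrcElt == 0 contributes the control bytes 31, 30, 29, 28,
// reflecting the complement-with-respect-to-31 rule described above.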
10561
10562 if (V1HasXXSWAPD) {
10563 dl = SDLoc(V1->getOperand(0));
10564 V1 = V1->getOperand(0)->getOperand(1);
10565 }
10566 if (V2HasXXSWAPD) {
10567 dl = SDLoc(V2->getOperand(0));
10568 V2 = V2->getOperand(0)->getOperand(1);
10569 }
10570
10571 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10572 if (ValType != MVT::v2f64)
10573 V1 = DAG.getBitcast(MVT::v2f64, V1);
10574 if (V2.getValueType() != MVT::v2f64)
10575 V2 = DAG.getBitcast(MVT::v2f64, V2);
10576 }
10577
10578 ShufflesHandledWithVPERM++;
10579 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10580 LLVM_DEBUG({
10581 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10582 if (Opcode == PPCISD::XXPERM) {
10583 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10584 } else {
10585 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10586 }
10587 SVOp->dump();
10588 dbgs() << "With the following permute control vector:\n";
10589 VPermMask.dump();
10590 });
10591
10592 if (Opcode == PPCISD::XXPERM)
10593 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10594
10595 // For little endian we only need to swap the operand order here; the
10596 // permute control mask was already computed with this swap in mind.
10597 if (isLittleEndian)
10598 std::swap(V1, V2);
10599
10600 SDValue VPERMNode =
10601 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10602
10603 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10604 return VPERMNode;
10605}
10606
10607/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10608/// vector comparison. If it is, return true and fill in Opc/isDot with
10609/// information about the intrinsic.
10610static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10611 bool &isDot, const PPCSubtarget &Subtarget) {
10612 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10613 CompareOpc = -1;
10614 isDot = false;
10615 switch (IntrinsicID) {
10616 default:
10617 return false;
10618 // Comparison predicates.
10619 case Intrinsic::ppc_altivec_vcmpbfp_p:
10620 CompareOpc = 966;
10621 isDot = true;
10622 break;
10623 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10624 CompareOpc = 198;
10625 isDot = true;
10626 break;
10627 case Intrinsic::ppc_altivec_vcmpequb_p:
10628 CompareOpc = 6;
10629 isDot = true;
10630 break;
10631 case Intrinsic::ppc_altivec_vcmpequh_p:
10632 CompareOpc = 70;
10633 isDot = true;
10634 break;
10635 case Intrinsic::ppc_altivec_vcmpequw_p:
10636 CompareOpc = 134;
10637 isDot = true;
10638 break;
10639 case Intrinsic::ppc_altivec_vcmpequd_p:
10640 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10641 CompareOpc = 199;
10642 isDot = true;
10643 } else
10644 return false;
10645 break;
10646 case Intrinsic::ppc_altivec_vcmpneb_p:
10647 case Intrinsic::ppc_altivec_vcmpneh_p:
10648 case Intrinsic::ppc_altivec_vcmpnew_p:
10649 case Intrinsic::ppc_altivec_vcmpnezb_p:
10650 case Intrinsic::ppc_altivec_vcmpnezh_p:
10651 case Intrinsic::ppc_altivec_vcmpnezw_p:
10652 if (Subtarget.hasP9Altivec()) {
10653 switch (IntrinsicID) {
10654 default:
10655 llvm_unreachable("Unknown comparison intrinsic.");
10656 case Intrinsic::ppc_altivec_vcmpneb_p:
10657 CompareOpc = 7;
10658 break;
10659 case Intrinsic::ppc_altivec_vcmpneh_p:
10660 CompareOpc = 71;
10661 break;
10662 case Intrinsic::ppc_altivec_vcmpnew_p:
10663 CompareOpc = 135;
10664 break;
10665 case Intrinsic::ppc_altivec_vcmpnezb_p:
10666 CompareOpc = 263;
10667 break;
10668 case Intrinsic::ppc_altivec_vcmpnezh_p:
10669 CompareOpc = 327;
10670 break;
10671 case Intrinsic::ppc_altivec_vcmpnezw_p:
10672 CompareOpc = 391;
10673 break;
10674 }
10675 isDot = true;
10676 } else
10677 return false;
10678 break;
10679 case Intrinsic::ppc_altivec_vcmpgefp_p:
10680 CompareOpc = 454;
10681 isDot = true;
10682 break;
10683 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10684 CompareOpc = 710;
10685 isDot = true;
10686 break;
10687 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10688 CompareOpc = 774;
10689 isDot = true;
10690 break;
10691 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10692 CompareOpc = 838;
10693 isDot = true;
10694 break;
10695 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10696 CompareOpc = 902;
10697 isDot = true;
10698 break;
10699 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10700 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10701 CompareOpc = 967;
10702 isDot = true;
10703 } else
10704 return false;
10705 break;
10706 case Intrinsic::ppc_altivec_vcmpgtub_p:
10707 CompareOpc = 518;
10708 isDot = true;
10709 break;
10710 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10711 CompareOpc = 582;
10712 isDot = true;
10713 break;
10714 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10715 CompareOpc = 646;
10716 isDot = true;
10717 break;
10718 case Intrinsic::ppc_altivec_vcmpgtud_p:
10719 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10720 CompareOpc = 711;
10721 isDot = true;
10722 } else
10723 return false;
10724 break;
10725
10726 case Intrinsic::ppc_altivec_vcmpequq:
10727 case Intrinsic::ppc_altivec_vcmpgtsq:
10728 case Intrinsic::ppc_altivec_vcmpgtuq:
10729 if (!Subtarget.isISA3_1())
10730 return false;
10731 switch (IntrinsicID) {
10732 default:
10733 llvm_unreachable("Unknown comparison intrinsic.");
10734 case Intrinsic::ppc_altivec_vcmpequq:
10735 CompareOpc = 455;
10736 break;
10737 case Intrinsic::ppc_altivec_vcmpgtsq:
10738 CompareOpc = 903;
10739 break;
10740 case Intrinsic::ppc_altivec_vcmpgtuq:
10741 CompareOpc = 647;
10742 break;
10743 }
10744 break;
10745
10746 // VSX predicate comparisons use the same infrastructure
10747 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10748 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10749 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10750 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10751 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10752 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10753 if (Subtarget.hasVSX()) {
10754 switch (IntrinsicID) {
10755 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10756 CompareOpc = 99;
10757 break;
10758 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10759 CompareOpc = 115;
10760 break;
10761 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10762 CompareOpc = 107;
10763 break;
10764 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10765 CompareOpc = 67;
10766 break;
10767 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10768 CompareOpc = 83;
10769 break;
10770 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10771 CompareOpc = 75;
10772 break;
10773 }
10774 isDot = true;
10775 } else
10776 return false;
10777 break;
10778
10779 // Normal Comparisons.
10780 case Intrinsic::ppc_altivec_vcmpbfp:
10781 CompareOpc = 966;
10782 break;
10783 case Intrinsic::ppc_altivec_vcmpeqfp:
10784 CompareOpc = 198;
10785 break;
10786 case Intrinsic::ppc_altivec_vcmpequb:
10787 CompareOpc = 6;
10788 break;
10789 case Intrinsic::ppc_altivec_vcmpequh:
10790 CompareOpc = 70;
10791 break;
10792 case Intrinsic::ppc_altivec_vcmpequw:
10793 CompareOpc = 134;
10794 break;
10795 case Intrinsic::ppc_altivec_vcmpequd:
10796 if (Subtarget.hasP8Altivec())
10797 CompareOpc = 199;
10798 else
10799 return false;
10800 break;
10801 case Intrinsic::ppc_altivec_vcmpneb:
10802 case Intrinsic::ppc_altivec_vcmpneh:
10803 case Intrinsic::ppc_altivec_vcmpnew:
10804 case Intrinsic::ppc_altivec_vcmpnezb:
10805 case Intrinsic::ppc_altivec_vcmpnezh:
10806 case Intrinsic::ppc_altivec_vcmpnezw:
10807 if (Subtarget.hasP9Altivec())
10808 switch (IntrinsicID) {
10809 default:
10810 llvm_unreachable("Unknown comparison intrinsic.");
10811 case Intrinsic::ppc_altivec_vcmpneb:
10812 CompareOpc = 7;
10813 break;
10814 case Intrinsic::ppc_altivec_vcmpneh:
10815 CompareOpc = 71;
10816 break;
10817 case Intrinsic::ppc_altivec_vcmpnew:
10818 CompareOpc = 135;
10819 break;
10820 case Intrinsic::ppc_altivec_vcmpnezb:
10821 CompareOpc = 263;
10822 break;
10823 case Intrinsic::ppc_altivec_vcmpnezh:
10824 CompareOpc = 327;
10825 break;
10826 case Intrinsic::ppc_altivec_vcmpnezw:
10827 CompareOpc = 391;
10828 break;
10829 }
10830 else
10831 return false;
10832 break;
10833 case Intrinsic::ppc_altivec_vcmpgefp:
10834 CompareOpc = 454;
10835 break;
10836 case Intrinsic::ppc_altivec_vcmpgtfp:
10837 CompareOpc = 710;
10838 break;
10839 case Intrinsic::ppc_altivec_vcmpgtsb:
10840 CompareOpc = 774;
10841 break;
10842 case Intrinsic::ppc_altivec_vcmpgtsh:
10843 CompareOpc = 838;
10844 break;
10845 case Intrinsic::ppc_altivec_vcmpgtsw:
10846 CompareOpc = 902;
10847 break;
10848 case Intrinsic::ppc_altivec_vcmpgtsd:
10849 if (Subtarget.hasP8Altivec())
10850 CompareOpc = 967;
10851 else
10852 return false;
10853 break;
10854 case Intrinsic::ppc_altivec_vcmpgtub:
10855 CompareOpc = 518;
10856 break;
10857 case Intrinsic::ppc_altivec_vcmpgtuh:
10858 CompareOpc = 582;
10859 break;
10860 case Intrinsic::ppc_altivec_vcmpgtuw:
10861 CompareOpc = 646;
10862 break;
10863 case Intrinsic::ppc_altivec_vcmpgtud:
10864 if (Subtarget.hasP8Altivec())
10865 CompareOpc = 711;
10866 else
10867 return false;
10868 break;
10869 case Intrinsic::ppc_altivec_vcmpequq_p:
10870 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10871 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10872 if (!Subtarget.isISA3_1())
10873 return false;
10874 switch (IntrinsicID) {
10875 default:
10876 llvm_unreachable("Unknown comparison intrinsic.");
10877 case Intrinsic::ppc_altivec_vcmpequq_p:
10878 CompareOpc = 455;
10879 break;
10880 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10881 CompareOpc = 903;
10882 break;
10883 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10884 CompareOpc = 647;
10885 break;
10886 }
10887 isDot = true;
10888 break;
10889 }
10890 return true;
10891}
10892
10893/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10894/// lower, do it, otherwise return null.
10895SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10896 SelectionDAG &DAG) const {
10897 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10898
10899 SDLoc dl(Op);
10900
10901 switch (IntrinsicID) {
10902 case Intrinsic::thread_pointer:
10903 // Reads the thread pointer register, used for __builtin_thread_pointer.
10904 if (Subtarget.isPPC64())
10905 return DAG.getRegister(PPC::X13, MVT::i64);
10906 return DAG.getRegister(PPC::R2, MVT::i32);
10907
10908 case Intrinsic::ppc_rldimi: {
10909 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10910 SDValue Src = Op.getOperand(1);
10911 APInt Mask = Op.getConstantOperandAPInt(4);
10912 if (Mask.isZero())
10913 return Op.getOperand(2);
10914 if (Mask.isAllOnes())
10915 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
10916 uint64_t SH = Op.getConstantOperandVal(3);
10917 unsigned MB = 0, ME = 0;
10918 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10919 report_fatal_error("invalid rldimi mask!");
10920 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10921 if (ME < 63 - SH) {
10922 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10923 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10924 } else if (ME > 63 - SH) {
10925 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10926 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10927 }
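// Illustrative example (not from the original source): Mask ==
// 0x00000000FFFF0000 gives MB == 32 and ME == 47; with SH == 16 we have
// ME == 63 - SH, so no pre-rotation is needed and RLDIMI below inserts the
// rotated Src into operand 2 under the mask covering big-endian bits 32..47.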
10928 return SDValue(
10929 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10930 {Op.getOperand(2), Src,
10931 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10932 DAG.getTargetConstant(MB, dl, MVT::i32)}),
10933 0);
10934 }
10935
10936 case Intrinsic::ppc_rlwimi: {
10937 APInt Mask = Op.getConstantOperandAPInt(4);
10938 if (Mask.isZero())
10939 return Op.getOperand(2);
10940 if (Mask.isAllOnes())
10941 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
10942 Op.getOperand(3));
10943 unsigned MB = 0, ME = 0;
10944 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10945 report_fatal_error("invalid rlwimi mask!");
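// Illustrative example (not from the original source): Mask == 0x00FFFF00
// yields MB == 8 and ME == 23 (PowerPC big-endian bit numbering), which
// become the MB/ME operands of the RLWIMI machine node below.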
10946 return SDValue(DAG.getMachineNode(
10947 PPC::RLWIMI, dl, MVT::i32,
10948 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10949 DAG.getTargetConstant(MB, dl, MVT::i32),
10950 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10951 0);
10952 }
10953
10954 case Intrinsic::ppc_rlwnm: {
10955 if (Op.getConstantOperandVal(3) == 0)
10956 return DAG.getConstant(0, dl, MVT::i32);
10957 unsigned MB = 0, ME = 0;
10958 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
10959 report_fatal_error("invalid rlwnm mask!");
10960 return SDValue(
10961 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
10962 {Op.getOperand(1), Op.getOperand(2),
10963 DAG.getTargetConstant(MB, dl, MVT::i32),
10964 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10965 0);
10966 }
10967
10968 case Intrinsic::ppc_mma_disassemble_acc: {
10969 if (Subtarget.isISAFuture()) {
10970 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10971 SDValue WideVec =
10972 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
10973 Op.getOperand(1)),
10974 0);
10975 SmallVector<SDValue, 4> RetOps;
10976 SDValue Value = SDValue(WideVec.getNode(), 0);
10977 SDValue Value2 = SDValue(WideVec.getNode(), 1);
10978
10979 SDValue Extract;
10980 Extract = DAG.getNode(
10981 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10982 Subtarget.isLittleEndian() ? Value2 : Value,
10983 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10984 dl, getPointerTy(DAG.getDataLayout())));
10985 RetOps.push_back(Extract);
10986 Extract = DAG.getNode(
10987 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10988 Subtarget.isLittleEndian() ? Value2 : Value,
10989 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10990 dl, getPointerTy(DAG.getDataLayout())));
10991 RetOps.push_back(Extract);
10992 Extract = DAG.getNode(
10993 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10994 Subtarget.isLittleEndian() ? Value : Value2,
10995 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10996 dl, getPointerTy(DAG.getDataLayout())));
10997 RetOps.push_back(Extract);
10998 Extract = DAG.getNode(
10999 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11000 Subtarget.isLittleEndian() ? Value : Value2,
11001 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11002 dl, getPointerTy(DAG.getDataLayout())));
11003 RetOps.push_back(Extract);
11004 return DAG.getMergeValues(RetOps, dl);
11005 }
11006 [[fallthrough]];
11007 }
11008 case Intrinsic::ppc_vsx_disassemble_pair: {
11009 int NumVecs = 2;
11010 SDValue WideVec = Op.getOperand(1);
11011 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11012 NumVecs = 4;
11013 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11014 }
11015 SmallVector<SDValue, 4> RetOps;
11016 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11017 SDValue Extract = DAG.getNode(
11018 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11019 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11020 : VecNo,
11021 dl, getPointerTy(DAG.getDataLayout())));
11022 RetOps.push_back(Extract);
11023 }
11024 return DAG.getMergeValues(RetOps, dl);
11025 }
11026
11027 case Intrinsic::ppc_mma_xxmfacc:
11028 case Intrinsic::ppc_mma_xxmtacc: {
11029 // Allow pre-isa-future subtargets to lower as normal.
11030 if (!Subtarget.isISAFuture())
11031 return SDValue();
11032 // The intrinsics for xxmtacc and xxmfacc take one argument of
11033 // type v512i1. For future CPUs the corresponding wacc instruction
11034 // dmxx[inst|extf]dmr512 is always generated for type v512i1, removing
11035 // the need to produce the xxm[t|f]acc.
11036 SDValue WideVec = Op.getOperand(1);
11037 DAG.ReplaceAllUsesWith(Op, WideVec);
11038 return SDValue();
11039 }
11040
11041 case Intrinsic::ppc_unpack_longdouble: {
11042 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11043 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11044 "Argument of long double unpack must be 0 or 1!");
11045 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11046 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11047 Idx->getValueType(0)));
11048 }
11049
11050 case Intrinsic::ppc_compare_exp_lt:
11051 case Intrinsic::ppc_compare_exp_gt:
11052 case Intrinsic::ppc_compare_exp_eq:
11053 case Intrinsic::ppc_compare_exp_uo: {
11054 unsigned Pred;
11055 switch (IntrinsicID) {
11056 case Intrinsic::ppc_compare_exp_lt:
11057 Pred = PPC::PRED_LT;
11058 break;
11059 case Intrinsic::ppc_compare_exp_gt:
11060 Pred = PPC::PRED_GT;
11061 break;
11062 case Intrinsic::ppc_compare_exp_eq:
11063 Pred = PPC::PRED_EQ;
11064 break;
11065 case Intrinsic::ppc_compare_exp_uo:
11066 Pred = PPC::PRED_UN;
11067 break;
11068 }
11069 return SDValue(
11070 DAG.getMachineNode(
11071 PPC::SELECT_CC_I4, dl, MVT::i32,
11072 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11073 Op.getOperand(1), Op.getOperand(2)),
11074 0),
11075 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11076 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11077 0);
11078 }
11079 case Intrinsic::ppc_test_data_class: {
11080 EVT OpVT = Op.getOperand(1).getValueType();
11081 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11082 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11083 : PPC::XSTSTDCSP);
11084 return SDValue(
11085 DAG.getMachineNode(
11086 PPC::SELECT_CC_I4, dl, MVT::i32,
11087 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11088 Op.getOperand(1)),
11089 0),
11090 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11091 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11092 0);
11093 }
11094 case Intrinsic::ppc_fnmsub: {
11095 EVT VT = Op.getOperand(1).getValueType();
11096 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11097 return DAG.getNode(
11098 ISD::FNEG, dl, VT,
11099 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11100 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11101 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11102 Op.getOperand(2), Op.getOperand(3));
11103 }
11104 case Intrinsic::ppc_convert_f128_to_ppcf128:
11105 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11106 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11107 ? RTLIB::CONVERT_PPCF128_F128
11108 : RTLIB::CONVERT_F128_PPCF128;
11109 MakeLibCallOptions CallOptions;
11110 std::pair<SDValue, SDValue> Result =
11111 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11112 dl, SDValue());
11113 return Result.first;
11114 }
11115 case Intrinsic::ppc_maxfe:
11116 case Intrinsic::ppc_maxfl:
11117 case Intrinsic::ppc_maxfs:
11118 case Intrinsic::ppc_minfe:
11119 case Intrinsic::ppc_minfl:
11120 case Intrinsic::ppc_minfs: {
11121 EVT VT = Op.getValueType();
11122 assert(
11123 all_of(Op->ops().drop_front(4),
11124 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11125 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11126 (void)VT;
11127 ISD::CondCode CC = ISD::SETGT;
11128 if (IntrinsicID == Intrinsic::ppc_minfe ||
11129 IntrinsicID == Intrinsic::ppc_minfl ||
11130 IntrinsicID == Intrinsic::ppc_minfs)
11131 CC = ISD::SETLT;
11132 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11133 SDValue Res = Op.getOperand(I);
11134 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11135 Res =
11136 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11137 }
11138 return Res;
11139 }
11140 }
11141
11142 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11143 // opcode number of the comparison.
11144 int CompareOpc;
11145 bool isDot;
11146 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11147 return SDValue(); // Don't custom lower most intrinsics.
11148
11149 // If this is a non-dot comparison, make the VCMP node and we are done.
11150 if (!isDot) {
11151 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11152 Op.getOperand(1), Op.getOperand(2),
11153 DAG.getConstant(CompareOpc, dl, MVT::i32));
11154 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11155 }
11156
11157 // Create the PPCISD altivec 'dot' comparison node.
11158 SDValue Ops[] = {
11159 Op.getOperand(2), // LHS
11160 Op.getOperand(3), // RHS
11161 DAG.getConstant(CompareOpc, dl, MVT::i32)
11162 };
11163 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11164 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11165
11166 // Now that we have the comparison, emit a copy from the CR to a GPR.
11167 // This is flagged to the above dot comparison.
11168 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11169 DAG.getRegister(PPC::CR6, MVT::i32),
11170 CompNode.getValue(1));
11171
11172 // Unpack the result based on how the target uses it.
11173 unsigned BitNo; // Bit # of CR6.
11174 bool InvertBit; // Invert result?
11175 switch (Op.getConstantOperandVal(1)) {
11176 default: // Can't happen, don't crash on invalid number though.
11177 case 0: // Return the value of the EQ bit of CR6.
11178 BitNo = 0; InvertBit = false;
11179 break;
11180 case 1: // Return the inverted value of the EQ bit of CR6.
11181 BitNo = 0; InvertBit = true;
11182 break;
11183 case 2: // Return the value of the LT bit of CR6.
11184 BitNo = 2; InvertBit = false;
11185 break;
11186 case 3: // Return the inverted value of the LT bit of CR6.
11187 BitNo = 2; InvertBit = true;
11188 break;
11189 }
11190
11191 // Shift the bit into the low position.
11192 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11193 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11194 // Isolate the bit.
11195 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11196 DAG.getConstant(1, dl, MVT::i32));
11197
11198 // If we are supposed to, toggle the bit.
11199 if (InvertBit)
11200 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11201 DAG.getConstant(1, dl, MVT::i32));
11202 return Flags;
11203}
11204
11205SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11206 SelectionDAG &DAG) const {
11207 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11208 // the beginning of the argument list.
11209 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11210 SDLoc DL(Op);
11211 switch (Op.getConstantOperandVal(ArgStart)) {
11212 case Intrinsic::ppc_cfence: {
11213 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11214 SDValue Val = Op.getOperand(ArgStart + 1);
11215 EVT Ty = Val.getValueType();
11216 if (Ty == MVT::i128) {
11217 // FIXME: Testing one of two paired registers is sufficient to guarantee
11218 // ordering?
11219 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11220 }
11221 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11222 EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
11223 return SDValue(
11224 DAG.getMachineNode(Opcode, DL, MVT::Other,
11225 DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
11226 Op.getOperand(0)),
11227 0);
11228 }
11229 default:
11230 break;
11231 }
11232 return SDValue();
11233}
11234
11235// Lower scalar BSWAP64 to xxbrd.
11236SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11237 SDLoc dl(Op);
11238 if (!Subtarget.isPPC64())
11239 return Op;
11240 // MTVSRDD
11241 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11242 Op.getOperand(0));
11243 // XXBRD
11244 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11245 // MFVSRD
11246 int VectorIndex = 0;
11247 if (Subtarget.isLittleEndian())
11248 VectorIndex = 1;
11249 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11250 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11251 return Op;
11252}
11253
11254// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11255// compared to a value that is atomically loaded (atomic loads zero-extend).
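// For example, an i8 cmpxchg whose expected value is not already known to
// be zero-extended has that operand masked with 0xFF (0xFFFF for i16)
// before being rewritten to the partword compare-and-swap node below.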
11256SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11257 SelectionDAG &DAG) const {
11258 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11259 "Expecting an atomic compare-and-swap here.");
11260 SDLoc dl(Op);
11261 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11262 EVT MemVT = AtomicNode->getMemoryVT();
11263 if (MemVT.getSizeInBits() >= 32)
11264 return Op;
11265
11266 SDValue CmpOp = Op.getOperand(2);
11267 // If this is already correctly zero-extended, leave it alone.
11268 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11269 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11270 return Op;
11271
11272 // Clear the high bits of the compare operand.
11273 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11274 SDValue NewCmpOp =
11275 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11276 DAG.getConstant(MaskVal, dl, MVT::i32));
11277
11278 // Replace the existing compare operand with the properly zero-extended one.
11279 SmallVector<SDValue, 4> Ops;
11280 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11281 Ops.push_back(AtomicNode->getOperand(i));
11282 Ops[2] = NewCmpOp;
11283 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11284 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11285 auto NodeTy =
11286 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11287 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11288}
11289
11290SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11291 SelectionDAG &DAG) const {
11292 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11293 EVT MemVT = N->getMemoryVT();
11294 assert(MemVT.getSimpleVT() == MVT::i128 &&
11295 "Expect quadword atomic operations");
11296 SDLoc dl(N);
11297 unsigned Opc = N->getOpcode();
11298 switch (Opc) {
11299 case ISD::ATOMIC_LOAD: {
11300 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11301 // lowered to ppc instructions by pattern matching instruction selector.
11302 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11303 SmallVector<SDValue, 4> Ops{
11304 N->getOperand(0),
11305 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11306 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11307 Ops.push_back(N->getOperand(I));
11308 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11309 Ops, MemVT, N->getMemOperand());
11310 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11311 SDValue ValHi =
11312 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11313 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11314 DAG.getConstant(64, dl, MVT::i32));
11315 SDValue Val =
11316 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11317 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11318 {Val, LoadedVal.getValue(2)});
11319 }
11320 case ISD::ATOMIC_STORE: {
11321 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11322 // lowered to ppc instructions by pattern matching instruction selector.
11323 SDVTList Tys = DAG.getVTList(MVT::Other);
11324 SmallVector<SDValue, 4> Ops{
11325 N->getOperand(0),
11326 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11327 SDValue Val = N->getOperand(1);
11328 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11329 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11330 DAG.getConstant(64, dl, MVT::i32));
11331 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11332 Ops.push_back(ValLo);
11333 Ops.push_back(ValHi);
11334 Ops.push_back(N->getOperand(2));
11335 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11336 N->getMemOperand());
11337 }
11338 default:
11339 llvm_unreachable("Unexpected atomic opcode");
11340 }
11341}
11342
11343 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11344 SelectionDAG &DAG,
11345 const PPCSubtarget &Subtarget) {
11346 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11347
11348 enum DataClassMask {
11349 DC_NAN = 1 << 6,
11350 DC_NEG_INF = 1 << 4,
11351 DC_POS_INF = 1 << 5,
11352 DC_NEG_ZERO = 1 << 2,
11353 DC_POS_ZERO = 1 << 3,
11354 DC_NEG_SUBNORM = 1,
11355 DC_POS_SUBNORM = 1 << 1,
11356 };
11357
11358 EVT VT = Op.getValueType();
11359
11360 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11361 : VT == MVT::f64 ? PPC::XSTSTDCDP
11362 : PPC::XSTSTDCSP;
11363
11364 if (Mask == fcAllFlags)
11365 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11366 if (Mask == 0)
11367 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11368
11369 // When it is cheaper or necessary, test the inverted flags instead.
11370 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11371 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11372 return DAG.getNOT(Dl, Rev, MVT::i1);
11373 }
11374
11375 // Power doesn't support testing whether a value is 'normal'. Test the rest
11376 // first, and test if it's 'not not-normal' with expected sign.
11377 if (Mask & fcNormal) {
11378 SDValue Rev(DAG.getMachineNode(
11379 TestOp, Dl, MVT::i32,
11380 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11381 DC_NEG_ZERO | DC_POS_ZERO |
11382 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11383 Dl, MVT::i32),
11384 Op),
11385 0);
11386 // The sign is stored in CR bit 0, the result in CR bit 2.
11387 SDValue Sign(
11388 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11389 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11390 0);
11391 SDValue Normal(DAG.getNOT(
11392 Dl,
11393 SDValue(DAG.getMachineNode(
11394 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11395 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11396 0),
11397 MVT::i1));
11398 if (Mask & fcPosNormal)
11399 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11400 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11401 if (Mask == fcPosNormal || Mask == fcNegNormal)
11402 return Result;
11403
11404 return DAG.getNode(
11405 ISD::OR, Dl, MVT::i1,
11406 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11407 }
11408
11409 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11410 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11411 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11412 bool IsQuiet = Mask & fcQNan;
11413 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11414
11415 // Quietness is determined by the first bit in the fraction field.
11416 uint64_t QuietMask = 0;
11417 SDValue HighWord;
11418 if (VT == MVT::f128) {
11419 HighWord = DAG.getNode(
11420 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11421 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11422 QuietMask = 0x8000;
11423 } else if (VT == MVT::f64) {
11424 if (Subtarget.isPPC64()) {
11425 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11426 DAG.getBitcast(MVT::i64, Op),
11427 DAG.getConstant(1, Dl, MVT::i32));
11428 } else {
11429 SDValue Vec = DAG.getBitcast(
11430 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11431 HighWord = DAG.getNode(
11432 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11433 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11434 }
11435 QuietMask = 0x80000;
11436 } else if (VT == MVT::f32) {
11437 HighWord = DAG.getBitcast(MVT::i32, Op);
11438 QuietMask = 0x400000;
11439 }
11440 SDValue NanRes = DAG.getSetCC(
11441 Dl, MVT::i1,
11442 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11443 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11444 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11445 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11446 if (Mask == fcQNan || Mask == fcSNan)
11447 return NanRes;
11448
11449 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11450 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11451 NanRes);
11452 }
11453
11454 unsigned NativeMask = 0;
11455 if ((Mask & fcNan) == fcNan)
11456 NativeMask |= DC_NAN;
11457 if (Mask & fcNegInf)
11458 NativeMask |= DC_NEG_INF;
11459 if (Mask & fcPosInf)
11460 NativeMask |= DC_POS_INF;
11461 if (Mask & fcNegZero)
11462 NativeMask |= DC_NEG_ZERO;
11463 if (Mask & fcPosZero)
11464 NativeMask |= DC_POS_ZERO;
11465 if (Mask & fcNegSubnormal)
11466 NativeMask |= DC_NEG_SUBNORM;
11467 if (Mask & fcPosSubnormal)
11468 NativeMask |= DC_POS_SUBNORM;
11469 return SDValue(
11470 DAG.getMachineNode(
11471 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11472 SDValue(DAG.getMachineNode(
11473 TestOp, Dl, MVT::i32,
11474 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11475 0),
11476 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11477 0);
11478}
11479
11480SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11481 SelectionDAG &DAG) const {
11482 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11483 SDValue LHS = Op.getOperand(0);
11484 uint64_t RHSC = Op.getConstantOperandVal(1);
11485 SDLoc Dl(Op);
11486 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11487 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11488}
11489
11490SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11491 SelectionDAG &DAG) const {
11492 SDLoc dl(Op);
11493 // Create a stack slot that is 16-byte aligned.
11494 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11495 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11496 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11497 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11498
11499 SDValue Val = Op.getOperand(0);
11500 EVT ValVT = Val.getValueType();
11501 // P10 hardware store forwarding requires that a single store contains all
11502 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11503 // to avoid load hit store on P10 when running binaries compiled for older
11504 // processors by generating two mergeable scalar stores to forward with the
11505 // vector load.
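// Roughly: on a 64-bit big-endian target the scalar is shifted into the
// most-significant bits, stored to both 8-byte halves of the 16-byte slot,
// and the slot is then reloaded as a vector, so either half of the vector
// load is covered by a single forwardable store.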
11506 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11507 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11508 ValVT.getSizeInBits() <= 64) {
11509 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11510 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11511 SDValue ShiftBy = DAG.getConstant(
11512 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11513 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11514 SDValue Plus8 =
11515 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11516 SDValue Store2 =
11517 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11518 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11519 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11521 }
11522
11523 // Store the input value into Value#0 of the stack slot.
11524 SDValue Store =
11525 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11526 // Load it out.
11527 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11528}
11529
11530SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11531 SelectionDAG &DAG) const {
11532 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11533 "Should only be called for ISD::INSERT_VECTOR_ELT");
11534
11535 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11536
11537 EVT VT = Op.getValueType();
11538 SDLoc dl(Op);
11539 SDValue V1 = Op.getOperand(0);
11540 SDValue V2 = Op.getOperand(1);
11541
11542 if (VT == MVT::v2f64 && C)
11543 return Op;
11544
11545 if (Subtarget.hasP9Vector()) {
11546 // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11547 // because on P10, it allows this specific insert_vector_elt load pattern to
11548 // utilize the refactored load and store infrastructure in order to exploit
11549 // prefixed loads.
11550 // On targets with inexpensive direct moves (Power9 and up), a
11551 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11552 // load since a single precision load will involve conversion to double
11553 // precision on the load followed by another conversion to single precision.
11554 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11555 (isa<LoadSDNode>(V2))) {
11556 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11557 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11558 SDValue InsVecElt =
11559 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11560 BitcastLoad, Op.getOperand(2));
11561 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11562 }
11563 }
11564
11565 if (Subtarget.isISA3_1()) {
11566 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11567 return SDValue();
11568 // On P10, we have legal lowering for constant and variable indices for
11569 // all vectors.
11570 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11571 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11572 return Op;
11573 }
11574
11575 // Before P10, we have legal lowering for constant indices but not for
11576 // variable ones.
11577 if (!C)
11578 return SDValue();
11579
11580 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11581 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11582 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11583 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11584 unsigned InsertAtElement = C->getZExtValue();
11585 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11586 if (Subtarget.isLittleEndian()) {
11587 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11588 }
11589 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11590 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11591 }
11592 return Op;
11593}
11594
11595SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11596 SelectionDAG &DAG) const {
11597 SDLoc dl(Op);
11598 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11599 SDValue LoadChain = LN->getChain();
11600 SDValue BasePtr = LN->getBasePtr();
11601 EVT VT = Op.getValueType();
11602
11603 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11604 return Op;
11605
11606 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11607 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11608 // 2 or 4 vsx registers.
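// For example, a v512i1 accumulator becomes four v16i8 loads at offsets 0,
// 16, 32 and 48 that are recombined with ACC_BUILD; on little-endian
// targets the loaded registers are reversed first.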
11609 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11610 "Type unsupported without MMA");
11611 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11612 "Type unsupported without paired vector support");
11613 Align Alignment = LN->getAlign();
11614 SmallVector<SDValue, 4> Loads;
11615 SmallVector<SDValue, 4> LoadChains;
11616 unsigned NumVecs = VT.getSizeInBits() / 128;
11617 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11618 SDValue Load =
11619 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11620 LN->getPointerInfo().getWithOffset(Idx * 16),
11621 commonAlignment(Alignment, Idx * 16),
11622 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11623 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11624 DAG.getConstant(16, dl, BasePtr.getValueType()));
11625 Loads.push_back(Load);
11626 LoadChains.push_back(Load.getValue(1));
11627 }
11628 if (Subtarget.isLittleEndian()) {
11629 std::reverse(Loads.begin(), Loads.end());
11630 std::reverse(LoadChains.begin(), LoadChains.end());
11631 }
11632 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11633 SDValue Value =
11634 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11635 dl, VT, Loads);
11636 SDValue RetOps[] = {Value, TF};
11637 return DAG.getMergeValues(RetOps, dl);
11638}
11639
11640SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11641 SelectionDAG &DAG) const {
11642 SDLoc dl(Op);
11643 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11644 SDValue StoreChain = SN->getChain();
11645 SDValue BasePtr = SN->getBasePtr();
11646 SDValue Value = SN->getValue();
11647 SDValue Value2 = SN->getValue();
11648 EVT StoreVT = Value.getValueType();
11649
11650 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11651 return Op;
11652
11653 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11654 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11655 // underlying registers individually.
11656 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11657 "Type unsupported without MMA");
11658 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11659 "Type unsupported without paired vector support");
11660 Align Alignment = SN->getAlign();
11661 SmallVector<SDValue, 4> Stores;
11662 unsigned NumVecs = 2;
11663 if (StoreVT == MVT::v512i1) {
11664 if (Subtarget.isISAFuture()) {
11665 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11666 MachineSDNode *ExtNode = DAG.getMachineNode(
11667 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11668
11669 Value = SDValue(ExtNode, 0);
11670 Value2 = SDValue(ExtNode, 1);
11671 } else
11672 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11673 NumVecs = 4;
11674 }
11675 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11676 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11677 SDValue Elt;
11678 if (Subtarget.isISAFuture()) {
11679 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11680 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11681 Idx > 1 ? Value2 : Value,
11682 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11683 } else
11684 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11685 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11686
11687 SDValue Store =
11688 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11689 SN->getPointerInfo().getWithOffset(Idx * 16),
11690 commonAlignment(Alignment, Idx * 16),
11691 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11692 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11693 DAG.getConstant(16, dl, BasePtr.getValueType()));
11694 Stores.push_back(Store);
11695 }
11696 SDValue TF = DAG.getTokenFactor(dl, Stores);
11697 return TF;
11698}
11699
11700SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11701 SDLoc dl(Op);
11702 if (Op.getValueType() == MVT::v4i32) {
11703 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11704
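// The 32-bit product is assembled from 16-bit halves:
//   a*b (mod 2^32) = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 16)
// vmulouh supplies the first term below and vmsumuhm (against the
// half-swapped RHS) supplies the parenthesised sum.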
11705 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11706 // +16 as shift amt.
11707 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11708 SDValue RHSSwap = // = vrlw RHS, 16
11709 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11710
11711 // Shrinkify inputs to v8i16.
11712 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11713 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11714 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11715
11716 // Low parts multiplied together, generating 32-bit results (we ignore the
11717 // top parts).
11718 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11719 LHS, RHS, DAG, dl, MVT::v4i32);
11720
11721 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11722 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11723 // Shift the high parts up 16 bits.
11724 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11725 Neg16, DAG, dl);
11726 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11727 } else if (Op.getValueType() == MVT::v16i8) {
11728 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11729 bool isLittleEndian = Subtarget.isLittleEndian();
11730
11731 // Multiply the even 8-bit parts, producing 16-bit sums.
11732 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11733 LHS, RHS, DAG, dl, MVT::v8i16);
11734 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11735
11736 // Multiply the odd 8-bit parts, producing 16-bit sums.
11737 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11738 LHS, RHS, DAG, dl, MVT::v8i16);
11739 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11740
11741 // Merge the results together. Because vmuleub and vmuloub are
11742 // instructions with a big-endian bias, we must reverse the
11743 // element numbering and reverse the meaning of "odd" and "even"
11744 // when generating little endian code.
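// Only the low byte of each 16-bit partial product survives truncation to
// i8, so the shuffle interleaves those low bytes from the even and odd
// products.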
11745 int Ops[16];
11746 for (unsigned i = 0; i != 8; ++i) {
11747 if (isLittleEndian) {
11748 Ops[i*2 ] = 2*i;
11749 Ops[i*2+1] = 2*i+16;
11750 } else {
11751 Ops[i*2 ] = 2*i+1;
11752 Ops[i*2+1] = 2*i+1+16;
11753 }
11754 }
11755 if (isLittleEndian)
11756 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11757 else
11758 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11759 } else {
11760 llvm_unreachable("Unknown mul to lower!");
11761 }
11762}
11763
11764SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11765 bool IsStrict = Op->isStrictFPOpcode();
11766 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11767 !Subtarget.hasP9Vector())
11768 return SDValue();
11769
11770 return Op;
11771}
11772
11773// Custom lowering for fpext vf32 to v2f64
11774 // Custom lowering for fpext v2f32 to v2f64
11775
11776 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11777 "Should only be called for ISD::FP_EXTEND");
11778
11779 // FIXME: handle extends from half precision float vectors on P9.
11780 // We only want to custom lower an extend from v2f32 to v2f64.
11781 if (Op.getValueType() != MVT::v2f64 ||
11782 Op.getOperand(0).getValueType() != MVT::v2f32)
11783 return SDValue();
11784
11785 SDLoc dl(Op);
11786 SDValue Op0 = Op.getOperand(0);
11787
11788 switch (Op0.getOpcode()) {
11789 default:
11790 return SDValue();
11791 case ISD::EXTRACT_SUBVECTOR: {
11792 assert(Op0.getNumOperands() == 2 &&
11793 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11794 "Node should have 2 operands with second one being a constant!");
11795
11796 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11797 return SDValue();
11798
11799 // Custom lowering is only done for the high or low doubleword.
11800 int Idx = Op0.getConstantOperandVal(1);
11801 if (Idx % 2 != 0)
11802 return SDValue();
11803
11804 // Since input is v4f32, at this point Idx is either 0 or 2.
11805 // Shift to get the doubleword position we want.
11806 int DWord = Idx >> 1;
11807
11808 // High and low word positions are different on little endian.
11809 if (Subtarget.isLittleEndian())
11810 DWord ^= 0x1;
11811
11812 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11813 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11814 }
11815 case ISD::FADD:
11816 case ISD::FMUL:
11817 case ISD::FSUB: {
11818 SDValue NewLoad[2];
11819 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11820 // Ensure both inputs are loads.
11821 SDValue LdOp = Op0.getOperand(i);
11822 if (LdOp.getOpcode() != ISD::LOAD)
11823 return SDValue();
11824 // Generate new load node.
11825 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11826 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11827 NewLoad[i] = DAG.getMemIntrinsicNode(
11828 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11829 LD->getMemoryVT(), LD->getMemOperand());
11830 }
11831 SDValue NewOp =
11832 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11833 NewLoad[1], Op0.getNode()->getFlags());
11834 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11835 DAG.getConstant(0, dl, MVT::i32));
11836 }
11837 case ISD::LOAD: {
11838 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11839 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11840 SDValue NewLd = DAG.getMemIntrinsicNode(
11841 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11842 LD->getMemoryVT(), LD->getMemOperand());
11843 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11844 DAG.getConstant(0, dl, MVT::i32));
11845 }
11846 }
11847 llvm_unreachable("ERROR: Should return for all cases within switch.");
11848}
11849
11850/// LowerOperation - Provide custom lowering hooks for some operations.
11851///
11852 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11853 switch (Op.getOpcode()) {
11854 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11855 case ISD::FPOW: return lowerPow(Op, DAG);
11856 case ISD::FSIN: return lowerSin(Op, DAG);
11857 case ISD::FCOS: return lowerCos(Op, DAG);
11858 case ISD::FLOG: return lowerLog(Op, DAG);
11859 case ISD::FLOG10: return lowerLog10(Op, DAG);
11860 case ISD::FEXP: return lowerExp(Op, DAG);
11861 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11862 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11863 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11864 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11865 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11866 case ISD::STRICT_FSETCC:
11867 case ISD::STRICT_FSETCCS:
11868 case ISD::SETCC: return LowerSETCC(Op, DAG);
11869 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11870 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11871
11872 case ISD::INLINEASM:
11873 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11874 // Variable argument lowering.
11875 case ISD::VASTART: return LowerVASTART(Op, DAG);
11876 case ISD::VAARG: return LowerVAARG(Op, DAG);
11877 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11878
11879 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11880 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11881 case ISD::GET_DYNAMIC_AREA_OFFSET:
11882 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11883
11884 // Exception handling lowering.
11885 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11886 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11887 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11888
11889 case ISD::LOAD: return LowerLOAD(Op, DAG);
11890 case ISD::STORE: return LowerSTORE(Op, DAG);
11891 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11892 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11893 case ISD::STRICT_FP_TO_UINT:
11894 case ISD::STRICT_FP_TO_SINT:
11895 case ISD::FP_TO_UINT:
11896 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11897 case ISD::STRICT_UINT_TO_FP:
11898 case ISD::STRICT_SINT_TO_FP:
11899 case ISD::UINT_TO_FP:
11900 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11901 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11902
11903 // Lower 64-bit shifts.
11904 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11905 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11906 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11907
11908 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11909 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11910
11911 // Vector-related lowering.
11912 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11913 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11914 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11915 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11916 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11917 case ISD::MUL: return LowerMUL(Op, DAG);
11918 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11919 case ISD::STRICT_FP_ROUND:
11920 case ISD::FP_ROUND:
11921 return LowerFP_ROUND(Op, DAG);
11922 case ISD::ROTL: return LowerROTL(Op, DAG);
11923
11924 // For counter-based loop handling.
11925 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11926
11927 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11928
11929 // Frame & Return address.
11930 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11931 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11932
11933 case ISD::INTRINSIC_VOID:
11934 return LowerINTRINSIC_VOID(Op, DAG);
11935 case ISD::BSWAP:
11936 return LowerBSWAP(Op, DAG);
11937 case ISD::ATOMIC_CMP_SWAP:
11938 return LowerATOMIC_CMP_SWAP(Op, DAG);
11939 case ISD::ATOMIC_STORE:
11940 return LowerATOMIC_LOAD_STORE(Op, DAG);
11941 case ISD::IS_FPCLASS:
11942 return LowerIS_FPCLASS(Op, DAG);
11943 }
11944}
11945
11946 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11947 SmallVectorImpl<SDValue> &Results,
11948 SelectionDAG &DAG) const {
11949 SDLoc dl(N);
11950 switch (N->getOpcode()) {
11951 default:
11952 llvm_unreachable("Do not know how to custom type legalize this operation!");
11953 case ISD::ATOMIC_LOAD: {
11954 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11955 Results.push_back(Res);
11956 Results.push_back(Res.getValue(1));
11957 break;
11958 }
11959 case ISD::READCYCLECOUNTER: {
11960 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11961 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11962
11963 Results.push_back(
11964 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11965 Results.push_back(RTB.getValue(2));
11966 break;
11967 }
11968 case ISD::INTRINSIC_W_CHAIN: {
11969 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11970 break;
11971
11972 assert(N->getValueType(0) == MVT::i1 &&
11973 "Unexpected result type for CTR decrement intrinsic");
11974 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11975 N->getValueType(0));
11976 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11977 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11978 N->getOperand(1));
11979
11980 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11981 Results.push_back(NewInt.getValue(1));
11982 break;
11983 }
11984 case ISD::INTRINSIC_WO_CHAIN: {
11985 switch (N->getConstantOperandVal(0)) {
11986 case Intrinsic::ppc_pack_longdouble:
11987 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11988 N->getOperand(2), N->getOperand(1)));
11989 break;
11990 case Intrinsic::ppc_maxfe:
11991 case Intrinsic::ppc_minfe:
11992 case Intrinsic::ppc_fnmsub:
11993 case Intrinsic::ppc_convert_f128_to_ppcf128:
11994 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11995 break;
11996 }
11997 break;
11998 }
11999 case ISD::VAARG: {
12000 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12001 return;
12002
12003 EVT VT = N->getValueType(0);
12004
12005 if (VT == MVT::i64) {
12006 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12007
12008 Results.push_back(NewNode);
12009 Results.push_back(NewNode.getValue(1));
12010 }
12011 return;
12012 }
12013 case ISD::STRICT_FP_TO_SINT:
12014 case ISD::STRICT_FP_TO_UINT:
12015 case ISD::FP_TO_SINT:
12016 case ISD::FP_TO_UINT: {
12017 // LowerFP_TO_INT() can only handle f32 and f64.
12018 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12019 MVT::ppcf128)
12020 return;
12021 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12022 Results.push_back(LoweredValue);
12023 if (N->isStrictFPOpcode())
12024 Results.push_back(LoweredValue.getValue(1));
12025 return;
12026 }
12027 case ISD::TRUNCATE: {
12028 if (!N->getValueType(0).isVector())
12029 return;
12030 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12031 if (Lowered)
12032 Results.push_back(Lowered);
12033 return;
12034 }
12035 case ISD::FSHL:
12036 case ISD::FSHR:
12037 // Don't handle funnel shifts here.
12038 return;
12039 case ISD::BITCAST:
12040 // Don't handle bitcast here.
12041 return;
12042 case ISD::FP_EXTEND:
12043 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12044 if (Lowered)
12045 Results.push_back(Lowered);
12046 return;
12047 }
12048}
12049
12050//===----------------------------------------------------------------------===//
12051// Other Lowering Code
12052//===----------------------------------------------------------------------===//
12053
12054 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12055 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
12056 Function *Func = Intrinsic::getDeclaration(M, Id);
12057 return Builder.CreateCall(Func, {});
12058}
12059
12060 // The mappings for emitLeading/TrailingFence are taken from
12061// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
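// Roughly, as implemented below: seq_cst operations get a leading hwsync
// and release operations a leading lwsync; on the trailing side, acquire
// loads get a control-dependency fence via the ppc_cfence intrinsic and
// acquire RMW operations get an lwsync.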
12062 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12063 Instruction *Inst,
12064 AtomicOrdering Ord) const {
12065 if (Ord == AtomicOrdering::SequentiallyConsistent)
12066 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12067 if (isReleaseOrStronger(Ord))
12068 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12069 return nullptr;
12070}
12071
12072 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12073 Instruction *Inst,
12074 AtomicOrdering Ord) const {
12075 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12076 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12077 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12078 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12079 if (isa<LoadInst>(Inst))
12080 return Builder.CreateCall(
12081 Intrinsic::getDeclaration(
12082 Builder.GetInsertBlock()->getParent()->getParent(),
12083 Intrinsic::ppc_cfence, {Inst->getType()}),
12084 {Inst});
12085 // FIXME: Can use isync for rmw operation.
12086 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12087 }
12088 return nullptr;
12089}
12090
12091 MachineBasicBlock *
12092 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12093 unsigned AtomicSize,
12094 unsigned BinOpcode,
12095 unsigned CmpOpcode,
12096 unsigned CmpPred) const {
12097 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12098 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12099
12100 auto LoadMnemonic = PPC::LDARX;
12101 auto StoreMnemonic = PPC::STDCX;
12102 switch (AtomicSize) {
12103 default:
12104 llvm_unreachable("Unexpected size of atomic entity");
12105 case 1:
12106 LoadMnemonic = PPC::LBARX;
12107 StoreMnemonic = PPC::STBCX;
12108 assert(Subtarget.hasPartwordAtomics() && "Partword atomics required for sizes < 4");
12109 break;
12110 case 2:
12111 LoadMnemonic = PPC::LHARX;
12112 StoreMnemonic = PPC::STHCX;
12113 assert(Subtarget.hasPartwordAtomics() && "Partword atomics required for sizes < 4");
12114 break;
12115 case 4:
12116 LoadMnemonic = PPC::LWARX;
12117 StoreMnemonic = PPC::STWCX;
12118 break;
12119 case 8:
12120 LoadMnemonic = PPC::LDARX;
12121 StoreMnemonic = PPC::STDCX;
12122 break;
12123 }
12124
12125 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12126 MachineFunction *F = BB->getParent();
12127 MachineFunction::iterator It = ++BB->getIterator();
12128
12129 Register dest = MI.getOperand(0).getReg();
12130 Register ptrA = MI.getOperand(1).getReg();
12131 Register ptrB = MI.getOperand(2).getReg();
12132 Register incr = MI.getOperand(3).getReg();
12133 DebugLoc dl = MI.getDebugLoc();
12134
12135 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12136 MachineBasicBlock *loop2MBB =
12137 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12138 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12139 F->insert(It, loopMBB);
12140 if (CmpOpcode)
12141 F->insert(It, loop2MBB);
12142 F->insert(It, exitMBB);
12143 exitMBB->splice(exitMBB->begin(), BB,
12144 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12145 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12146
12147 MachineRegisterInfo &RegInfo = F->getRegInfo();
12148 Register TmpReg = (!BinOpcode) ? incr :
12149 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12150 : &PPC::GPRCRegClass);
12151
12152 // thisMBB:
12153 // ...
12154 // fallthrough --> loopMBB
12155 BB->addSuccessor(loopMBB);
12156
12157 // loopMBB:
12158 // l[wd]arx dest, ptr
12159 // add r0, dest, incr
12160 // st[wd]cx. r0, ptr
12161 // bne- loopMBB
12162 // fallthrough --> exitMBB
12163
12164 // For max/min...
12165 // loopMBB:
12166 // l[wd]arx dest, ptr
12167 // cmpl?[wd] dest, incr
12168 // bgt exitMBB
12169 // loop2MBB:
12170 // st[wd]cx. incr, ptr
12171 // bne- loopMBB
12172 // fallthrough --> exitMBB
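// Note that for max/min the conditional branch leaves the loop before the
// store, so memory is only updated with the incoming value when the
// comparison does not already hold.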
12173
12174 BB = loopMBB;
12175 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12176 .addReg(ptrA).addReg(ptrB);
12177 if (BinOpcode)
12178 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12179 if (CmpOpcode) {
12180 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12181 // Signed comparisons of byte or halfword values must be sign-extended.
12182 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12183 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12184 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12185 ExtReg).addReg(dest);
12186 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12187 } else
12188 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12189
12190 BuildMI(BB, dl, TII->get(PPC::BCC))
12191 .addImm(CmpPred)
12192 .addReg(CrReg)
12193 .addMBB(exitMBB);
12194 BB->addSuccessor(loop2MBB);
12195 BB->addSuccessor(exitMBB);
12196 BB = loop2MBB;
12197 }
12198 BuildMI(BB, dl, TII->get(StoreMnemonic))
12199 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12200 BuildMI(BB, dl, TII->get(PPC::BCC))
12201 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12202 BB->addSuccessor(loopMBB);
12203 BB->addSuccessor(exitMBB);
12204
12205 // exitMBB:
12206 // ...
12207 BB = exitMBB;
12208 return BB;
12209}
12210
12211 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12212 switch(MI.getOpcode()) {
12213 default:
12214 return false;
12215 case PPC::COPY:
12216 return TII->isSignExtended(MI.getOperand(1).getReg(),
12217 &MI.getMF()->getRegInfo());
12218 case PPC::LHA:
12219 case PPC::LHA8:
12220 case PPC::LHAU:
12221 case PPC::LHAU8:
12222 case PPC::LHAUX:
12223 case PPC::LHAUX8:
12224 case PPC::LHAX:
12225 case PPC::LHAX8:
12226 case PPC::LWA:
12227 case PPC::LWAUX:
12228 case PPC::LWAX:
12229 case PPC::LWAX_32:
12230 case PPC::LWA_32:
12231 case PPC::PLHA:
12232 case PPC::PLHA8:
12233 case PPC::PLHA8pc:
12234 case PPC::PLHApc:
12235 case PPC::PLWA:
12236 case PPC::PLWA8:
12237 case PPC::PLWA8pc:
12238 case PPC::PLWApc:
12239 case PPC::EXTSB:
12240 case PPC::EXTSB8:
12241 case PPC::EXTSB8_32_64:
12242 case PPC::EXTSB8_rec:
12243 case PPC::EXTSB_rec:
12244 case PPC::EXTSH:
12245 case PPC::EXTSH8:
12246 case PPC::EXTSH8_32_64:
12247 case PPC::EXTSH8_rec:
12248 case PPC::EXTSH_rec:
12249 case PPC::EXTSW:
12250 case PPC::EXTSWSLI:
12251 case PPC::EXTSWSLI_32_64:
12252 case PPC::EXTSWSLI_32_64_rec:
12253 case PPC::EXTSWSLI_rec:
12254 case PPC::EXTSW_32:
12255 case PPC::EXTSW_32_64:
12256 case PPC::EXTSW_32_64_rec:
12257 case PPC::EXTSW_rec:
12258 case PPC::SRAW:
12259 case PPC::SRAWI:
12260 case PPC::SRAWI_rec:
12261 case PPC::SRAW_rec:
12262 return true;
12263 }
12264 return false;
12265}
12266
12267 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12268 MachineInstr &MI, MachineBasicBlock *BB,
12269 bool is8bit, // operation
12270 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12271 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12272 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12273
12274 // If this is a signed comparison and the value being compared is not known
12275 // to be sign extended, sign extend it here.
12276 DebugLoc dl = MI.getDebugLoc();
12277 MachineFunction *F = BB->getParent();
12278 MachineRegisterInfo &RegInfo = F->getRegInfo();
12279 Register incr = MI.getOperand(3).getReg();
12280 bool IsSignExtended =
12281 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12282
12283 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12284 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12285 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12286 .addReg(MI.getOperand(3).getReg());
12287 MI.getOperand(3).setReg(ValueReg);
12288 incr = ValueReg;
12289 }
12290 // If we support part-word atomic mnemonics, just use them
12291 if (Subtarget.hasPartwordAtomics())
12292 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12293 CmpPred);
12294
12295 // In 64 bit mode we have to use 64 bits for addresses, even though the
12296 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12297 // registers without caring whether they're 32 or 64, but here we're
12298 // doing actual arithmetic on the addresses.
12299 bool is64bit = Subtarget.isPPC64();
12300 bool isLittleEndian = Subtarget.isLittleEndian();
12301 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12302
12303 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12304 MachineFunction::iterator It = ++BB->getIterator();
12305
12306 Register dest = MI.getOperand(0).getReg();
12307 Register ptrA = MI.getOperand(1).getReg();
12308 Register ptrB = MI.getOperand(2).getReg();
12309
12310 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12311 MachineBasicBlock *loop2MBB =
12312 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12313 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12314 F->insert(It, loopMBB);
12315 if (CmpOpcode)
12316 F->insert(It, loop2MBB);
12317 F->insert(It, exitMBB);
12318 exitMBB->splice(exitMBB->begin(), BB,
12319 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12320 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12321
12322 const TargetRegisterClass *RC =
12323 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12324 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12325
12326 Register PtrReg = RegInfo.createVirtualRegister(RC);
12327 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12328 Register ShiftReg =
12329 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12330 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12331 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12332 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12333 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12334 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12335 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12336 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12337 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12338 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12339 Register Ptr1Reg;
12340 Register TmpReg =
12341 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12342
12343 // thisMBB:
12344 // ...
12345 // fallthrough --> loopMBB
12346 BB->addSuccessor(loopMBB);
12347
12348 // The 4-byte load must be aligned, while a char or short may be
12349 // anywhere in the word. Hence all this nasty bookkeeping code.
12350 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12351 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12352 // xori shift, shift1, 24 [16]
12353 // rlwinm ptr, ptr1, 0, 0, 29
12354 // slw incr2, incr, shift
12355 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12356 // slw mask, mask2, shift
12357 // loopMBB:
12358 // lwarx tmpDest, ptr
12359 // add tmp, tmpDest, incr2
12360 // andc tmp2, tmpDest, mask
12361 // and tmp3, tmp, mask
12362 // or tmp4, tmp3, tmp2
12363 // stwcx. tmp4, ptr
12364 // bne- loopMBB
12365 // fallthrough --> exitMBB
12366 // srw SrwDest, tmpDest, shift
12367 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
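// In short: the aligned word containing the byte/halfword is updated in a
// lwarx/stwcx. loop, only the lanes selected by 'mask' are replaced, and
// the original value is shifted back down and zero-extended into 'dest'.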
12368 if (ptrA != ZeroReg) {
12369 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12370 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12371 .addReg(ptrA)
12372 .addReg(ptrB);
12373 } else {
12374 Ptr1Reg = ptrB;
12375 }
12376 // We need to use the 32-bit subregister here to avoid a register class
12377 // mismatch in 64-bit mode.
12378 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12379 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12380 .addImm(3)
12381 .addImm(27)
12382 .addImm(is8bit ? 28 : 27);
12383 if (!isLittleEndian)
12384 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12385 .addReg(Shift1Reg)
12386 .addImm(is8bit ? 24 : 16);
12387 if (is64bit)
12388 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12389 .addReg(Ptr1Reg)
12390 .addImm(0)
12391 .addImm(61);
12392 else
12393 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12394 .addReg(Ptr1Reg)
12395 .addImm(0)
12396 .addImm(0)
12397 .addImm(29);
12398 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12399 if (is8bit)
12400 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12401 else {
12402 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12403 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12404 .addReg(Mask3Reg)
12405 .addImm(65535);
12406 }
12407 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12408 .addReg(Mask2Reg)
12409 .addReg(ShiftReg);
12410
12411 BB = loopMBB;
12412 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12413 .addReg(ZeroReg)
12414 .addReg(PtrReg);
12415 if (BinOpcode)
12416 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12417 .addReg(Incr2Reg)
12418 .addReg(TmpDestReg);
12419 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12420 .addReg(TmpDestReg)
12421 .addReg(MaskReg);
12422 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12423 if (CmpOpcode) {
12424 // For unsigned comparisons, we can directly compare the shifted values.
12425 // For signed comparisons we shift and sign extend.
12426 Register SReg = RegInfo.createVirtualRegister(GPRC);
12427 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12428 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12429 .addReg(TmpDestReg)
12430 .addReg(MaskReg);
12431 unsigned ValueReg = SReg;
12432 unsigned CmpReg = Incr2Reg;
12433 if (CmpOpcode == PPC::CMPW) {
12434 ValueReg = RegInfo.createVirtualRegister(GPRC);
12435 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12436 .addReg(SReg)
12437 .addReg(ShiftReg);
12438 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12439 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12440 .addReg(ValueReg);
12441 ValueReg = ValueSReg;
12442 CmpReg = incr;
12443 }
12444 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12445 BuildMI(BB, dl, TII->get(PPC::BCC))
12446 .addImm(CmpPred)
12447 .addReg(CrReg)
12448 .addMBB(exitMBB);
12449 BB->addSuccessor(loop2MBB);
12450 BB->addSuccessor(exitMBB);
12451 BB = loop2MBB;
12452 }
12453 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12454 BuildMI(BB, dl, TII->get(PPC::STWCX))
12455 .addReg(Tmp4Reg)
12456 .addReg(ZeroReg)
12457 .addReg(PtrReg);
12458 BuildMI(BB, dl, TII->get(PPC::BCC))
12459 .addImm(PPC::PRED_NE)
12460 .addReg(PPC::CR0)
12461 .addMBB(loopMBB);
12462 BB->addSuccessor(loopMBB);
12463 BB->addSuccessor(exitMBB);
12464
12465 // exitMBB:
12466 // ...
12467 BB = exitMBB;
12468 // Since the shift amount is not a constant, we need to clear
12469 // the upper bits with a separate RLWINM.
12470 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12471 .addReg(SrwDestReg)
12472 .addImm(0)
12473 .addImm(is8bit ? 24 : 16)
12474 .addImm(31);
12475 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12476 .addReg(TmpDestReg)
12477 .addReg(ShiftReg);
12478 return BB;
12479}
12480
12481 MachineBasicBlock *
12482 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12483 MachineBasicBlock *MBB) const {
12484 DebugLoc DL = MI.getDebugLoc();
12485 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12486 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12487
12488 MachineFunction *MF = MBB->getParent();
12489 MachineRegisterInfo &MRI = MF->getRegInfo();
12490
12491 const BasicBlock *BB = MBB->getBasicBlock();
12492 MachineFunction::iterator I = ++MBB->getIterator();
12493
12494 Register DstReg = MI.getOperand(0).getReg();
12495 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12496 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12497 Register mainDstReg = MRI.createVirtualRegister(RC);
12498 Register restoreDstReg = MRI.createVirtualRegister(RC);
12499
12500 MVT PVT = getPointerTy(MF->getDataLayout());
12501 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12502 "Invalid Pointer Size!");
12503 // For v = setjmp(buf), we generate
12504 //
12505 // thisMBB:
12506 // SjLjSetup mainMBB
12507 // bl mainMBB
12508 // v_restore = 1
12509 // b sinkMBB
12510 //
12511 // mainMBB:
12512 // buf[LabelOffset] = LR
12513 // v_main = 0
12514 //
12515 // sinkMBB:
12516 // v = phi(main, restore)
12517 //
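// A direct return from setjmp therefore yields 0 (v_main), while returning
// via longjmp re-enters on the restore path after the setup and yields 1
// (v_restore).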
12518
12519 MachineBasicBlock *thisMBB = MBB;
12520 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12521 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12522 MF->insert(I, mainMBB);
12523 MF->insert(I, sinkMBB);
12524
12525 MachineInstrBuilder MIB;
12526
12527 // Transfer the remainder of BB and its successor edges to sinkMBB.
12528 sinkMBB->splice(sinkMBB->begin(), MBB,
12529 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12530 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12531
12532 // Note that the structure of the jmp_buf used here is not compatible
12533 // with that used by libc, and is not designed to be. Specifically, it
12534 // stores only those 'reserved' registers that LLVM does not otherwise
12535 // understand how to spill. Also, by convention, by the time this
12536 // intrinsic is called, Clang has already stored the frame address in the
12537 // first slot of the buffer and stack address in the third. Following the
12538 // X86 target code, we'll store the jump address in the second slot. We also
12539 // need to save the TOC pointer (R2) to handle jumps between shared
12540 // libraries, and that will be stored in the fourth slot. The thread
12541 // identifier (R13) is not affected.
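// In pointer-sized slots the buffer is thus laid out as:
//   buf[0] = frame address   (stored by the front end)
//   buf[1] = resume address  (LR, LabelOffset)
//   buf[2] = stack address   (stored by the front end)
//   buf[3] = TOC pointer     (R2, TOCOffset)
//   buf[4] = base pointer    (BPOffset)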
12542
12543 // thisMBB:
12544 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12545 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12546 const int64_t BPOffset = 4 * PVT.getStoreSize();
12547
12548 // Prepare the IP (resume address) in a register.
12549 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12550 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12551 Register BufReg = MI.getOperand(1).getReg();
12552
12553 if (Subtarget.is64BitELFABI()) {
12554 setUsesTOCBasePtr(*MBB->getParent());
12555 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12556 .addReg(PPC::X2)
12557 .addImm(TOCOffset)
12558 .addReg(BufReg)
12559 .cloneMemRefs(MI);
12560 }
12561
12562 // Naked functions never have a base pointer, and so we use r1. For all
12563 // other functions, this decision must be delayed until PEI.
12564 unsigned BaseReg;
12565 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12566 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12567 else
12568 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12569
12570 MIB = BuildMI(*thisMBB, MI, DL,
12571 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12572 .addReg(BaseReg)
12573 .addImm(BPOffset)
12574 .addReg(BufReg)
12575 .cloneMemRefs(MI);
12576
12577 // Setup
12578 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12579 MIB.addRegMask(TRI->getNoPreservedMask());
12580
12581 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12582
12583 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12584 .addMBB(mainMBB);
12585 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12586
12587 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12588 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12589
12590 // mainMBB:
12591 // mainDstReg = 0
12592 MIB =
12593 BuildMI(mainMBB, DL,
12594 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12595
12596 // Store IP
12597 if (Subtarget.isPPC64()) {
12598 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12599 .addReg(LabelReg)
12600 .addImm(LabelOffset)
12601 .addReg(BufReg);
12602 } else {
12603 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12604 .addReg(LabelReg)
12605 .addImm(LabelOffset)
12606 .addReg(BufReg);
12607 }
12608 MIB.cloneMemRefs(MI);
12609
12610 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12611 mainMBB->addSuccessor(sinkMBB);
12612
12613 // sinkMBB:
12614 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12615 TII->get(PPC::PHI), DstReg)
12616 .addReg(mainDstReg).addMBB(mainMBB)
12617 .addReg(restoreDstReg).addMBB(thisMBB);
12618
12619 MI.eraseFromParent();
12620 return sinkMBB;
12621}
12622
12623 MachineBasicBlock *
12624 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12625 MachineBasicBlock *MBB) const {
12626 DebugLoc DL = MI.getDebugLoc();
12627 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12628
12629 MachineFunction *MF = MBB->getParent();
12630 MachineRegisterInfo &MRI = MF->getRegInfo();
12631
12632 MVT PVT = getPointerTy(MF->getDataLayout());
12633 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12634 "Invalid Pointer Size!");
12635
12636 const TargetRegisterClass *RC =
12637 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12638 Register Tmp = MRI.createVirtualRegister(RC);
12639 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12640 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12641 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12642 unsigned BP =
12643 (PVT == MVT::i64)
12644 ? PPC::X30
12645 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12646 : PPC::R30);
12647
12648 MachineInstrBuilder MIB;
12649
12650 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12651 const int64_t SPOffset = 2 * PVT.getStoreSize();
12652 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12653 const int64_t BPOffset = 4 * PVT.getStoreSize();
12654
12655 Register BufReg = MI.getOperand(0).getReg();
12656
12657 // Reload FP (the jumped-to function may not have had a
12658 // frame pointer, and if so, then its r31 will be restored
12659 // as necessary).
12660 if (PVT == MVT::i64) {
12661 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12662 .addImm(0)
12663 .addReg(BufReg);
12664 } else {
12665 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12666 .addImm(0)
12667 .addReg(BufReg);
12668 }
12669 MIB.cloneMemRefs(MI);
12670
12671 // Reload IP
12672 if (PVT == MVT::i64) {
12673 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12674 .addImm(LabelOffset)
12675 .addReg(BufReg);
12676 } else {
12677 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12678 .addImm(LabelOffset)
12679 .addReg(BufReg);
12680 }
12681 MIB.cloneMemRefs(MI);
12682
12683 // Reload SP
12684 if (PVT == MVT::i64) {
12685 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12686 .addImm(SPOffset)
12687 .addReg(BufReg);
12688 } else {
12689 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12690 .addImm(SPOffset)
12691 .addReg(BufReg);
12692 }
12693 MIB.cloneMemRefs(MI);
12694
12695 // Reload BP
12696 if (PVT == MVT::i64) {
12697 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12698 .addImm(BPOffset)
12699 .addReg(BufReg);
12700 } else {
12701 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12702 .addImm(BPOffset)
12703 .addReg(BufReg);
12704 }
12705 MIB.cloneMemRefs(MI);
12706
12707 // Reload TOC
12708 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12709 setUsesTOCBasePtr(*MBB->getParent());
12710 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12711 .addImm(TOCOffset)
12712 .addReg(BufReg)
12713 .cloneMemRefs(MI);
12714 }
12715
12716 // Jump
12717 BuildMI(*MBB, MI, DL,
12718 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12719 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12720
12721 MI.eraseFromParent();
12722 return MBB;
12723}
12724
12725 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12726 // If the function specifically requests inline stack probes, emit them.
12727 if (MF.getFunction().hasFnAttribute("probe-stack"))
12728 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12729 "inline-asm";
12730 return false;
12731}
12732
12733 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12734 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12735 unsigned StackAlign = TFI->getStackAlignment();
12736 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12737 "Unexpected stack alignment");
12738 // The default stack probe size is 4096 if the function has no
12739 // stack-probe-size attribute.
12740 const Function &Fn = MF.getFunction();
12741 unsigned StackProbeSize =
12742 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12743 // Round down to the stack alignment.
12744 StackProbeSize &= ~(StackAlign - 1);
12745 return StackProbeSize ? StackProbeSize : StackAlign;
12746}
12747
12748// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12749// into three phases. In the first phase, it uses the pseudo instruction
12750// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
12751// and FinalStackPtr. In the second phase, it generates a loop for probing blocks.
12752// In the last phase, it uses the pseudo instruction DYNAREAOFFSET to get the
12753// future result of MaxCallFrameSize so that it can calculate the correct data area pointer.
12754 MachineBasicBlock *
12755 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12756 MachineBasicBlock *MBB) const {
12757 const bool isPPC64 = Subtarget.isPPC64();
12758 MachineFunction *MF = MBB->getParent();
12759 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12760 DebugLoc DL = MI.getDebugLoc();
12761 const unsigned ProbeSize = getStackProbeSize(*MF);
12762 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12763 MachineRegisterInfo &MRI = MF->getRegInfo();
12764 // The CFG of stack probing looks as follows:
12765 // +-----+
12766 // | MBB |
12767 // +--+--+
12768 // |
12769 // +----v----+
12770 // +--->+ TestMBB +---+
12771 // | +----+----+ |
12772 // | | |
12773 // | +-----v----+ |
12774 // +---+ BlockMBB | |
12775 // +----------+ |
12776 // |
12777 // +---------+ |
12778 // | TailMBB +<--+
12779 // +---------+
12780 // In MBB, calculate previous frame pointer and final stack pointer.
12781 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12782 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12783 // TailMBB is spliced via \p MI.
12784 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12785 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12786 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12787
12788 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12789 MF->insert(MBBIter, TestMBB);
12790 MF->insert(MBBIter, BlockMBB);
12791 MF->insert(MBBIter, TailMBB);
12792
12793 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12794 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12795
12796 Register DstReg = MI.getOperand(0).getReg();
12797 Register NegSizeReg = MI.getOperand(1).getReg();
12798 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12799 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12800 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12801 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12802
12803 // Since the value of NegSizeReg might be realigned during prologue/epilogue
12804 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12805 // actual FramePointer and NegSize.
12806 unsigned ProbeOpc;
12807 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12808 ProbeOpc =
12809 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12810 else
12811 // By using PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
12812 // and NegSizeReg will be allocated to the same physical register, avoiding
12813 // a redundant copy when NegSizeReg has only one use, namely the current MI,
12814 // which will then be replaced by PREPARE_PROBED_ALLOCA.
12815 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12816 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12817 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12818 .addDef(ActualNegSizeReg)
12819 .addReg(NegSizeReg)
12820 .add(MI.getOperand(2))
12821 .add(MI.getOperand(3));
12822
12823 // Calculate the final stack pointer, which equals SP + ActualNegSize.
12824 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12825 FinalStackPtr)
12826 .addReg(SPReg)
12827 .addReg(ActualNegSizeReg);
12828
12829 // Materialize a scratch register for update.
12830 int64_t NegProbeSize = -(int64_t)ProbeSize;
12831 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12832 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12833 if (!isInt<16>(NegProbeSize)) {
12834 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12835 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12836 .addImm(NegProbeSize >> 16);
12837 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12838 ScratchReg)
12839 .addReg(TempReg)
12840 .addImm(NegProbeSize & 0xFFFF);
12841 } else
12842 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12843 .addImm(NegProbeSize);
12844
12845 {
12846 // Probing leading residual part.
12847 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12848 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12849 .addReg(ActualNegSizeReg)
12850 .addReg(ScratchReg);
12851 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12852 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12853 .addReg(Div)
12854 .addReg(ScratchReg);
12855 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12856 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12857 .addReg(Mul)
12858 .addReg(ActualNegSizeReg);
12859 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12860 .addReg(FramePointer)
12861 .addReg(SPReg)
12862 .addReg(NegMod);
12863 }
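  // Because both ActualNegSizeReg and ScratchReg are negative, Div * ScratchReg
  // rounds ActualNegSize toward zero to a multiple of ProbeSize, and NegMod is
  // the non-positive remainder. The stdux/stwux above probes that residual and
  // advances SP by it, so the distance left to FinalStackPtr is an exact
  // multiple of ProbeSize for the loop in BlockMBB.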
12864
12865 {
12866 // Remaining part should be multiple of ProbeSize.
12867 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12868 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12869 .addReg(SPReg)
12870 .addReg(FinalStackPtr);
12871 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12872 .addImm(PPC::PRED_EQ)
12873 .addReg(CmpResult)
12874 .addMBB(TailMBB);
12875 TestMBB->addSuccessor(BlockMBB);
12876 TestMBB->addSuccessor(TailMBB);
12877 }
12878
12879 {
12880 // Touch the block.
12881 // |P...|P...|P...
12882 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12883 .addReg(FramePointer)
12884 .addReg(SPReg)
12885 .addReg(ScratchReg);
12886 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12887 BlockMBB->addSuccessor(TestMBB);
12888 }
12889
12890 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion,
12891 // so use the DYNAREAOFFSET pseudo instruction to get the future result.
12892 Register MaxCallFrameSizeReg =
12893 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12894 BuildMI(TailMBB, DL,
12895 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12896 MaxCallFrameSizeReg)
12897 .add(MI.getOperand(2))
12898 .add(MI.getOperand(3));
12899 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12900 .addReg(SPReg)
12901 .addReg(MaxCallFrameSizeReg);
12902
12903 // Splice instructions after MI to TailMBB.
12904 TailMBB->splice(TailMBB->end(), MBB,
12905 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12906 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12907 MBB->addSuccessor(TestMBB);
12908
12909 // Delete the pseudo instruction.
12910 MI.eraseFromParent();
12911
12912 ++NumDynamicAllocaProbed;
12913 return TailMBB;
12914}
12915
12916 static bool IsSelectCC(MachineInstr &MI) {
12917 switch (MI.getOpcode()) {
12918 case PPC::SELECT_CC_I4:
12919 case PPC::SELECT_CC_I8:
12920 case PPC::SELECT_CC_F4:
12921 case PPC::SELECT_CC_F8:
12922 case PPC::SELECT_CC_F16:
12923 case PPC::SELECT_CC_VRRC:
12924 case PPC::SELECT_CC_VSFRC:
12925 case PPC::SELECT_CC_VSSRC:
12926 case PPC::SELECT_CC_VSRC:
12927 case PPC::SELECT_CC_SPE4:
12928 case PPC::SELECT_CC_SPE:
12929 return true;
12930 default:
12931 return false;
12932 }
12933}
12934
12935static bool IsSelect(MachineInstr &MI) {
12936 switch (MI.getOpcode()) {
12937 case PPC::SELECT_I4:
12938 case PPC::SELECT_I8:
12939 case PPC::SELECT_F4:
12940 case PPC::SELECT_F8:
12941 case PPC::SELECT_F16:
12942 case PPC::SELECT_SPE:
12943 case PPC::SELECT_SPE4:
12944 case PPC::SELECT_VRRC:
12945 case PPC::SELECT_VSFRC:
12946 case PPC::SELECT_VSSRC:
12947 case PPC::SELECT_VSRC:
12948 return true;
12949 default:
12950 return false;
12951 }
12952}
12953
12954 MachineBasicBlock *
12955 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12956 MachineBasicBlock *BB) const {
12957 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12958 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12959 if (Subtarget.is64BitELFABI() &&
12960 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12961 !Subtarget.isUsingPCRelativeCalls()) {
12962 // Call lowering should have added an r2 operand to indicate a dependence
12963 // on the TOC base pointer value. It can't, however, because there is no
12964 // way to mark the dependence as implicit there, and so the stackmap code
12965 // will confuse it with a regular operand. Instead, add the dependence
12966 // here.
12967 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12968 }
12969
12970 return emitPatchPoint(MI, BB);
12971 }
12972
12973 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12974 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12975 return emitEHSjLjSetJmp(MI, BB);
12976 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12977 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12978 return emitEHSjLjLongJmp(MI, BB);
12979 }
12980
12981 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12982
12983 // To "insert" these instructions we actually have to insert their
12984 // control-flow patterns.
12985 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12986 MachineFunction::iterator It = ++BB->getIterator();
12987
12988 MachineFunction *F = BB->getParent();
12989 MachineRegisterInfo &MRI = F->getRegInfo();
12990
12991 if (Subtarget.hasISEL() &&
12992 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12993 MI.getOpcode() == PPC::SELECT_CC_I8 ||
12994 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
12995 SmallVector<MachineOperand, 2> Cond;
12996 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12997 MI.getOpcode() == PPC::SELECT_CC_I8)
12998 Cond.push_back(MI.getOperand(4));
12999 else
13000 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13001 Cond.push_back(MI.getOperand(1));
13002
13003 DebugLoc dl = MI.getDebugLoc();
13004 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13005 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13006 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13007 // The incoming instruction knows the destination vreg to set, the
13008 // condition code register to branch on, the true/false values to
13009 // select between, and a branch opcode to use.
13010
13011 // thisMBB:
13012 // ...
13013 // TrueVal = ...
13014 // cmpTY ccX, r1, r2
13015 // bCC sinkMBB
13016 // fallthrough --> copy0MBB
13017 MachineBasicBlock *thisMBB = BB;
13018 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13019 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13020 DebugLoc dl = MI.getDebugLoc();
13021 F->insert(It, copy0MBB);
13022 F->insert(It, sinkMBB);
13023
13024 // Set the call frame size on entry to the new basic blocks.
13025 // See https://reviews.llvm.org/D156113.
13026 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13027 copy0MBB->setCallFrameSize(CallFrameSize);
13028 sinkMBB->setCallFrameSize(CallFrameSize);
13029
13030 // Transfer the remainder of BB and its successor edges to sinkMBB.
13031 sinkMBB->splice(sinkMBB->begin(), BB,
13032 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13033 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13034
13035 // Next, add the true and fallthrough blocks as its successors.
13036 BB->addSuccessor(copy0MBB);
13037 BB->addSuccessor(sinkMBB);
13038
13039 if (IsSelect(MI)) {
13040 BuildMI(BB, dl, TII->get(PPC::BC))
13041 .addReg(MI.getOperand(1).getReg())
13042 .addMBB(sinkMBB);
13043 } else {
13044 unsigned SelectPred = MI.getOperand(4).getImm();
13045 BuildMI(BB, dl, TII->get(PPC::BCC))
13046 .addImm(SelectPred)
13047 .addReg(MI.getOperand(1).getReg())
13048 .addMBB(sinkMBB);
13049 }
13050
13051 // copy0MBB:
13052 // %FalseValue = ...
13053 // # fallthrough to sinkMBB
13054 BB = copy0MBB;
13055
13056 // Update machine-CFG edges
13057 BB->addSuccessor(sinkMBB);
13058
13059 // sinkMBB:
13060 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13061 // ...
13062 BB = sinkMBB;
13063 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13064 .addReg(MI.getOperand(3).getReg())
13065 .addMBB(copy0MBB)
13066 .addReg(MI.getOperand(2).getReg())
13067 .addMBB(thisMBB);
13068 } else if (MI.getOpcode() == PPC::ReadTB) {
13069 // To read the 64-bit time-base register on a 32-bit target, we read the
13070 // two halves. Should the counter have wrapped while it was being read, we
13071 // need to try again.
13072 // ...
13073 // readLoop:
13074 // mfspr Rx,TBU # load from TBU
13075 // mfspr Ry,TB # load from TB
13076 // mfspr Rz,TBU # load from TBU
13077 // cmpw crX,Rx,Rz # check if 'old'='new'
13078 // bne readLoop # branch if they're not equal
13079 // ...
13080
13081 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13082 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13083 DebugLoc dl = MI.getDebugLoc();
13084 F->insert(It, readMBB);
13085 F->insert(It, sinkMBB);
13086
13087 // Transfer the remainder of BB and its successor edges to sinkMBB.
13088 sinkMBB->splice(sinkMBB->begin(), BB,
13089 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13090 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13091
13092 BB->addSuccessor(readMBB);
13093 BB = readMBB;
13094
13095 MachineRegisterInfo &RegInfo = F->getRegInfo();
13096 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13097 Register LoReg = MI.getOperand(0).getReg();
13098 Register HiReg = MI.getOperand(1).getReg();
13099
13100 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13101 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13102 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13103
13104 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13105
13106 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13107 .addReg(HiReg)
13108 .addReg(ReadAgainReg);
13109 BuildMI(BB, dl, TII->get(PPC::BCC))
13110 .addImm(PPC::PRED_NE)
13111 .addReg(CmpReg)
13112 .addMBB(readMBB);
13113
13114 BB->addSuccessor(readMBB);
13115 BB->addSuccessor(sinkMBB);
13116 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13117 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13118 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13119 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13120 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13121 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13122 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13123 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13124
13125 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13126 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13127 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13128 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13129 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13130 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13131 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13132 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13133
13134 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13135 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13136 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13137 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13138 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13139 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13140 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13141 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13142
13143 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13144 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13145 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13146 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13147 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13148 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13149 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13150 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13151
13152 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13153 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13154 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13155 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13156 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13157 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13158 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13159 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13160
13161 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13162 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13163 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13164 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13165 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13166 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13167 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13168 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13169
13170 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13171 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13172 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13173 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13174 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13175 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13176 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13177 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13178
13179 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13180 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13181 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13182 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13183 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13184 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13185 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13186 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13187
13188 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13189 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13190 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13191 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13192 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13193 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13194 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13195 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13196
13197 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13198 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13199 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13200 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13201 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13202 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13203 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13204 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13205
13206 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13207 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13208 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13209 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13210 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13211 BB = EmitAtomicBinary(MI, BB, 4, 0);
13212 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13213 BB = EmitAtomicBinary(MI, BB, 8, 0);
13214 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13215 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13216 (Subtarget.hasPartwordAtomics() &&
13217 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13218 (Subtarget.hasPartwordAtomics() &&
13219 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13220 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13221
13222 auto LoadMnemonic = PPC::LDARX;
13223 auto StoreMnemonic = PPC::STDCX;
13224 switch (MI.getOpcode()) {
13225 default:
13226 llvm_unreachable("Compare and swap of unknown size");
13227 case PPC::ATOMIC_CMP_SWAP_I8:
13228 LoadMnemonic = PPC::LBARX;
13229 StoreMnemonic = PPC::STBCX;
13230 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13231 break;
13232 case PPC::ATOMIC_CMP_SWAP_I16:
13233 LoadMnemonic = PPC::LHARX;
13234 StoreMnemonic = PPC::STHCX;
13235 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13236 break;
13237 case PPC::ATOMIC_CMP_SWAP_I32:
13238 LoadMnemonic = PPC::LWARX;
13239 StoreMnemonic = PPC::STWCX;
13240 break;
13241 case PPC::ATOMIC_CMP_SWAP_I64:
13242 LoadMnemonic = PPC::LDARX;
13243 StoreMnemonic = PPC::STDCX;
13244 break;
13245 }
13246 MachineRegisterInfo &RegInfo = F->getRegInfo();
13247 Register dest = MI.getOperand(0).getReg();
13248 Register ptrA = MI.getOperand(1).getReg();
13249 Register ptrB = MI.getOperand(2).getReg();
13250 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13251 Register oldval = MI.getOperand(3).getReg();
13252 Register newval = MI.getOperand(4).getReg();
13253 DebugLoc dl = MI.getDebugLoc();
13254
13255 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13256 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13257 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13258 F->insert(It, loop1MBB);
13259 F->insert(It, loop2MBB);
13260 F->insert(It, exitMBB);
13261 exitMBB->splice(exitMBB->begin(), BB,
13262 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13263 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13264
13265 // thisMBB:
13266 // ...
13267 // fallthrough --> loopMBB
13268 BB->addSuccessor(loop1MBB);
13269
13270 // loop1MBB:
13271 // l[bhwd]arx dest, ptr
13272 // cmp[wd] dest, oldval
13273 // bne- exitBB
13274 // loop2MBB:
13275 // st[bhwd]cx. newval, ptr
13276 // bne- loopMBB
13277 // b exitBB
13278 // exitBB:
13279 BB = loop1MBB;
13280 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13281 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13282 .addReg(dest)
13283 .addReg(oldval);
13284 BuildMI(BB, dl, TII->get(PPC::BCC))
13285 .addImm(PPC::PRED_NE)
13286 .addReg(CrReg)
13287 .addMBB(exitMBB);
13288 BB->addSuccessor(loop2MBB);
13289 BB->addSuccessor(exitMBB);
13290
13291 BB = loop2MBB;
13292 BuildMI(BB, dl, TII->get(StoreMnemonic))
13293 .addReg(newval)
13294 .addReg(ptrA)
13295 .addReg(ptrB);
13296 BuildMI(BB, dl, TII->get(PPC::BCC))
13297 .addImm(PPC::PRED_NE)
13298 .addReg(PPC::CR0)
13299 .addMBB(loop1MBB);
13300 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13301 BB->addSuccessor(loop1MBB);
13302 BB->addSuccessor(exitMBB);
13303
13304 // exitMBB:
13305 // ...
13306 BB = exitMBB;
13307 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13308 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13309 // We must use 64-bit registers for addresses when targeting 64-bit,
13310 // since we're actually doing arithmetic on them. Other registers
13311 // can be 32-bit.
13312 bool is64bit = Subtarget.isPPC64();
13313 bool isLittleEndian = Subtarget.isLittleEndian();
13314 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13315
13316 Register dest = MI.getOperand(0).getReg();
13317 Register ptrA = MI.getOperand(1).getReg();
13318 Register ptrB = MI.getOperand(2).getReg();
13319 Register oldval = MI.getOperand(3).getReg();
13320 Register newval = MI.getOperand(4).getReg();
13321 DebugLoc dl = MI.getDebugLoc();
13322
13323 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13324 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13325 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13326 F->insert(It, loop1MBB);
13327 F->insert(It, loop2MBB);
13328 F->insert(It, exitMBB);
13329 exitMBB->splice(exitMBB->begin(), BB,
13330 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13331 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13332
13333 MachineRegisterInfo &RegInfo = F->getRegInfo();
13334 const TargetRegisterClass *RC =
13335 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13336 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13337
13338 Register PtrReg = RegInfo.createVirtualRegister(RC);
13339 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13340 Register ShiftReg =
13341 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13342 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13343 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13344 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13345 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13346 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13347 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13348 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13349 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13350 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13351 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13352 Register Ptr1Reg;
13353 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13354 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13355 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13356 // thisMBB:
13357 // ...
13358 // fallthrough --> loopMBB
13359 BB->addSuccessor(loop1MBB);
13360
13361 // The 4-byte load must be aligned, while a char or short may be
13362 // anywhere in the word. Hence all this nasty bookkeeping code.
13363 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13364 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13365 // xori shift, shift1, 24 [16]
13366 // rlwinm ptr, ptr1, 0, 0, 29
13367 // slw newval2, newval, shift
13368 // slw oldval2, oldval,shift
13369 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13370 // slw mask, mask2, shift
13371 // and newval3, newval2, mask
13372 // and oldval3, oldval2, mask
13373 // loop1MBB:
13374 // lwarx tmpDest, ptr
13375 // and tmp, tmpDest, mask
13376 // cmpw tmp, oldval3
13377 // bne- exitBB
13378 // loop2MBB:
13379 // andc tmp2, tmpDest, mask
13380 // or tmp4, tmp2, newval3
13381 // stwcx. tmp4, ptr
13382 // bne- loop1MBB
13383 // b exitBB
13384 // exitBB:
13385 // srw dest, tmpDest, shift
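    // On big-endian targets the xori (with 24 for bytes, 16 for halfwords)
    // converts the byte position within the word into a shift amount from the
    // least-significant end; little-endian can use the rlwinm result directly.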
13386 if (ptrA != ZeroReg) {
13387 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13388 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13389 .addReg(ptrA)
13390 .addReg(ptrB);
13391 } else {
13392 Ptr1Reg = ptrB;
13393 }
13394
13395 // We need to use a 32-bit subregister to avoid a register class mismatch in
13396 // 64-bit mode.
13397 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13398 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13399 .addImm(3)
13400 .addImm(27)
13401 .addImm(is8bit ? 28 : 27);
13402 if (!isLittleEndian)
13403 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13404 .addReg(Shift1Reg)
13405 .addImm(is8bit ? 24 : 16);
13406 if (is64bit)
13407 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13408 .addReg(Ptr1Reg)
13409 .addImm(0)
13410 .addImm(61);
13411 else
13412 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13413 .addReg(Ptr1Reg)
13414 .addImm(0)
13415 .addImm(0)
13416 .addImm(29);
13417 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13418 .addReg(newval)
13419 .addReg(ShiftReg);
13420 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13421 .addReg(oldval)
13422 .addReg(ShiftReg);
13423 if (is8bit)
13424 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13425 else {
13426 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13427 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13428 .addReg(Mask3Reg)
13429 .addImm(65535);
13430 }
13431 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13432 .addReg(Mask2Reg)
13433 .addReg(ShiftReg);
13434 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13435 .addReg(NewVal2Reg)
13436 .addReg(MaskReg);
13437 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13438 .addReg(OldVal2Reg)
13439 .addReg(MaskReg);
13440
13441 BB = loop1MBB;
13442 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13443 .addReg(ZeroReg)
13444 .addReg(PtrReg);
13445 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13446 .addReg(TmpDestReg)
13447 .addReg(MaskReg);
13448 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13449 .addReg(TmpReg)
13450 .addReg(OldVal3Reg);
13451 BuildMI(BB, dl, TII->get(PPC::BCC))
13452 .addImm(PPC::PRED_NE)
13453 .addReg(CrReg)
13454 .addMBB(exitMBB);
13455 BB->addSuccessor(loop2MBB);
13456 BB->addSuccessor(exitMBB);
13457
13458 BB = loop2MBB;
13459 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13460 .addReg(TmpDestReg)
13461 .addReg(MaskReg);
13462 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13463 .addReg(Tmp2Reg)
13464 .addReg(NewVal3Reg);
13465 BuildMI(BB, dl, TII->get(PPC::STWCX))
13466 .addReg(Tmp4Reg)
13467 .addReg(ZeroReg)
13468 .addReg(PtrReg);
13469 BuildMI(BB, dl, TII->get(PPC::BCC))
13470 .addImm(PPC::PRED_NE)
13471 .addReg(PPC::CR0)
13472 .addMBB(loop1MBB);
13473 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13474 BB->addSuccessor(loop1MBB);
13475 BB->addSuccessor(exitMBB);
13476
13477 // exitMBB:
13478 // ...
13479 BB = exitMBB;
13480 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13481 .addReg(TmpReg)
13482 .addReg(ShiftReg);
13483 } else if (MI.getOpcode() == PPC::FADDrtz) {
13484 // This pseudo performs an FADD with rounding mode temporarily forced
13485 // to round-to-zero. We emit this via custom inserter since the FPSCR
13486 // is not modeled at the SelectionDAG level.
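    // The mtfsb1/mtfsb0 pair below sets FPSCR bit 63 and clears bit 62, i.e.
    // rounding mode 0b01 (round toward zero) in the encoding described in the
    // SETRNDi handling further down.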
13487 Register Dest = MI.getOperand(0).getReg();
13488 Register Src1 = MI.getOperand(1).getReg();
13489 Register Src2 = MI.getOperand(2).getReg();
13490 DebugLoc dl = MI.getDebugLoc();
13491
13492 MachineRegisterInfo &RegInfo = F->getRegInfo();
13493 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13494
13495 // Save FPSCR value.
13496 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13497
13498 // Set rounding mode to round-to-zero.
13499 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13500 .addImm(31)
13501 .addReg(PPC::RM, RegState::ImplicitDefine);
13502
13503 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13504 .addImm(30)
13505 .addReg(PPC::RM, RegState::ImplicitDefine);
13506
13507 // Perform addition.
13508 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13509 .addReg(Src1)
13510 .addReg(Src2);
13511 if (MI.getFlag(MachineInstr::NoFPExcept))
13512 MIB.setFlag(MachineInstr::NoFPExcept);
13513
13514 // Restore FPSCR value.
13515 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13516 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13517 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13518 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13519 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13520 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13521 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13522 ? PPC::ANDI8_rec
13523 : PPC::ANDI_rec;
13524 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13525 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13526
13527 MachineRegisterInfo &RegInfo = F->getRegInfo();
13528 Register Dest = RegInfo.createVirtualRegister(
13529 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13530
13531 DebugLoc Dl = MI.getDebugLoc();
13532 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13533 .addReg(MI.getOperand(1).getReg())
13534 .addImm(1);
13535 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13536 MI.getOperand(0).getReg())
13537 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13538 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13539 DebugLoc Dl = MI.getDebugLoc();
13540 MachineRegisterInfo &RegInfo = F->getRegInfo();
13541 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13542 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13543 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13544 MI.getOperand(0).getReg())
13545 .addReg(CRReg);
13546 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13547 DebugLoc Dl = MI.getDebugLoc();
13548 unsigned Imm = MI.getOperand(1).getImm();
13549 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13550 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13551 MI.getOperand(0).getReg())
13552 .addReg(PPC::CR0EQ);
13553 } else if (MI.getOpcode() == PPC::SETRNDi) {
13554 DebugLoc dl = MI.getDebugLoc();
13555 Register OldFPSCRReg = MI.getOperand(0).getReg();
13556
13557 // Save FPSCR value.
13558 if (MRI.use_empty(OldFPSCRReg))
13559 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13560 else
13561 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13562
13563 // The floating point rounding mode is in the bits 62:63 of FPSCR, and has
13564 // the following settings:
13565 // 00 Round to nearest
13566 // 01 Round to 0
13567 // 10 Round to +inf
13568 // 11 Round to -inf
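    // With the mtfsb-style immediates used below, operand 31 addresses FPSCR
    // bit 63 (the low rounding-mode bit) and operand 30 addresses FPSCR bit 62
    // (the high rounding-mode bit).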
13569
13570 // When the operand is an immediate, use its two least significant bits to
13571 // set bits 62:63 of FPSCR.
13572 unsigned Mode = MI.getOperand(1).getImm();
13573 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13574 .addImm(31)
13575 .addReg(PPC::RM, RegState::ImplicitDefine);
13576
13577 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13578 .addImm(30)
13579 .addReg(PPC::RM, RegState::ImplicitDefine);
13580 } else if (MI.getOpcode() == PPC::SETRND) {
13581 DebugLoc dl = MI.getDebugLoc();
13582
13583 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13584 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13585 // If the target doesn't have DirectMove, we should use the stack to do the
13586 // conversion, because the target doesn't have instructions like mtvsrd
13587 // or mfvsrd to do this conversion directly.
13588 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13589 if (Subtarget.hasDirectMove()) {
13590 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13591 .addReg(SrcReg);
13592 } else {
13593 // Use stack to do the register copy.
13594 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13595 MachineRegisterInfo &RegInfo = F->getRegInfo();
13596 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13597 if (RC == &PPC::F8RCRegClass) {
13598 // Copy register from F8RCRegClass to G8RCRegClass.
13599 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13600 "Unsupported RegClass.");
13601
13602 StoreOp = PPC::STFD;
13603 LoadOp = PPC::LD;
13604 } else {
13605 // Copy register from G8RCRegClass to F8RCRegClass.
13606 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13607 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13608 "Unsupported RegClass.");
13609 }
13610
13611 MachineFrameInfo &MFI = F->getFrameInfo();
13612 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13613
13614 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13615 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13616 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13617 MFI.getObjectAlign(FrameIdx));
13618
13619 // Store the SrcReg into the stack.
13620 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13621 .addReg(SrcReg)
13622 .addImm(0)
13623 .addFrameIndex(FrameIdx)
13624 .addMemOperand(MMOStore);
13625
13626 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13627 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13628 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13629 MFI.getObjectAlign(FrameIdx));
13630
13631 // Load from the stack where SrcReg is stored, and save to DestReg,
13632 // so we have done the RegClass conversion from RegClass::SrcReg to
13633 // RegClass::DestReg.
13634 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13635 .addImm(0)
13636 .addFrameIndex(FrameIdx)
13637 .addMemOperand(MMOLoad);
13638 }
13639 };
13640
13641 Register OldFPSCRReg = MI.getOperand(0).getReg();
13642
13643 // Save FPSCR value.
13644 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13645
13646 // When the operand is a gprc register, use its two least significant bits
13647 // and the mtfsf instruction to set bits 62:63 of FPSCR.
13648 //
13649 // copy OldFPSCRTmpReg, OldFPSCRReg
13650 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13651 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13652 // copy NewFPSCRReg, NewFPSCRTmpReg
13653 // mtfsf 255, NewFPSCRReg
13654 MachineOperand SrcOp = MI.getOperand(1);
13655 MachineRegisterInfo &RegInfo = F->getRegInfo();
13656 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13657
13658 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13659
13660 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13661 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13662
13663 // The first operand of INSERT_SUBREG should be a register which has
13664 // subregisters. Since we only care about its RegClass, we can use an
13665 // IMPLICIT_DEF register.
13666 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13667 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13668 .addReg(ImDefReg)
13669 .add(SrcOp)
13670 .addImm(1);
13671
13672 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13673 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13674 .addReg(OldFPSCRTmpReg)
13675 .addReg(ExtSrcReg)
13676 .addImm(0)
13677 .addImm(62);
13678
13679 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13680 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13681
13682 // The mask 255 means that bits 32:63 of NewFPSCRReg are copied into bits
13683 // 32:63 of FPSCR.
13684 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13685 .addImm(255)
13686 .addReg(NewFPSCRReg)
13687 .addImm(0)
13688 .addImm(0);
13689 } else if (MI.getOpcode() == PPC::SETFLM) {
13690 DebugLoc Dl = MI.getDebugLoc();
13691
13692 // Result of setflm is previous FPSCR content, so we need to save it first.
13693 Register OldFPSCRReg = MI.getOperand(0).getReg();
13694 if (MRI.use_empty(OldFPSCRReg))
13695 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13696 else
13697 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13698
13699 // Put bits in 32:63 to FPSCR.
13700 Register NewFPSCRReg = MI.getOperand(1).getReg();
13701 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13702 .addImm(255)
13703 .addReg(NewFPSCRReg)
13704 .addImm(0)
13705 .addImm(0);
13706 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13707 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13708 return emitProbedAlloca(MI, BB);
13709 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13710 DebugLoc DL = MI.getDebugLoc();
13711 Register Src = MI.getOperand(2).getReg();
13712 Register Lo = MI.getOperand(0).getReg();
13713 Register Hi = MI.getOperand(1).getReg();
13714 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13715 .addDef(Lo)
13716 .addUse(Src, 0, PPC::sub_gp8_x1);
13717 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13718 .addDef(Hi)
13719 .addUse(Src, 0, PPC::sub_gp8_x0);
13720 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13721 MI.getOpcode() == PPC::STQX_PSEUDO) {
13722 DebugLoc DL = MI.getDebugLoc();
13723 // Ptr is used as the ptr_rc_no_r0 part
13724 // of LQ/STQ's memory operand and holds the result of adding RA and RB,
13725 // so it has to be g8rc_and_g8rc_nox0.
13726 Register Ptr =
13727 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13728 Register Val = MI.getOperand(0).getReg();
13729 Register RA = MI.getOperand(1).getReg();
13730 Register RB = MI.getOperand(2).getReg();
13731 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13732 BuildMI(*BB, MI, DL,
13733 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13734 : TII->get(PPC::STQ))
13735 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13736 .addImm(0)
13737 .addReg(Ptr);
13738 } else {
13739 llvm_unreachable("Unexpected instr type to insert");
13740 }
13741
13742 MI.eraseFromParent(); // The pseudo instruction is gone now.
13743 return BB;
13744}
13745
13746//===----------------------------------------------------------------------===//
13747// Target Optimization Hooks
13748//===----------------------------------------------------------------------===//
13749
13750static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13751 // For the estimates, convergence is quadratic, so we essentially double the
13752 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13753 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13754 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
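  // For example, without hasRecipPrec() roughly 5 correct bits become
  // 10 -> 20 -> 40 after three iterations, covering f32's 24-bit significand,
  // and the extra step below covers f64's 53 bits; with the 2^-14 estimates a
  // single step (two for f64) is enough.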
13755 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13756 if (VT.getScalarType() == MVT::f64)
13757 RefinementSteps++;
13758 return RefinementSteps;
13759}
13760
13761SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13762 const DenormalMode &Mode) const {
13763 // We only have VSX Vector Test for software Square Root.
13764 EVT VT = Op.getValueType();
13765 if (!isTypeLegal(MVT::i1) ||
13766 (VT != MVT::f64 &&
13767 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13768 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13769
13770 SDLoc DL(Op);
13771 // The output register of FTSQRT is CR field.
13772 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13773 // ftsqrt BF,FRB
13774 // Let e_b be the unbiased exponent of the double-precision
13775 // floating-point operand in register FRB.
13776 // fe_flag is set to 1 if either of the following conditions occurs.
13777 // - The double-precision floating-point operand in register FRB is a zero,
13778 // a NaN, or an infinity, or a negative value.
13779 // - e_b is less than or equal to -970.
13780 // Otherwise fe_flag is set to 0.
13781 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13782 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13783 // exponent is less than -970)
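  // Returning the EQ bit as an i1 lets the generic square-root expansion
  // select the non-iterative result for inputs that are not eligible for
  // refinement.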
13784 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13785 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13786 FTSQRT, SRIdxVal),
13787 0);
13788}
13789
13790SDValue
13791PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13792 SelectionDAG &DAG) const {
13793 // We only have VSX Vector Square Root.
13794 EVT VT = Op.getValueType();
13795 if (VT != MVT::f64 &&
13796 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13797 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13798
13799 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13800}
13801
13802SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13803 int Enabled, int &RefinementSteps,
13804 bool &UseOneConstNR,
13805 bool Reciprocal) const {
13806 EVT VT = Operand.getValueType();
13807 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13808 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13809 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13810 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13811 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13812 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13813
13814 // The Newton-Raphson computation with a single constant does not provide
13815 // enough accuracy on some CPUs.
13816 UseOneConstNR = !Subtarget.needsTwoConstNR();
13817 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13818 }
13819 return SDValue();
13820}
13821
13822SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13823 int Enabled,
13824 int &RefinementSteps) const {
13825 EVT VT = Operand.getValueType();
13826 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13827 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13828 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13829 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13830 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13831 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13832 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13833 }
13834 return SDValue();
13835}
13836
13837unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13838 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13839 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13840 // enabled for division), this functionality is redundant with the default
13841 // combiner logic (once the division -> reciprocal/multiply transformation
13842 // has taken place). As a result, this matters more for older cores than for
13843 // newer ones.
13844
13845 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13846 // reciprocal if there are two or more FDIVs (for embedded cores with only
13847 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
13848 switch (Subtarget.getCPUDirective()) {
13849 default:
13850 return 3;
13851 case PPC::DIR_440:
13852 case PPC::DIR_A2:
13853 case PPC::DIR_E500:
13854 case PPC::DIR_E500mc:
13855 case PPC::DIR_E5500:
13856 return 2;
13857 }
13858}
13859
13860// isConsecutiveLSLoc needs to work even if all adds have not yet been
13861// collapsed, and so we need to look through chains of them.
13862 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13863 int64_t& Offset, SelectionDAG &DAG) {
13864 if (DAG.isBaseWithConstantOffset(Loc)) {
13865 Base = Loc.getOperand(0);
13866 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13867
13868 // The base might itself be a base plus an offset, and if so, accumulate
13869 // that as well.
13870 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13871 }
13872}
13873
13874 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13875 unsigned Bytes, int Dist,
13876 SelectionDAG &DAG) {
13877 if (VT.getSizeInBits() / 8 != Bytes)
13878 return false;
13879
13880 SDValue BaseLoc = Base->getBasePtr();
13881 if (Loc.getOpcode() == ISD::FrameIndex) {
13882 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13883 return false;
13884 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13885 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13886 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13887 int FS = MFI.getObjectSize(FI);
13888 int BFS = MFI.getObjectSize(BFI);
13889 if (FS != BFS || FS != (int)Bytes) return false;
13890 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13891 }
13892
13893 SDValue Base1 = Loc, Base2 = BaseLoc;
13894 int64_t Offset1 = 0, Offset2 = 0;
13895 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13896 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13897 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13898 return true;
13899
13900 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13901 const GlobalValue *GV1 = nullptr;
13902 const GlobalValue *GV2 = nullptr;
13903 Offset1 = 0;
13904 Offset2 = 0;
13905 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13906 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13907 if (isGA1 && isGA2 && GV1 == GV2)
13908 return Offset1 == (Offset2 + Dist*Bytes);
13909 return false;
13910}
13911
13912// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13913// not enforce equality of the chain operands.
13914 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13915 unsigned Bytes, int Dist,
13916 SelectionDAG &DAG) {
13917 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13918 EVT VT = LS->getMemoryVT();
13919 SDValue Loc = LS->getBasePtr();
13920 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13921 }
13922
13923 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13924 EVT VT;
13925 switch (N->getConstantOperandVal(1)) {
13926 default: return false;
13927 case Intrinsic::ppc_altivec_lvx:
13928 case Intrinsic::ppc_altivec_lvxl:
13929 case Intrinsic::ppc_vsx_lxvw4x:
13930 case Intrinsic::ppc_vsx_lxvw4x_be:
13931 VT = MVT::v4i32;
13932 break;
13933 case Intrinsic::ppc_vsx_lxvd2x:
13934 case Intrinsic::ppc_vsx_lxvd2x_be:
13935 VT = MVT::v2f64;
13936 break;
13937 case Intrinsic::ppc_altivec_lvebx:
13938 VT = MVT::i8;
13939 break;
13940 case Intrinsic::ppc_altivec_lvehx:
13941 VT = MVT::i16;
13942 break;
13943 case Intrinsic::ppc_altivec_lvewx:
13944 VT = MVT::i32;
13945 break;
13946 }
13947
13948 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13949 }
13950
13951 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13952 EVT VT;
13953 switch (N->getConstantOperandVal(1)) {
13954 default: return false;
13955 case Intrinsic::ppc_altivec_stvx:
13956 case Intrinsic::ppc_altivec_stvxl:
13957 case Intrinsic::ppc_vsx_stxvw4x:
13958 VT = MVT::v4i32;
13959 break;
13960 case Intrinsic::ppc_vsx_stxvd2x:
13961 VT = MVT::v2f64;
13962 break;
13963 case Intrinsic::ppc_vsx_stxvw4x_be:
13964 VT = MVT::v4i32;
13965 break;
13966 case Intrinsic::ppc_vsx_stxvd2x_be:
13967 VT = MVT::v2f64;
13968 break;
13969 case Intrinsic::ppc_altivec_stvebx:
13970 VT = MVT::i8;
13971 break;
13972 case Intrinsic::ppc_altivec_stvehx:
13973 VT = MVT::i16;
13974 break;
13975 case Intrinsic::ppc_altivec_stvewx:
13976 VT = MVT::i32;
13977 break;
13978 }
13979
13980 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13981 }
13982
13983 return false;
13984}
13985
13986 // Return true if there is a nearby consecutive load to the one provided
13987 // (regardless of alignment). We search up and down the chain, looking through
13988// token factors and other loads (but nothing else). As a result, a true result
13989// indicates that it is safe to create a new consecutive load adjacent to the
13990// load provided.
13991 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13992 SDValue Chain = LD->getChain();
13993 EVT VT = LD->getMemoryVT();
13994
13995 SmallSet<SDNode *, 16> LoadRoots;
13996 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13997 SmallSet<SDNode *, 16> Visited;
13998
13999 // First, search up the chain, branching to follow all token-factor operands.
14000 // If we find a consecutive load, then we're done, otherwise, record all
14001 // nodes just above the top-level loads and token factors.
14002 while (!Queue.empty()) {
14003 SDNode *ChainNext = Queue.pop_back_val();
14004 if (!Visited.insert(ChainNext).second)
14005 continue;
14006
14007 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14008 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14009 return true;
14010
14011 if (!Visited.count(ChainLD->getChain().getNode()))
14012 Queue.push_back(ChainLD->getChain().getNode());
14013 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14014 for (const SDUse &O : ChainNext->ops())
14015 if (!Visited.count(O.getNode()))
14016 Queue.push_back(O.getNode());
14017 } else
14018 LoadRoots.insert(ChainNext);
14019 }
14020
14021 // Second, search down the chain, starting from the top-level nodes recorded
14022 // in the first phase. These top-level nodes are the nodes just above all
14023 // loads and token factors. Starting with their uses, recursively look through
14024 // all loads (just the chain uses) and token factors to find a consecutive
14025 // load.
14026 Visited.clear();
14027 Queue.clear();
14028
14029 for (SDNode *I : LoadRoots) {
14030 Queue.push_back(I);
14031
14032 while (!Queue.empty()) {
14033 SDNode *LoadRoot = Queue.pop_back_val();
14034 if (!Visited.insert(LoadRoot).second)
14035 continue;
14036
14037 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14038 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14039 return true;
14040
14041 for (SDNode *U : LoadRoot->uses())
14042 if (((isa<MemSDNode>(U) &&
14043 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14044 U->getOpcode() == ISD::TokenFactor) &&
14045 !Visited.count(U))
14046 Queue.push_back(U);
14047 }
14048 }
14049
14050 return false;
14051}
14052
14053/// This function is called when we have proved that a SETCC node can be replaced
14054/// by subtraction (and other supporting instructions) so that the result of
14055/// comparison is kept in a GPR instead of CR. This function is purely for
14056/// codegen purposes and has some flags to guide the codegen process.
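/// For example, with i32 operands (setult a, b) becomes
/// trunc(((zext64 a) - (zext64 b)) >> 63): the subtraction of the zero-extended
/// values is negative exactly when a < b unsigned, and the Swap/Complement
/// flags express the remaining unsigned predicates via the same pattern.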
14057static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14058 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14059 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14060
14061 // Zero extend the operands to the largest legal integer. Originally, they
14062 // must be of a strictly smaller size.
14063 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14064 DAG.getConstant(Size, DL, MVT::i32));
14065 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14066 DAG.getConstant(Size, DL, MVT::i32));
14067
14068 // Swap if needed. Depends on the condition code.
14069 if (Swap)
14070 std::swap(Op0, Op1);
14071
14072 // Subtract extended integers.
14073 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14074
14075 // Move the sign bit to the least significant position and zero out the rest.
14076  // Now the least significant bit carries the result of the original comparison.
14077 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14078 DAG.getConstant(Size - 1, DL, MVT::i32));
14079 auto Final = Shifted;
14080
14081 // Complement the result if needed. Based on the condition code.
14082 if (Complement)
14083 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14084 DAG.getConstant(1, DL, MVT::i64));
14085
14086 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14087}
14088
14089SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14090 DAGCombinerInfo &DCI) const {
14091 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14092
14093 SelectionDAG &DAG = DCI.DAG;
14094 SDLoc DL(N);
14095
14096 // Size of integers being compared has a critical role in the following
14097 // analysis, so we prefer to do this when all types are legal.
14098 if (!DCI.isAfterLegalizeDAG())
14099 return SDValue();
14100
14101 // If all users of SETCC extend its value to a legal integer type
14102 // then we replace SETCC with a subtraction
14103 for (const SDNode *U : N->uses())
14104 if (U->getOpcode() != ISD::ZERO_EXTEND)
14105 return SDValue();
14106
14107 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14108 auto OpSize = N->getOperand(0).getValueSizeInBits();
14109
14109
14110  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14111
14112 if (OpSize < Size) {
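    // With both operands zero extended, the sign bit of Op0 - Op1 is set
    // exactly when the first operand is less than the second, so the unsigned
    // predicates map onto the (Complement, Swap) flags of generateEquivalentSub:
    //   SETULT -> (false, false)   SETULE -> (true, true)
    //   SETUGT -> (false, true)    SETUGE -> (true, false)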
14113 switch (CC) {
14114 default: break;
14115 case ISD::SETULT:
14116 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14117 case ISD::SETULE:
14118 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14119 case ISD::SETUGT:
14120 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14121 case ISD::SETUGE:
14122 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14123 }
14124 }
14125
14126 return SDValue();
14127}
14128
14129SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14130 DAGCombinerInfo &DCI) const {
14131 SelectionDAG &DAG = DCI.DAG;
14132 SDLoc dl(N);
14133
14134 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14135 // If we're tracking CR bits, we need to be careful that we don't have:
14136 // trunc(binary-ops(zext(x), zext(y)))
14137 // or
14138 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14139 // such that we're unnecessarily moving things into GPRs when it would be
14140 // better to keep them in CR bits.
14141
14142 // Note that trunc here can be an actual i1 trunc, or can be the effective
14143 // truncation that comes from a setcc or select_cc.
14144 if (N->getOpcode() == ISD::TRUNCATE &&
14145 N->getValueType(0) != MVT::i1)
14146 return SDValue();
14147
14148 if (N->getOperand(0).getValueType() != MVT::i32 &&
14149 N->getOperand(0).getValueType() != MVT::i64)
14150 return SDValue();
14151
14152 if (N->getOpcode() == ISD::SETCC ||
14153 N->getOpcode() == ISD::SELECT_CC) {
14154 // If we're looking at a comparison, then we need to make sure that the
14155    // high bits (all except for the first) don't affect the result.
14156    ISD::CondCode CC =
14157      cast<CondCodeSDNode>(N->getOperand(
14158 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14159 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14160
14161    if (ISD::isSignedIntSetCC(CC)) {
14162 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14163 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14164 return SDValue();
14165 } else if (ISD::isUnsignedIntSetCC(CC)) {
14166 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14167 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14168 !DAG.MaskedValueIsZero(N->getOperand(1),
14169 APInt::getHighBitsSet(OpBits, OpBits-1)))
14170 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14171 : SDValue());
14172 } else {
14173      // This is neither a signed nor an unsigned comparison; just make sure
14174 // that the high bits are equal.
14175 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14176 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14177
14178 // We don't really care about what is known about the first bit (if
14179 // anything), so pretend that it is known zero for both to ensure they can
14180 // be compared as constants.
14181 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14182 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14183
14184 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14185 Op1Known.getConstant() != Op2Known.getConstant())
14186 return SDValue();
14187 }
14188 }
14189
14190  // We now know that the higher-order bits are irrelevant; we just need to
14191 // make sure that all of the intermediate operations are bit operations, and
14192 // all inputs are extensions.
14193 if (N->getOperand(0).getOpcode() != ISD::AND &&
14194 N->getOperand(0).getOpcode() != ISD::OR &&
14195 N->getOperand(0).getOpcode() != ISD::XOR &&
14196 N->getOperand(0).getOpcode() != ISD::SELECT &&
14197 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14198 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14199 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14200 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14201 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14202 return SDValue();
14203
14204 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14205 N->getOperand(1).getOpcode() != ISD::AND &&
14206 N->getOperand(1).getOpcode() != ISD::OR &&
14207 N->getOperand(1).getOpcode() != ISD::XOR &&
14208 N->getOperand(1).getOpcode() != ISD::SELECT &&
14209 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14210 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14211 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14212 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14213 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14214 return SDValue();
14215
14216  SmallVector<SDValue, 4> Inputs;
14217  SmallVector<SDValue, 8> BinOps, PromOps;
14218  SmallPtrSet<SDNode *, 16> Visited;
14219
14220 for (unsigned i = 0; i < 2; ++i) {
14221 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14222 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14223 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14224 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14225 isa<ConstantSDNode>(N->getOperand(i)))
14226 Inputs.push_back(N->getOperand(i));
14227 else
14228 BinOps.push_back(N->getOperand(i));
14229
14230 if (N->getOpcode() == ISD::TRUNCATE)
14231 break;
14232 }
14233
14234 // Visit all inputs, collect all binary operations (and, or, xor and
14235 // select) that are all fed by extensions.
14236 while (!BinOps.empty()) {
14237 SDValue BinOp = BinOps.pop_back_val();
14238
14239 if (!Visited.insert(BinOp.getNode()).second)
14240 continue;
14241
14242 PromOps.push_back(BinOp);
14243
14244 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14245 // The condition of the select is not promoted.
14246 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14247 continue;
14248 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14249 continue;
14250
14251 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14252 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14253 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14254 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14255 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14256 Inputs.push_back(BinOp.getOperand(i));
14257 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14258 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14259 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14260 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14261 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14262 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14263 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14264 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14265 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14266 BinOps.push_back(BinOp.getOperand(i));
14267 } else {
14268 // We have an input that is not an extension or another binary
14269 // operation; we'll abort this transformation.
14270 return SDValue();
14271 }
14272 }
14273 }
14274
14275 // Make sure that this is a self-contained cluster of operations (which
14276 // is not quite the same thing as saying that everything has only one
14277 // use).
14278 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14279 if (isa<ConstantSDNode>(Inputs[i]))
14280 continue;
14281
14282 for (const SDNode *User : Inputs[i].getNode()->uses()) {
14283 if (User != N && !Visited.count(User))
14284 return SDValue();
14285
14286 // Make sure that we're not going to promote the non-output-value
14287 // operand(s) or SELECT or SELECT_CC.
14288 // FIXME: Although we could sometimes handle this, and it does occur in
14289 // practice that one of the condition inputs to the select is also one of
14290 // the outputs, we currently can't deal with this.
14291 if (User->getOpcode() == ISD::SELECT) {
14292 if (User->getOperand(0) == Inputs[i])
14293 return SDValue();
14294 } else if (User->getOpcode() == ISD::SELECT_CC) {
14295 if (User->getOperand(0) == Inputs[i] ||
14296 User->getOperand(1) == Inputs[i])
14297 return SDValue();
14298 }
14299 }
14300 }
14301
14302 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14303 for (const SDNode *User : PromOps[i].getNode()->uses()) {
14304 if (User != N && !Visited.count(User))
14305 return SDValue();
14306
14307 // Make sure that we're not going to promote the non-output-value
14308 // operand(s) or SELECT or SELECT_CC.
14309 // FIXME: Although we could sometimes handle this, and it does occur in
14310 // practice that one of the condition inputs to the select is also one of
14311 // the outputs, we currently can't deal with this.
14312 if (User->getOpcode() == ISD::SELECT) {
14313 if (User->getOperand(0) == PromOps[i])
14314 return SDValue();
14315 } else if (User->getOpcode() == ISD::SELECT_CC) {
14316 if (User->getOperand(0) == PromOps[i] ||
14317 User->getOperand(1) == PromOps[i])
14318 return SDValue();
14319 }
14320 }
14321 }
14322
14323 // Replace all inputs with the extension operand.
14324 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14325 // Constants may have users outside the cluster of to-be-promoted nodes,
14326 // and so we need to replace those as we do the promotions.
14327 if (isa<ConstantSDNode>(Inputs[i]))
14328 continue;
14329 else
14330 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14331 }
14332
14333 std::list<HandleSDNode> PromOpHandles;
14334 for (auto &PromOp : PromOps)
14335 PromOpHandles.emplace_back(PromOp);
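  // The HandleSDNodes keep the queued nodes alive and follow any replacement
  // made by the ReplaceAllUsesOfValueWith calls below.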
14336
14337 // Replace all operations (these are all the same, but have a different
14338 // (i1) return type). DAG.getNode will validate that the types of
14339 // a binary operator match, so go through the list in reverse so that
14340 // we've likely promoted both operands first. Any intermediate truncations or
14341 // extensions disappear.
14342 while (!PromOpHandles.empty()) {
14343 SDValue PromOp = PromOpHandles.back().getValue();
14344 PromOpHandles.pop_back();
14345
14346 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14347 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14348 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14349 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14350 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14351 PromOp.getOperand(0).getValueType() != MVT::i1) {
14352 // The operand is not yet ready (see comment below).
14353 PromOpHandles.emplace_front(PromOp);
14354 continue;
14355 }
14356
14357 SDValue RepValue = PromOp.getOperand(0);
14358 if (isa<ConstantSDNode>(RepValue))
14359 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14360
14361 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14362 continue;
14363 }
14364
14365 unsigned C;
14366 switch (PromOp.getOpcode()) {
14367 default: C = 0; break;
14368 case ISD::SELECT: C = 1; break;
14369 case ISD::SELECT_CC: C = 2; break;
14370 }
14371
14372 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14373 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14374 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14375 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14376 // The to-be-promoted operands of this node have not yet been
14377 // promoted (this should be rare because we're going through the
14378 // list backward, but if one of the operands has several users in
14379 // this cluster of to-be-promoted nodes, it is possible).
14380 PromOpHandles.emplace_front(PromOp);
14381 continue;
14382 }
14383
14384 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
14385
14386 // If there are any constant inputs, make sure they're replaced now.
14387 for (unsigned i = 0; i < 2; ++i)
14388 if (isa<ConstantSDNode>(Ops[C+i]))
14389 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14390
14391 DAG.ReplaceAllUsesOfValueWith(PromOp,
14392 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14393 }
14394
14395 // Now we're left with the initial truncation itself.
14396 if (N->getOpcode() == ISD::TRUNCATE)
14397 return N->getOperand(0);
14398
14399 // Otherwise, this is a comparison. The operands to be compared have just
14400 // changed type (to i1), but everything else is the same.
14401 return SDValue(N, 0);
14402}
14403
14404SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14405 DAGCombinerInfo &DCI) const {
14406 SelectionDAG &DAG = DCI.DAG;
14407 SDLoc dl(N);
14408
14409 // If we're tracking CR bits, we need to be careful that we don't have:
14410 // zext(binary-ops(trunc(x), trunc(y)))
14411 // or
14412 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14413 // such that we're unnecessarily moving things into CR bits that can more
14414 // efficiently stay in GPRs. Note that if we're not certain that the high
14415 // bits are set as required by the final extension, we still may need to do
14416 // some masking to get the proper behavior.
14417
14418 // This same functionality is important on PPC64 when dealing with
14419 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14420 // the return values of functions. Because it is so similar, it is handled
14421 // here as well.
14422
14423 if (N->getValueType(0) != MVT::i32 &&
14424 N->getValueType(0) != MVT::i64)
14425 return SDValue();
14426
14427 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14428 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14429 return SDValue();
14430
14431 if (N->getOperand(0).getOpcode() != ISD::AND &&
14432 N->getOperand(0).getOpcode() != ISD::OR &&
14433 N->getOperand(0).getOpcode() != ISD::XOR &&
14434 N->getOperand(0).getOpcode() != ISD::SELECT &&
14435 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14436 return SDValue();
14437
14438  SmallVector<SDValue, 4> Inputs;
14439  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14440  SmallPtrSet<SDNode *, 16> Visited;
14441
14442 // Visit all inputs, collect all binary operations (and, or, xor and
14443 // select) that are all fed by truncations.
14444 while (!BinOps.empty()) {
14445 SDValue BinOp = BinOps.pop_back_val();
14446
14447 if (!Visited.insert(BinOp.getNode()).second)
14448 continue;
14449
14450 PromOps.push_back(BinOp);
14451
14452 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14453 // The condition of the select is not promoted.
14454 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14455 continue;
14456 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14457 continue;
14458
14459 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14460 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14461 Inputs.push_back(BinOp.getOperand(i));
14462 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14463 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14464 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14465 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14466 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14467 BinOps.push_back(BinOp.getOperand(i));
14468 } else {
14469 // We have an input that is not a truncation or another binary
14470 // operation; we'll abort this transformation.
14471 return SDValue();
14472 }
14473 }
14474 }
14475
14476 // The operands of a select that must be truncated when the select is
14477 // promoted because the operand is actually part of the to-be-promoted set.
14478 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14479
14480 // Make sure that this is a self-contained cluster of operations (which
14481 // is not quite the same thing as saying that everything has only one
14482 // use).
14483 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14484 if (isa<ConstantSDNode>(Inputs[i]))
14485 continue;
14486
14487 for (SDNode *User : Inputs[i].getNode()->uses()) {
14488 if (User != N && !Visited.count(User))
14489 return SDValue();
14490
14491 // If we're going to promote the non-output-value operand(s) or SELECT or
14492 // SELECT_CC, record them for truncation.
14493 if (User->getOpcode() == ISD::SELECT) {
14494 if (User->getOperand(0) == Inputs[i])
14495 SelectTruncOp[0].insert(std::make_pair(User,
14496 User->getOperand(0).getValueType()));
14497 } else if (User->getOpcode() == ISD::SELECT_CC) {
14498 if (User->getOperand(0) == Inputs[i])
14499 SelectTruncOp[0].insert(std::make_pair(User,
14500 User->getOperand(0).getValueType()));
14501 if (User->getOperand(1) == Inputs[i])
14502 SelectTruncOp[1].insert(std::make_pair(User,
14503 User->getOperand(1).getValueType()));
14504 }
14505 }
14506 }
14507
14508 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14509 for (SDNode *User : PromOps[i].getNode()->uses()) {
14510 if (User != N && !Visited.count(User))
14511 return SDValue();
14512
14513 // If we're going to promote the non-output-value operand(s) or SELECT or
14514 // SELECT_CC, record them for truncation.
14515 if (User->getOpcode() == ISD::SELECT) {
14516 if (User->getOperand(0) == PromOps[i])
14517 SelectTruncOp[0].insert(std::make_pair(User,
14518 User->getOperand(0).getValueType()));
14519 } else if (User->getOpcode() == ISD::SELECT_CC) {
14520 if (User->getOperand(0) == PromOps[i])
14521 SelectTruncOp[0].insert(std::make_pair(User,
14522 User->getOperand(0).getValueType()));
14523 if (User->getOperand(1) == PromOps[i])
14524 SelectTruncOp[1].insert(std::make_pair(User,
14525 User->getOperand(1).getValueType()));
14526 }
14527 }
14528 }
14529
14530 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14531 bool ReallyNeedsExt = false;
14532 if (N->getOpcode() != ISD::ANY_EXTEND) {
14533 // If all of the inputs are not already sign/zero extended, then
14534 // we'll still need to do that at the end.
14535 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14536 if (isa<ConstantSDNode>(Inputs[i]))
14537 continue;
14538
14539 unsigned OpBits =
14540 Inputs[i].getOperand(0).getValueSizeInBits();
14541 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14542
14543 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14544 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14545 APInt::getHighBitsSet(OpBits,
14546 OpBits-PromBits))) ||
14547 (N->getOpcode() == ISD::SIGN_EXTEND &&
14548 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14549 (OpBits-(PromBits-1)))) {
14550 ReallyNeedsExt = true;
14551 break;
14552 }
14553 }
14554 }
14555
14556 // Replace all inputs, either with the truncation operand, or a
14557 // truncation or extension to the final output type.
14558 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14559 // Constant inputs need to be replaced with the to-be-promoted nodes that
14560 // use them because they might have users outside of the cluster of
14561 // promoted nodes.
14562 if (isa<ConstantSDNode>(Inputs[i]))
14563 continue;
14564
14565 SDValue InSrc = Inputs[i].getOperand(0);
14566 if (Inputs[i].getValueType() == N->getValueType(0))
14567 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14568 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14569 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14570 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14571 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14572 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14573 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14574 else
14575 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14576 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14577 }
14578
14579 std::list<HandleSDNode> PromOpHandles;
14580 for (auto &PromOp : PromOps)
14581 PromOpHandles.emplace_back(PromOp);
14582
14583 // Replace all operations (these are all the same, but have a different
14584 // (promoted) return type). DAG.getNode will validate that the types of
14585 // a binary operator match, so go through the list in reverse so that
14586 // we've likely promoted both operands first.
14587 while (!PromOpHandles.empty()) {
14588 SDValue PromOp = PromOpHandles.back().getValue();
14589 PromOpHandles.pop_back();
14590
14591 unsigned C;
14592 switch (PromOp.getOpcode()) {
14593 default: C = 0; break;
14594 case ISD::SELECT: C = 1; break;
14595 case ISD::SELECT_CC: C = 2; break;
14596 }
14597
14598 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14599 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14600 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14601 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14602 // The to-be-promoted operands of this node have not yet been
14603 // promoted (this should be rare because we're going through the
14604 // list backward, but if one of the operands has several users in
14605 // this cluster of to-be-promoted nodes, it is possible).
14606 PromOpHandles.emplace_front(PromOp);
14607 continue;
14608 }
14609
14610 // For SELECT and SELECT_CC nodes, we do a similar check for any
14611 // to-be-promoted comparison inputs.
14612 if (PromOp.getOpcode() == ISD::SELECT ||
14613 PromOp.getOpcode() == ISD::SELECT_CC) {
14614 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14615 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14616 (SelectTruncOp[1].count(PromOp.getNode()) &&
14617 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14618 PromOpHandles.emplace_front(PromOp);
14619 continue;
14620 }
14621 }
14622
14623    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14624                                PromOp.getNode()->op_end());
14625
14626 // If this node has constant inputs, then they'll need to be promoted here.
14627 for (unsigned i = 0; i < 2; ++i) {
14628 if (!isa<ConstantSDNode>(Ops[C+i]))
14629 continue;
14630 if (Ops[C+i].getValueType() == N->getValueType(0))
14631 continue;
14632
14633 if (N->getOpcode() == ISD::SIGN_EXTEND)
14634 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14635 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14636 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14637 else
14638 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14639 }
14640
14641 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14642 // truncate them again to the original value type.
14643 if (PromOp.getOpcode() == ISD::SELECT ||
14644 PromOp.getOpcode() == ISD::SELECT_CC) {
14645 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14646 if (SI0 != SelectTruncOp[0].end())
14647 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14648 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14649 if (SI1 != SelectTruncOp[1].end())
14650 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14651 }
14652
14653 DAG.ReplaceAllUsesOfValueWith(PromOp,
14654 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14655 }
14656
14657 // Now we're left with the initial extension itself.
14658 if (!ReallyNeedsExt)
14659 return N->getOperand(0);
14660
14661 // To zero extend, just mask off everything except for the first bit (in the
14662 // i1 case).
14663 if (N->getOpcode() == ISD::ZERO_EXTEND)
14664 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14665                       DAG.getConstant(APInt::getLowBitsSet(
14666                                           N->getValueSizeInBits(0), PromBits),
14667 dl, N->getValueType(0)));
14668
14669 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14670 "Invalid extension type");
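  // Sign extend by shifting the promoted bits up into the sign position and
  // arithmetically shifting them back down.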
14671 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14672 SDValue ShiftCst =
14673 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14674 return DAG.getNode(
14675 ISD::SRA, dl, N->getValueType(0),
14676 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14677 ShiftCst);
14678}
14679
14680SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14681 DAGCombinerInfo &DCI) const {
14682 assert(N->getOpcode() == ISD::SETCC &&
14683 "Should be called with a SETCC node");
14684
14685 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14686 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14687 SDValue LHS = N->getOperand(0);
14688 SDValue RHS = N->getOperand(1);
14689
14690 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14691 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14692 LHS.hasOneUse())
14693 std::swap(LHS, RHS);
14694
14695 // x == 0-y --> x+y == 0
14696 // x != 0-y --> x+y != 0
14697 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14698 RHS.hasOneUse()) {
14699 SDLoc DL(N);
14700 SelectionDAG &DAG = DCI.DAG;
14701 EVT VT = N->getValueType(0);
14702 EVT OpVT = LHS.getValueType();
14703 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14704 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14705 }
14706 }
14707
14708 return DAGCombineTruncBoolExt(N, DCI);
14709}
14710
14711// Is this an extending load from an f32 to an f64?
14712static bool isFPExtLoad(SDValue Op) {
14713 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14714 return LD->getExtensionType() == ISD::EXTLOAD &&
14715 Op.getValueType() == MVT::f64;
14716 return false;
14717}
14718
14719/// Reduces the number of fp-to-int conversion when building a vector.
14720///
14721/// If this vector is built out of floating to integer conversions,
14722/// transform it to a vector built out of floating point values followed by a
14723/// single floating to integer conversion of the vector.
14724/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14725/// becomes (fptosi (build_vector ($A, $B, ...)))
14726SDValue PPCTargetLowering::
14727combineElementTruncationToVectorTruncation(SDNode *N,
14728 DAGCombinerInfo &DCI) const {
14729 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14730 "Should be called with a BUILD_VECTOR node");
14731
14732 SelectionDAG &DAG = DCI.DAG;
14733 SDLoc dl(N);
14734
14735 SDValue FirstInput = N->getOperand(0);
14736 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14737 "The input operand must be an fp-to-int conversion.");
14738
14739 // This combine happens after legalization so the fp_to_[su]i nodes are
14740  // already converted to PPCISD nodes.
14741 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14742 if (FirstConversion == PPCISD::FCTIDZ ||
14743 FirstConversion == PPCISD::FCTIDUZ ||
14744 FirstConversion == PPCISD::FCTIWZ ||
14745 FirstConversion == PPCISD::FCTIWUZ) {
14746 bool IsSplat = true;
14747 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14748 FirstConversion == PPCISD::FCTIWUZ;
14749 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14750    SmallVector<SDValue, 4> Ops;
14751    EVT TargetVT = N->getValueType(0);
14752 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14753 SDValue NextOp = N->getOperand(i);
14754 if (NextOp.getOpcode() != PPCISD::MFVSR)
14755 return SDValue();
14756 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14757 if (NextConversion != FirstConversion)
14758 return SDValue();
14759 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14760 // This is not valid if the input was originally double precision. It is
14761 // also not profitable to do unless this is an extending load in which
14762 // case doing this combine will allow us to combine consecutive loads.
14763 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14764 return SDValue();
14765 if (N->getOperand(i) != FirstInput)
14766 IsSplat = false;
14767 }
14768
14769 // If this is a splat, we leave it as-is since there will be only a single
14770 // fp-to-int conversion followed by a splat of the integer. This is better
14771 // for 32-bit and smaller ints and neutral for 64-bit ints.
14772 if (IsSplat)
14773 return SDValue();
14774
14775 // Now that we know we have the right type of node, get its operands
14776 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14777 SDValue In = N->getOperand(i).getOperand(0);
14778 if (Is32Bit) {
14779 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14780 // here, we know that all inputs are extending loads so this is safe).
14781 if (In.isUndef())
14782 Ops.push_back(DAG.getUNDEF(SrcVT));
14783 else {
14784 SDValue Trunc =
14785 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14786 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14787 Ops.push_back(Trunc);
14788 }
14789 } else
14790 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14791 }
14792
14793 unsigned Opcode;
14794 if (FirstConversion == PPCISD::FCTIDZ ||
14795 FirstConversion == PPCISD::FCTIWZ)
14796 Opcode = ISD::FP_TO_SINT;
14797 else
14798 Opcode = ISD::FP_TO_UINT;
14799
14800 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14801 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14802 return DAG.getNode(Opcode, dl, TargetVT, BV);
14803 }
14804 return SDValue();
14805}
14806
14807/// Reduce the number of loads when building a vector.
14808///
14809/// Building a vector out of multiple loads can be converted to a load
14810/// of the vector type if the loads are consecutive. If the loads are
14811/// consecutive but in descending order, a shuffle is added at the end
14812/// to reorder the vector.
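/// For example, (build_vector (load a), (load a+4), (load a+8), (load a+12))
/// with i32 elements can become a single vector load from address a.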
14813static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14814  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14815 "Should be called with a BUILD_VECTOR node");
14816
14817 SDLoc dl(N);
14818
14819  // Return early for non-byte-sized types, as they can't be consecutive.
14820 if (!N->getValueType(0).getVectorElementType().isByteSized())
14821 return SDValue();
14822
14823 bool InputsAreConsecutiveLoads = true;
14824 bool InputsAreReverseConsecutive = true;
14825 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14826 SDValue FirstInput = N->getOperand(0);
14827 bool IsRoundOfExtLoad = false;
14828 LoadSDNode *FirstLoad = nullptr;
14829
14830 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14831 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14832 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14833 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14834 }
14835 // Not a build vector of (possibly fp_rounded) loads.
14836 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14837 N->getNumOperands() == 1)
14838 return SDValue();
14839
14840 if (!IsRoundOfExtLoad)
14841 FirstLoad = cast<LoadSDNode>(FirstInput);
14842
14843  SmallVector<LoadSDNode *, 4> InputLoads;
14844  InputLoads.push_back(FirstLoad);
14845 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14846 // If any inputs are fp_round(extload), they all must be.
14847 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14848 return SDValue();
14849
14850 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14851 N->getOperand(i);
14852 if (NextInput.getOpcode() != ISD::LOAD)
14853 return SDValue();
14854
14855 SDValue PreviousInput =
14856 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14857 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14858 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14859
14860 // If any inputs are fp_round(extload), they all must be.
14861 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14862 return SDValue();
14863
14864 // We only care about regular loads. The PPC-specific load intrinsics
14865 // will not lead to a merge opportunity.
14866 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14867 InputsAreConsecutiveLoads = false;
14868 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14869 InputsAreReverseConsecutive = false;
14870
14871 // Exit early if the loads are neither consecutive nor reverse consecutive.
14872 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14873 return SDValue();
14874 InputLoads.push_back(LD2);
14875 }
14876
14877 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14878 "The loads cannot be both consecutive and reverse consecutive.");
14879
14880 SDValue WideLoad;
14881 SDValue ReturnSDVal;
14882 if (InputsAreConsecutiveLoads) {
14883 assert(FirstLoad && "Input needs to be a LoadSDNode.");
14884 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14885 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14886 FirstLoad->getAlign());
14887 ReturnSDVal = WideLoad;
14888 } else if (InputsAreReverseConsecutive) {
14889 LoadSDNode *LastLoad = InputLoads.back();
14890 assert(LastLoad && "Input needs to be a LoadSDNode.");
14891 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14892 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14893 LastLoad->getAlign());
14894    SmallVector<int, 16> Ops;
14895    for (int i = N->getNumOperands() - 1; i >= 0; i--)
14896 Ops.push_back(i);
14897
14898 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14899 DAG.getUNDEF(N->getValueType(0)), Ops);
14900 } else
14901 return SDValue();
14902
14903 for (auto *LD : InputLoads)
14904 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14905 return ReturnSDVal;
14906}
14907
14908// This function adds the required vector_shuffle needed to get
14909// the elements of the vector extract in the correct position
14910// as specified by the CorrectElems encoding.
14911static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14912                                      SDValue Input, uint64_t Elems,
14913 uint64_t CorrectElems) {
14914 SDLoc dl(N);
14915
14916 unsigned NumElems = Input.getValueType().getVectorNumElements();
14917 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14918
14919 // Knowing the element indices being extracted from the original
14920 // vector and the order in which they're being inserted, just put
14921 // them at element indices required for the instruction.
14922 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14923 if (DAG.getDataLayout().isLittleEndian())
14924 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14925 else
14926 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14927 CorrectElems = CorrectElems >> 8;
14928 Elems = Elems >> 8;
14929 }
14930
14931 SDValue Shuffle =
14932 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14933 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14934
14935 EVT VT = N->getValueType(0);
14936 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14937
14938 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14939                               Input.getValueType().getVectorElementType(),
14940                               VT.getVectorNumElements());
14941  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14942 DAG.getValueType(ExtVT));
14943}
14944
14945// Look for build vector patterns where input operands come from sign
14946// extended vector_extract elements of specific indices. If the correct indices
14947// aren't used, add a vector shuffle to fix up the indices and create
14948// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14949// during instruction selection.
14950static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14951  // This array encodes the indices that the vector sign extend instructions
14952 // extract from when extending from one type to another for both BE and LE.
14953  // The right nibble of each byte corresponds to the LE indices,
14954  // and the left nibble of each byte corresponds to the BE indices.
14955 // For example: 0x3074B8FC byte->word
14956 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14957 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14958 // For example: 0x000070F8 byte->double word
14959 // For LE: the allowed indices are: 0x0,0x8
14960 // For BE: the allowed indices are: 0x7,0xF
14961 uint64_t TargetElems[] = {
14962 0x3074B8FC, // b->w
14963 0x000070F8, // b->d
14964 0x10325476, // h->w
14965 0x00003074, // h->d
14966 0x00001032, // w->d
14967 };
14968
14969 uint64_t Elems = 0;
14970 int Index;
14971 SDValue Input;
14972
14973 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14974 if (!Op)
14975 return false;
14976 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14977 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14978 return false;
14979
14980 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14981 // of the right width.
14982 SDValue Extract = Op.getOperand(0);
14983 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14984 Extract = Extract.getOperand(0);
14985 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14986 return false;
14987
14988 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14989 if (!ExtOp)
14990 return false;
14991
14992 Index = ExtOp->getZExtValue();
14993 if (Input && Input != Extract.getOperand(0))
14994 return false;
14995
14996 if (!Input)
14997 Input = Extract.getOperand(0);
14998
14999 Elems = Elems << 8;
15000 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15001 Elems |= Index;
15002
15003 return true;
15004 };
15005
15006  // If the build vector operands aren't sign-extended vector extracts
15007  // of the same input vector, then return.
15008 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15009 if (!isSExtOfVecExtract(N->getOperand(i))) {
15010 return SDValue();
15011 }
15012 }
15013
15014 // If the vector extract indices are not correct, add the appropriate
15015 // vector_shuffle.
15016 int TgtElemArrayIdx;
15017 int InputSize = Input.getValueType().getScalarSizeInBits();
15018 int OutputSize = N->getValueType(0).getScalarSizeInBits();
15019 if (InputSize + OutputSize == 40)
15020 TgtElemArrayIdx = 0;
15021 else if (InputSize + OutputSize == 72)
15022 TgtElemArrayIdx = 1;
15023 else if (InputSize + OutputSize == 48)
15024 TgtElemArrayIdx = 2;
15025 else if (InputSize + OutputSize == 80)
15026 TgtElemArrayIdx = 3;
15027 else if (InputSize + OutputSize == 96)
15028 TgtElemArrayIdx = 4;
15029 else
15030 return SDValue();
15031
15032 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15033 CorrectElems = DAG.getDataLayout().isLittleEndian()
15034 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15035 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15036 if (Elems != CorrectElems) {
15037 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15038 }
15039
15040 // Regular lowering will catch cases where a shuffle is not needed.
15041 return SDValue();
15042}
15043
15044// Look for the pattern of a load from a narrow width to i128, feeding
15045// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15046// (LXVRZX). This node represents a zero extending load that will be matched
15047// to the Load VSX Vector Rightmost instructions.
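// For example, (v1i128 (build_vector (i128 (zextload i32 %p)))) can become an
// LXVRZX node that loads 32 bits from %p.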
15048static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15049  SDLoc DL(N);
15050
15051 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15052 if (N->getValueType(0) != MVT::v1i128)
15053 return SDValue();
15054
15055 SDValue Operand = N->getOperand(0);
15056 // Proceed with the transformation if the operand to the BUILD_VECTOR
15057 // is a load instruction.
15058 if (Operand.getOpcode() != ISD::LOAD)
15059 return SDValue();
15060
15061 auto *LD = cast<LoadSDNode>(Operand);
15062 EVT MemoryType = LD->getMemoryVT();
15063
15064  // This transformation is only valid if we are loading either a byte,
15065 // halfword, word, or doubleword.
15066 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15067 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15068
15069 // Ensure that the load from the narrow width is being zero extended to i128.
15070 if (!ValidLDType ||
15071 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15072 LD->getExtensionType() != ISD::EXTLOAD))
15073 return SDValue();
15074
15075 SDValue LoadOps[] = {
15076 LD->getChain(), LD->getBasePtr(),
15077 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15078
15079  return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15080                                 DAG.getVTList(MVT::v1i128, MVT::Other),
15081 LoadOps, MemoryType, LD->getMemOperand());
15082}
15083
15084SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15085 DAGCombinerInfo &DCI) const {
15086 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15087 "Should be called with a BUILD_VECTOR node");
15088
15089 SelectionDAG &DAG = DCI.DAG;
15090 SDLoc dl(N);
15091
15092 if (!Subtarget.hasVSX())
15093 return SDValue();
15094
15095 // The target independent DAG combiner will leave a build_vector of
15096 // float-to-int conversions intact. We can generate MUCH better code for
15097 // a float-to-int conversion of a vector of floats.
15098 SDValue FirstInput = N->getOperand(0);
15099 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15100 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15101 if (Reduced)
15102 return Reduced;
15103 }
15104
15105 // If we're building a vector out of consecutive loads, just load that
15106 // vector type.
15107 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15108 if (Reduced)
15109 return Reduced;
15110
15111 // If we're building a vector out of extended elements from another vector
15112 // we have P9 vector integer extend instructions. The code assumes legal
15113 // input types (i.e. it can't handle things like v4i16) so do not run before
15114 // legalization.
15115 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15116 Reduced = combineBVOfVecSExt(N, DAG);
15117 if (Reduced)
15118 return Reduced;
15119 }
15120
15121 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15122 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15123 // is a load from <valid narrow width> to i128.
15124 if (Subtarget.isISA3_1()) {
15125 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15126 if (BVOfZLoad)
15127 return BVOfZLoad;
15128 }
15129
15130 if (N->getValueType(0) != MVT::v2f64)
15131 return SDValue();
15132
15133 // Looking for:
15134 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15135 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15136 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15137 return SDValue();
15138 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15139 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15140 return SDValue();
15141 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15142 return SDValue();
15143
15144 SDValue Ext1 = FirstInput.getOperand(0);
15145 SDValue Ext2 = N->getOperand(1).getOperand(0);
15146  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15147      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15148    return SDValue();
15149
15150 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15151 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15152 if (!Ext1Op || !Ext2Op)
15153 return SDValue();
15154 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15155 Ext1.getOperand(0) != Ext2.getOperand(0))
15156 return SDValue();
15157
15158 int FirstElem = Ext1Op->getZExtValue();
15159 int SecondElem = Ext2Op->getZExtValue();
15160 int SubvecIdx;
15161 if (FirstElem == 0 && SecondElem == 1)
15162 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15163 else if (FirstElem == 2 && SecondElem == 3)
15164 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15165 else
15166 return SDValue();
15167
15168 SDValue SrcVec = Ext1.getOperand(0);
15169 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15170                  PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15171  return DAG.getNode(NodeType, dl, MVT::v2f64,
15172 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15173}
15174
15175SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15176 DAGCombinerInfo &DCI) const {
15177 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15178 N->getOpcode() == ISD::UINT_TO_FP) &&
15179 "Need an int -> FP conversion node here");
15180
15181 if (useSoftFloat() || !Subtarget.has64BitSupport())
15182 return SDValue();
15183
15184 SelectionDAG &DAG = DCI.DAG;
15185 SDLoc dl(N);
15186 SDValue Op(N, 0);
15187
15188 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15189 // from the hardware.
15190 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15191 return SDValue();
15192 if (!Op.getOperand(0).getValueType().isSimple())
15193 return SDValue();
15194 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15195 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15196 return SDValue();
15197
15198 SDValue FirstOperand(Op.getOperand(0));
15199 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15200 (FirstOperand.getValueType() == MVT::i8 ||
15201 FirstOperand.getValueType() == MVT::i16);
15202 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15203 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15204 bool DstDouble = Op.getValueType() == MVT::f64;
15205 unsigned ConvOp = Signed ?
15206 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15207 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15208 SDValue WidthConst =
15209 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15210 dl, false);
15211 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15212 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15213    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15214                                          DAG.getVTList(MVT::f64, MVT::Other),
15215 Ops, MVT::i8, LDN->getMemOperand());
15216 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15217
15218 // For signed conversion, we need to sign-extend the value in the VSR
15219 if (Signed) {
15220 SDValue ExtOps[] = { Ld, WidthConst };
15221 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15222 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15223 } else
15224 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15225 }
15226
15227
15228 // For i32 intermediate values, unfortunately, the conversion functions
15229  // leave the upper 32 bits of the value undefined. Within the set of
15230 // scalar instructions, we have no method for zero- or sign-extending the
15231 // value. Thus, we cannot handle i32 intermediate values here.
15232 if (Op.getOperand(0).getValueType() == MVT::i32)
15233 return SDValue();
15234
15235 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15236 "UINT_TO_FP is supported only with FPCVT");
15237
15238 // If we have FCFIDS, then use it when converting to single-precision.
15239 // Otherwise, convert to double-precision and then round.
15240 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15241 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15242                                                            : PPCISD::FCFIDS)
15243                      : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15244 : PPCISD::FCFID);
15245 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15246 ? MVT::f32
15247 : MVT::f64;
15248
15249 // If we're converting from a float, to an int, and back to a float again,
15250 // then we don't need the store/load pair at all.
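  // For example, (f64 (sint_to_fp (fp_to_sint f64 %x))) becomes
  // (fcfid (fctidz %x)), keeping the value in the floating-point domain.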
15251 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15252 Subtarget.hasFPCVT()) ||
15253 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15254 SDValue Src = Op.getOperand(0).getOperand(0);
15255 if (Src.getValueType() == MVT::f32) {
15256 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15257 DCI.AddToWorklist(Src.getNode());
15258 } else if (Src.getValueType() != MVT::f64) {
15259 // Make sure that we don't pick up a ppc_fp128 source value.
15260 return SDValue();
15261 }
15262
15263 unsigned FCTOp =
15264 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15265                                                        PPCISD::FCTIDUZ;
15266
15267 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15268 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15269
15270 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15271 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15272 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15273 DCI.AddToWorklist(FP.getNode());
15274 }
15275
15276 return FP;
15277 }
15278
15279 return SDValue();
15280}
15281
15282// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15283// builtins) into loads with swaps.
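// On little endian subtargets, lxvd2x loads the two doublewords in big endian
// element order, so an xxswapd is required afterwards to produce the expected
// vector layout.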
15284SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15285                                              DAGCombinerInfo &DCI) const {
15286 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15287 // load combines.
15288 if (DCI.isBeforeLegalizeOps())
15289 return SDValue();
15290
15291 SelectionDAG &DAG = DCI.DAG;
15292 SDLoc dl(N);
15293 SDValue Chain;
15294 SDValue Base;
15295 MachineMemOperand *MMO;
15296
15297 switch (N->getOpcode()) {
15298 default:
15299 llvm_unreachable("Unexpected opcode for little endian VSX load");
15300 case ISD::LOAD: {
15301 LoadSDNode *LD = cast<LoadSDNode>(N);
15302 Chain = LD->getChain();
15303 Base = LD->getBasePtr();
15304 MMO = LD->getMemOperand();
15305 // If the MMO suggests this isn't a load of a full vector, leave
15306 // things alone. For a built-in, we have to make the change for
15307    // correctness, so if there is a size problem, that will be a bug.
15308 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15309 return SDValue();
15310 break;
15311 }
15312  case ISD::INTRINSIC_W_CHAIN: {
15313    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15314 Chain = Intrin->getChain();
15315 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15316 // us what we want. Get operand 2 instead.
15317 Base = Intrin->getOperand(2);
15318 MMO = Intrin->getMemOperand();
15319 break;
15320 }
15321 }
15322
15323 MVT VecTy = N->getValueType(0).getSimpleVT();
15324
15325 SDValue LoadOps[] = { Chain, Base };
15326  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15327                                         DAG.getVTList(MVT::v2f64, MVT::Other),
15328 LoadOps, MVT::v2f64, MMO);
15329
15330 DCI.AddToWorklist(Load.getNode());
15331 Chain = Load.getValue(1);
15332 SDValue Swap = DAG.getNode(
15333 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15334 DCI.AddToWorklist(Swap.getNode());
15335
15336 // Add a bitcast if the resulting load type doesn't match v2f64.
15337 if (VecTy != MVT::v2f64) {
15338 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15339 DCI.AddToWorklist(N.getNode());
15340 // Package {bitcast value, swap's chain} to match Load's shape.
15341 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15342 N, Swap.getValue(1));
15343 }
15344
15345 return Swap;
15346}
15347
15348// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15349// builtins) into stores with swaps.
15350SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15351                                               DAGCombinerInfo &DCI) const {
15352 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15353 // store combines.
15354 if (DCI.isBeforeLegalizeOps())
15355 return SDValue();
15356
15357 SelectionDAG &DAG = DCI.DAG;
15358 SDLoc dl(N);
15359 SDValue Chain;
15360 SDValue Base;
15361 unsigned SrcOpnd;
15362 MachineMemOperand *MMO;
15363
15364 switch (N->getOpcode()) {
15365 default:
15366 llvm_unreachable("Unexpected opcode for little endian VSX store");
15367 case ISD::STORE: {
15368 StoreSDNode *ST = cast<StoreSDNode>(N);
15369 Chain = ST->getChain();
15370 Base = ST->getBasePtr();
15371 MMO = ST->getMemOperand();
15372 SrcOpnd = 1;
15373 // If the MMO suggests this isn't a store of a full vector, leave
15374 // things alone. For a built-in, we have to make the change for
15375    // correctness, so if there is a size problem, that will be a bug.
15376 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15377 return SDValue();
15378 break;
15379 }
15380 case ISD::INTRINSIC_VOID: {
15381 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15382 Chain = Intrin->getChain();
15383 // Intrin->getBasePtr() oddly does not get what we want.
15384 Base = Intrin->getOperand(3);
15385 MMO = Intrin->getMemOperand();
15386 SrcOpnd = 2;
15387 break;
15388 }
15389 }
15390
15391 SDValue Src = N->getOperand(SrcOpnd);
15392 MVT VecTy = Src.getValueType().getSimpleVT();
15393
15394 // All stores are done as v2f64 and possible bit cast.
15395 if (VecTy != MVT::v2f64) {
15396 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15397 DCI.AddToWorklist(Src.getNode());
15398 }
15399
15400 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15401 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15402 DCI.AddToWorklist(Swap.getNode());
15403 Chain = Swap.getValue(1);
15404 SDValue StoreOps[] = { Chain, Swap, Base };
15405  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15406                                          DAG.getVTList(MVT::Other),
15407 StoreOps, VecTy, MMO);
15408 DCI.AddToWorklist(Store.getNode());
15409 return Store;
15410}
15411
15412// Handle DAG combine for STORE (FP_TO_INT F).
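// The converted value stays in a VSR and is stored with a store-as-integer
// node, avoiding a round trip through the GPRs.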
15413SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15414 DAGCombinerInfo &DCI) const {
15415 SelectionDAG &DAG = DCI.DAG;
15416 SDLoc dl(N);
15417 unsigned Opcode = N->getOperand(1).getOpcode();
15418 (void)Opcode;
15419 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15420
15421 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15422 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15423 && "Not a FP_TO_INT Instruction!");
15424
15425 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15426 EVT Op1VT = N->getOperand(1).getValueType();
15427 EVT ResVT = Val.getValueType();
15428
15429 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15430 return SDValue();
15431
15432 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15433 bool ValidTypeForStoreFltAsInt =
15434 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15435 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15436
15437 // TODO: Lower conversion from f128 on all VSX targets
15438 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15439 return SDValue();
15440
15441 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15442 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15443 return SDValue();
15444
15445 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15446
15447 // Set number of bytes being converted.
15448 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15449 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15450 DAG.getIntPtrConstant(ByteSize, dl, false),
15451 DAG.getValueType(Op1VT)};
15452
15453  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15454                          DAG.getVTList(MVT::Other), Ops,
15455 cast<StoreSDNode>(N)->getMemoryVT(),
15456 cast<StoreSDNode>(N)->getMemOperand());
15457
15458 return Val;
15459}
15460
15461static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15462 // Check that the source of the element keeps flipping
15463 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
15464 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15465 for (int i = 1, e = Mask.size(); i < e; i++) {
15466 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15467 return false;
15468 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15469 return false;
15470 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15471 }
15472 return true;
15473}
15474
15475static bool isSplatBV(SDValue Op) {
15476 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15477 return false;
15478 SDValue FirstOp;
15479
15480 // Find first non-undef input.
15481 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15482 FirstOp = Op.getOperand(i);
15483 if (!FirstOp.isUndef())
15484 break;
15485 }
15486
15487 // All inputs are undef or the same as the first non-undef input.
15488 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15489 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15490 return false;
15491 return true;
15492}
15493
15494static SDValue isScalarToVec(SDValue Op) {
15495  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15496 return Op;
15497 if (Op.getOpcode() != ISD::BITCAST)
15498 return SDValue();
15499 Op = Op.getOperand(0);
15500 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15501 return Op;
15502 return SDValue();
15503}
15504
15505// Fix up the shuffle mask to account for the fact that the result of
15506// scalar_to_vector is not in lane zero. This just takes all values in
15507// the ranges specified by the min/max indices and adds the number of
15508// elements required to ensure each element comes from the respective
15509// position in the valid lane.
15510// On little endian, that's just the corresponding element in the other
15511// half of the vector. On big endian, it is in the same half but right
15512// justified rather than left justified in that half.
15513static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15514                                            int LHSMaxIdx, int RHSMinIdx,
15515 int RHSMaxIdx, int HalfVec,
15516 unsigned ValidLaneWidth,
15517 const PPCSubtarget &Subtarget) {
15518 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15519 int Idx = ShuffV[i];
15520 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15521 ShuffV[i] +=
15522 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15523 }
15524}
15525
15526// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15527// the original is:
15528// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15529// In such a case, just change the shuffle mask to extract the element
15530// from the permuted index.
15531static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15532                               const PPCSubtarget &Subtarget) {
15533 SDLoc dl(OrigSToV);
15534 EVT VT = OrigSToV.getValueType();
15535 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15536 "Expecting a SCALAR_TO_VECTOR here");
15537 SDValue Input = OrigSToV.getOperand(0);
15538
15539 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15540 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15541 SDValue OrigVector = Input.getOperand(0);
15542
15543 // Can't handle non-const element indices or different vector types
15544 // for the input to the extract and the output of the scalar_to_vector.
15545 if (Idx && VT == OrigVector.getValueType()) {
15546 unsigned NumElts = VT.getVectorNumElements();
15547 assert(
15548 NumElts > 1 &&
15549 "Cannot produce a permuted scalar_to_vector for one element vector");
15550 SmallVector<int, 16> NewMask(NumElts, -1);
15551 unsigned ResultInElt = NumElts / 2;
15552 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15553 NewMask[ResultInElt] = Idx->getZExtValue();
15554 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15555 }
15556 }
15557 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15558 OrigSToV.getOperand(0));
15559}
15560
15561// On little endian subtargets, combine shuffles such as:
15562// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15563// into:
15564// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15565// because the latter can be matched to a single instruction merge.
15566// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15567// to put the value into element zero. Adjust the shuffle mask so that the
15568// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15569// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15570// nodes with elements smaller than doubleword because all the ways
15571// of getting scalar data into a vector register put the value in the
15572// rightmost element of the left half of the vector.
15573SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15574 SelectionDAG &DAG) const {
15575 SDValue LHS = SVN->getOperand(0);
15576 SDValue RHS = SVN->getOperand(1);
15577 auto Mask = SVN->getMask();
15578 int NumElts = LHS.getValueType().getVectorNumElements();
15579 SDValue Res(SVN, 0);
15580 SDLoc dl(SVN);
15581 bool IsLittleEndian = Subtarget.isLittleEndian();
15582
15583 // On big endian targets this is only useful for subtargets with direct moves.
15584 // On little endian targets it would be useful for all subtargets with VSX.
15585 // However adding special handling for LE subtargets without direct moves
15586 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15587 // which includes direct moves.
15588 if (!Subtarget.hasDirectMove())
15589 return Res;
15590
15591 // If this is not a shuffle of a shuffle and the first element comes from
15592 // the second vector, canonicalize to the commuted form. This will make it
15593 // more likely to match one of the single instruction patterns.
15594 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15595 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15596 std::swap(LHS, RHS);
15597 Res = DAG.getCommutedVectorShuffle(*SVN);
15598 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15599 }
15600
15601 // Adjust the shuffle mask if either input vector comes from a
15602 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15603 // form (to prevent the need for a swap).
15604 SmallVector<int, 16> ShuffV(Mask);
15605 SDValue SToVLHS = isScalarToVec(LHS);
15606 SDValue SToVRHS = isScalarToVec(RHS);
15607 if (SToVLHS || SToVRHS) {
15608 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15609 // same type and have differing element sizes, then do not perform
15610 // the following transformation. The current transformation for
15611 // SCALAR_TO_VECTOR assumes that both input vectors have the same
15612 // element size. This will be updated in the future to account for
15613 // differing sizes of the LHS and RHS.
15614 if (SToVLHS && SToVRHS &&
15615 (SToVLHS.getValueType().getScalarSizeInBits() !=
15616 SToVRHS.getValueType().getScalarSizeInBits()))
15617 return Res;
15618
15619 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15620 : SToVRHS.getValueType().getVectorNumElements();
15621 int NumEltsOut = ShuffV.size();
15622 // The width of the "valid lane" (i.e. the lane that contains the value that
15623 // is vectorized) needs to be expressed in terms of the number of elements
15624 // of the shuffle. It is therefore the ratio of the scalar sizes before and
15625 // after any bitcast.
15626 unsigned ValidLaneWidth =
15627 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15628 LHS.getValueType().getScalarSizeInBits()
15629 : SToVRHS.getValueType().getScalarSizeInBits() /
15630 RHS.getValueType().getScalarSizeInBits();
15631
15632 // Initially assume that neither input is permuted. These will be adjusted
15633 // accordingly if either input is.
15634 int LHSMaxIdx = -1;
15635 int RHSMinIdx = -1;
15636 int RHSMaxIdx = -1;
15637 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15638
15639 // Get the permuted scalar to vector nodes for the source(s) that come from
15640 // ISD::SCALAR_TO_VECTOR.
15641 // On big endian systems, this only makes sense for element sizes smaller
15642 // than 64 bits since for 64-bit elements, all instructions already put
15643 // the value into element zero. Since scalar size of LHS and RHS may differ
15644 // after isScalarToVec, this should be checked using their own sizes.
15645 if (SToVLHS) {
15646 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15647 return Res;
15648 // Set up the values for the shuffle vector fixup.
15649 LHSMaxIdx = NumEltsOut / NumEltsIn;
15650 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15651 if (SToVLHS.getValueType() != LHS.getValueType())
15652 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15653 LHS = SToVLHS;
15654 }
15655 if (SToVRHS) {
15656 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15657 return Res;
15658 RHSMinIdx = NumEltsOut;
15659 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15660 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15661 if (SToVRHS.getValueType() != RHS.getValueType())
15662 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15663 RHS = SToVRHS;
15664 }
15665
15666 // Fix up the shuffle mask to reflect where the desired element actually is.
15667 // The minimum and maximum indices that correspond to element zero for both
15668 // the LHS and RHS are computed and will control which shuffle mask entries
15669 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15670 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15671 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15672 HalfVec, ValidLaneWidth, Subtarget);
15673 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15674
15675 // We may have simplified away the shuffle. We won't be able to do anything
15676 // further with it here.
15677 if (!isa<ShuffleVectorSDNode>(Res))
15678 return Res;
15679 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15680 }
15681
15682 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15683 // The common case after we commuted the shuffle is that the RHS is a splat
15684 // and we have elements coming in from the splat at indices that are not
15685 // conducive to using a merge.
15686 // Example:
15687 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15688 if (!isSplatBV(TheSplat))
15689 return Res;
15690
15691 // We are looking for a mask such that all even elements are from
15692 // one vector and all odd elements from the other.
15693 if (!isAlternatingShuffMask(Mask, NumElts))
15694 return Res;
15695
15696 // Adjust the mask so we are pulling in the same index from the splat
15697 // as the index from the interesting vector in consecutive elements.
15698 if (IsLittleEndian) {
15699 // Example (even elements from first vector):
15700 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15701 if (Mask[0] < NumElts)
15702 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15703 if (ShuffV[i] < 0)
15704 continue;
15705 // If element from non-splat is undef, pick first element from splat.
15706 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15707 }
15708 // Example (odd elements from first vector):
15709 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15710 else
15711 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15712 if (ShuffV[i] < 0)
15713 continue;
15714 // If element from non-splat is undef, pick first element from splat.
15715 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15716 }
15717 } else {
15718 // Example (even elements from first vector):
15719 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15720 if (Mask[0] < NumElts)
15721 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15722 if (ShuffV[i] < 0)
15723 continue;
15724 // If element from non-splat is undef, pick first element from splat.
15725 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15726 }
15727 // Example (odd elements from first vector):
15728 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15729 else
15730 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15731 if (ShuffV[i] < 0)
15732 continue;
15733 // If element from non-splat is undef, pick first element from splat.
15734 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15735 }
15736 }
15737
15738 // If the RHS has undefs, we need to remove them since we may have created
15739 // a shuffle that adds those instead of the splat value.
15740 SDValue SplatVal =
15741 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15742 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15743
15744 if (IsLittleEndian)
15745 RHS = TheSplat;
15746 else
15747 LHS = TheSplat;
15748 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15749}
15750
15751SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15752 LSBaseSDNode *LSBase,
15753 DAGCombinerInfo &DCI) const {
15754 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15755 "Not a reverse memop pattern!");
15756
15757 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15758 auto Mask = SVN->getMask();
15759 int i = 0;
15760 auto I = Mask.rbegin();
15761 auto E = Mask.rend();
15762
15763 for (; I != E; ++I) {
15764 if (*I != i)
15765 return false;
15766 i++;
15767 }
15768 return true;
15769 };
15770
15771 SelectionDAG &DAG = DCI.DAG;
15772 EVT VT = SVN->getValueType(0);
15773
15774 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15775 return SDValue();
15776
15777 // Before Power9, the PPCVSXSwapRemoval pass adjusts the element order.
15778 // See the comment in PPCVSXSwapRemoval.cpp.
15779 // This combine conflicts with that pass, so we do not perform it here.
15780 if (!Subtarget.hasP9Vector())
15781 return SDValue();
15782
15783 if (!IsElementReverse(SVN))
15784 return SDValue();
15785
15786 if (LSBase->getOpcode() == ISD::LOAD) {
15787 // If result value 0 of the load has any user other than the
15788 // shufflevector instruction, it is not profitable to replace the
15789 // shufflevector with a reverse load.
15790 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15791 UI != UE; ++UI)
15792 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15793 return SDValue();
15794
15795 SDLoc dl(LSBase);
15796 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15797 return DAG.getMemIntrinsicNode(
15798 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15799 LSBase->getMemoryVT(), LSBase->getMemOperand());
15800 }
15801
15802 if (LSBase->getOpcode() == ISD::STORE) {
15803 // If there are other uses of the shuffle, the swap cannot be avoided.
15804 // Forcing the use of an X-Form (since swapped stores only have
15805 // X-Forms) without removing the swap is unprofitable.
15806 if (!SVN->hasOneUse())
15807 return SDValue();
15808
15809 SDLoc dl(LSBase);
15810 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15811 LSBase->getBasePtr()};
15812 return DAG.getMemIntrinsicNode(
15813 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15814 LSBase->getMemoryVT(), LSBase->getMemOperand());
15815 }
15816
15817 llvm_unreachable("Expected a load or store node here");
15818}
15819
15820static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15821 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15822 if (IntrinsicID == Intrinsic::ppc_stdcx)
15823 StoreWidth = 8;
15824 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15825 StoreWidth = 4;
15826 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15827 StoreWidth = 2;
15828 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15829 StoreWidth = 1;
15830 else
15831 return false;
15832 return true;
15833}
15834
15835SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15836 DAGCombinerInfo &DCI) const {
15837 SelectionDAG &DAG = DCI.DAG;
15838 SDLoc dl(N);
15839 switch (N->getOpcode()) {
15840 default: break;
15841 case ISD::ADD:
15842 return combineADD(N, DCI);
15843 case ISD::AND: {
15844 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15845 // original input as that will prevent us from selecting optimal rotates.
15846 // This only matters if the input to the extend is i32 widened to i64.
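    // For example (illustrative), (and (zext (srl %x:i32, 4)), 255):i64 is
    // rewritten below as (zext (and (srl %x:i32, 4), 255):i32) so the shift
    // and mask can still be matched as a single 32-bit rotate-and-mask.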
15847 SDValue Op1 = N->getOperand(0);
15848 SDValue Op2 = N->getOperand(1);
15849 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15850 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15851 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15852 Op1.getOperand(0).getValueType() != MVT::i32)
15853 break;
15854 SDValue NarrowOp = Op1.getOperand(0);
15855 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15856 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15857 break;
15858
15859 uint64_t Imm = Op2->getAsZExtVal();
15860 // Make sure that the constant is narrow enough to fit in the narrow type.
15861 if (!isUInt<32>(Imm))
15862 break;
15863 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15864 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15865 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15866 }
15867 case ISD::SHL:
15868 return combineSHL(N, DCI);
15869 case ISD::SRA:
15870 return combineSRA(N, DCI);
15871 case ISD::SRL:
15872 return combineSRL(N, DCI);
15873 case ISD::MUL:
15874 return combineMUL(N, DCI);
15875 case ISD::FMA:
15876 case PPCISD::FNMSUB:
15877 return combineFMALike(N, DCI);
15878 case PPCISD::SHL:
15879 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15880 return N->getOperand(0);
15881 break;
15882 case PPCISD::SRL:
15883 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15884 return N->getOperand(0);
15885 break;
15886 case PPCISD::SRA:
15887 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15888 if (C->isZero() || // 0 >>s V -> 0.
15889 C->isAllOnes()) // -1 >>s V -> -1.
15890 return N->getOperand(0);
15891 }
15892 break;
15893 case ISD::SIGN_EXTEND:
15894 case ISD::ZERO_EXTEND:
15895 case ISD::ANY_EXTEND:
15896 return DAGCombineExtBoolTrunc(N, DCI);
15897 case ISD::TRUNCATE:
15898 return combineTRUNCATE(N, DCI);
15899 case ISD::SETCC:
15900 if (SDValue CSCC = combineSetCC(N, DCI))
15901 return CSCC;
15902 [[fallthrough]];
15903 case ISD::SELECT_CC:
15904 return DAGCombineTruncBoolExt(N, DCI);
15905 case ISD::SINT_TO_FP:
15906 case ISD::UINT_TO_FP:
15907 return combineFPToIntToFP(N, DCI);
15908 case ISD::VECTOR_SHUFFLE:
15909 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15910 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15911 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15912 }
15913 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15914 case ISD::STORE: {
15915
15916 EVT Op1VT = N->getOperand(1).getValueType();
15917 unsigned Opcode = N->getOperand(1).getOpcode();
15918
15919 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15920 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15921 SDValue Val = combineStoreFPToInt(N, DCI);
15922 if (Val)
15923 return Val;
15924 }
15925
15926 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15927 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15928 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15929 if (Val)
15930 return Val;
15931 }
15932
15933 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15934 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15935 N->getOperand(1).getNode()->hasOneUse() &&
15936 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15937 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15938
15939 // STBRX can only handle simple types, and it makes no sense to store fewer
15940 // than two bytes in byte-reversed order.
15941 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15942 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15943 break;
15944
15945 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15946 // Do an any-extend to 32-bits if this is a half-word input.
15947 if (BSwapOp.getValueType() == MVT::i16)
15948 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15949
15950 // If the type of the BSWAP operand is wider than the stored memory width,
15951 // it needs to be shifted to the right side before STBRX.
15952 if (Op1VT.bitsGT(mVT)) {
15953 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15954 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15955 DAG.getConstant(Shift, dl, MVT::i32));
15956 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15957 if (Op1VT == MVT::i64)
15958 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15959 }
15960
15961 SDValue Ops[] = {
15962 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15963 };
15964 return
15965 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15966 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15967 cast<StoreSDNode>(N)->getMemOperand());
15968 }
15969
15970 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15971 // This increases the chance of CSEing the constant construction.
15972 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15973 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15974 // Need to sign-extend to 64 bits to handle negative values.
15975 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15976 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15977 MemVT.getSizeInBits());
15978 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15979
15980 // DAG.getTruncStore() can't be used here because it doesn't accept
15981 // the general (base + offset) addressing mode.
15982 // So we use UpdateNodeOperands and setTruncatingStore instead.
15983 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15984 N->getOperand(3));
15985 cast<StoreSDNode>(N)->setTruncatingStore(true);
15986 return SDValue(N, 0);
15987 }
15988
15989 // For little endian, VSX stores require generating xxswapd/stxvd2x.
15990 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15991 if (Op1VT.isSimple()) {
15992 MVT StoreVT = Op1VT.getSimpleVT();
15993 if (Subtarget.needsSwapsForVSXMemOps() &&
15994 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15995 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15996 return expandVSXStoreForLE(N, DCI);
15997 }
15998 break;
15999 }
16000 case ISD::LOAD: {
16001 LoadSDNode *LD = cast<LoadSDNode>(N);
16002 EVT VT = LD->getValueType(0);
16003
16004 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16005 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16006 if (VT.isSimple()) {
16007 MVT LoadVT = VT.getSimpleVT();
16008 if (Subtarget.needsSwapsForVSXMemOps() &&
16009 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16010 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16011 return expandVSXLoadForLE(N, DCI);
16012 }
16013
16014 // We sometimes end up with a 64-bit integer load, from which we extract
16015 // two single-precision floating-point numbers. This happens with
16016 // std::complex<float>, and other similar structures, because of the way we
16017 // canonicalize structure copies. However, if we lack direct moves,
16018 // then the final bitcasts from the extracted integer values to the
16019 // floating-point numbers turn into store/load pairs. Even with direct moves,
16020 // just loading the two floating-point numbers is likely better.
16021 auto ReplaceTwoFloatLoad = [&]() {
16022 if (VT != MVT::i64)
16023 return false;
16024
16025 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16026 LD->isVolatile())
16027 return false;
16028
16029 // We're looking for a sequence like this:
16030 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16031 // t16: i64 = srl t13, Constant:i32<32>
16032 // t17: i32 = truncate t16
16033 // t18: f32 = bitcast t17
16034 // t19: i32 = truncate t13
16035 // t20: f32 = bitcast t19
16036
16037 if (!LD->hasNUsesOfValue(2, 0))
16038 return false;
16039
16040 auto UI = LD->use_begin();
16041 while (UI.getUse().getResNo() != 0) ++UI;
16042 SDNode *Trunc = *UI++;
16043 while (UI.getUse().getResNo() != 0) ++UI;
16044 SDNode *RightShift = *UI;
16045 if (Trunc->getOpcode() != ISD::TRUNCATE)
16046 std::swap(Trunc, RightShift);
16047
16048 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16049 Trunc->getValueType(0) != MVT::i32 ||
16050 !Trunc->hasOneUse())
16051 return false;
16052 if (RightShift->getOpcode() != ISD::SRL ||
16053 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16054 RightShift->getConstantOperandVal(1) != 32 ||
16055 !RightShift->hasOneUse())
16056 return false;
16057
16058 SDNode *Trunc2 = *RightShift->use_begin();
16059 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16060 Trunc2->getValueType(0) != MVT::i32 ||
16061 !Trunc2->hasOneUse())
16062 return false;
16063
16064 SDNode *Bitcast = *Trunc->use_begin();
16065 SDNode *Bitcast2 = *Trunc2->use_begin();
16066
16067 if (Bitcast->getOpcode() != ISD::BITCAST ||
16068 Bitcast->getValueType(0) != MVT::f32)
16069 return false;
16070 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16071 Bitcast2->getValueType(0) != MVT::f32)
16072 return false;
16073
16074 if (Subtarget.isLittleEndian())
16075 std::swap(Bitcast, Bitcast2);
16076
16077 // Bitcast has the second float (in memory-layout order) and Bitcast2
16078 // has the first one.
16079
16080 SDValue BasePtr = LD->getBasePtr();
16081 if (LD->isIndexed()) {
16082 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16083 "Non-pre-inc AM on PPC?");
16084 BasePtr =
16085 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16086 LD->getOffset());
16087 }
16088
16089 auto MMOFlags =
16090 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16091 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16092 LD->getPointerInfo(), LD->getAlign(),
16093 MMOFlags, LD->getAAInfo());
16094 SDValue AddPtr =
16095 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16096 BasePtr, DAG.getIntPtrConstant(4, dl));
16097 SDValue FloatLoad2 = DAG.getLoad(
16098 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16099 LD->getPointerInfo().getWithOffset(4),
16100 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16101
16102 if (LD->isIndexed()) {
16103 // Note that DAGCombine should re-form any pre-increment load(s) from
16104 // what is produced here if that makes sense.
16105 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16106 }
16107
16108 DCI.CombineTo(Bitcast2, FloatLoad);
16109 DCI.CombineTo(Bitcast, FloatLoad2);
16110
16111 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16112 SDValue(FloatLoad2.getNode(), 1));
16113 return true;
16114 };
16115
16116 if (ReplaceTwoFloatLoad())
16117 return SDValue(N, 0);
16118
16119 EVT MemVT = LD->getMemoryVT();
16120 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16121 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16122 if (LD->isUnindexed() && VT.isVector() &&
16123 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16124 // P8 and later hardware should just use LOAD.
16125 !Subtarget.hasP8Vector() &&
16126 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16127 VT == MVT::v4f32))) &&
16128 LD->getAlign() < ABIAlignment) {
16129 // This is a type-legal unaligned Altivec load.
16130 SDValue Chain = LD->getChain();
16131 SDValue Ptr = LD->getBasePtr();
16132 bool isLittleEndian = Subtarget.isLittleEndian();
16133
16134 // This implements the loading of unaligned vectors as described in
16135 // the venerable Apple Velocity Engine overview. Specifically:
16136 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16137 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16138 //
16139 // The general idea is to expand a sequence of one or more unaligned
16140 // loads into an alignment-based permutation-control instruction (lvsl
16141 // or lvsr), a series of regular vector loads (which always truncate
16142 // their input address to an aligned address), and a series of
16143 // permutations. The results of these permutations are the requested
16144 // loaded values. The trick is that the last "extra" load is not taken
16145 // from the address you might suspect (sizeof(vector) bytes after the
16146 // last requested load), but rather sizeof(vector) - 1 bytes after the
16147 // last requested vector. The point of this is to avoid a page fault if
16148 // the base address happened to be aligned. This works because if the
16149 // base address is aligned, then adding less than a full vector length
16150 // will cause the last vector in the sequence to be (re)loaded.
16151 // Otherwise, the next vector will be fetched as you might suspect was
16152 // necessary.
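      // Illustrative sketch of the expanded sequence for one unaligned v4i32
      // load on a big endian subtarget (register choices are arbitrary):
      //   lvsl  v2, 0, r3        ; permute control from the unaligned address
      //   lvx   v3, 0, r3        ; aligned vector covering the start
      //   addi  r4, r3, 15       ; sizeof(vector) - 1, avoiding a page fault
      //   lvx   v4, 0, r4        ; aligned vector covering the end
      //   vperm v2, v3, v4, v2   ; select the 16 requested bytes
      // On little endian, lvsr is used instead and the two loaded vectors are
      // swapped in the vperm, as handled below.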
16153
16154 // We might be able to reuse the permutation generation from
16155 // a different base address offset from this one by an aligned amount.
16156 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16157 // optimization later.
16158 Intrinsic::ID Intr, IntrLD, IntrPerm;
16159 MVT PermCntlTy, PermTy, LDTy;
16160 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16161 : Intrinsic::ppc_altivec_lvsl;
16162 IntrLD = Intrinsic::ppc_altivec_lvx;
16163 IntrPerm = Intrinsic::ppc_altivec_vperm;
16164 PermCntlTy = MVT::v16i8;
16165 PermTy = MVT::v4i32;
16166 LDTy = MVT::v4i32;
16167
16168 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16169
16170 // Create the new MMO for the new base load. It is like the original MMO,
16171 // but represents an area in memory almost twice the vector size centered
16172 // on the original address. If the address is unaligned, we might start
16173 // reading up to (sizeof(vector)-1) bytes below the address of the
16174 // original unaligned load.
16175 MachineFunction &MF = DAG.getMachineFunction();
16176 MachineMemOperand *BaseMMO =
16177 MF.getMachineMemOperand(LD->getMemOperand(),
16178 -(int64_t)MemVT.getStoreSize()+1,
16179 2*MemVT.getStoreSize()-1);
16180
16181 // Create the new base load.
16182 SDValue LDXIntID =
16183 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16184 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16185 SDValue BaseLoad =
16186 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16187 DAG.getVTList(PermTy, MVT::Other),
16188 BaseLoadOps, LDTy, BaseMMO);
16189
16190 // Note that the value of IncOffset (which is provided to the next
16191 // load's pointer info offset value, and thus used to calculate the
16192 // alignment), and the value of IncValue (which is actually used to
16193 // increment the pointer value) are different! This is because we
16194 // require the next load to appear to be aligned, even though it
16195 // is actually offset from the base pointer by a lesser amount.
16196 int IncOffset = VT.getSizeInBits() / 8;
16197 int IncValue = IncOffset;
16198
16199 // Walk (both up and down) the chain looking for another load at the real
16200 // (aligned) offset (the alignment of the other load does not matter in
16201 // this case). If found, then do not use the offset reduction trick, as
16202 // that will prevent the loads from being later combined (as they would
16203 // otherwise be duplicates).
16204 if (!findConsecutiveLoad(LD, DAG))
16205 --IncValue;
16206
16207 SDValue Increment =
16208 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16209 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16210
16211 MachineMemOperand *ExtraMMO =
16212 MF.getMachineMemOperand(LD->getMemOperand(),
16213 1, 2*MemVT.getStoreSize()-1);
16214 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16215 SDValue ExtraLoad =
16216 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16217 DAG.getVTList(PermTy, MVT::Other),
16218 ExtraLoadOps, LDTy, ExtraMMO);
16219
16220 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16221 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16222
16223 // Because vperm has a big-endian bias, we must reverse the order
16224 // of the input vectors and complement the permute control vector
16225 // when generating little endian code. We have already handled the
16226 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16227 // and ExtraLoad here.
16228 SDValue Perm;
16229 if (isLittleEndian)
16230 Perm = BuildIntrinsicOp(IntrPerm,
16231 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16232 else
16233 Perm = BuildIntrinsicOp(IntrPerm,
16234 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16235
16236 if (VT != PermTy)
16237 Perm = Subtarget.hasAltivec()
16238 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16239 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16240 DAG.getTargetConstant(1, dl, MVT::i64));
16241 // second argument is 1 because this rounding
16242 // is always exact.
16243
16244 // The output of the permutation is our loaded result, the TokenFactor is
16245 // our new chain.
16246 DCI.CombineTo(N, Perm, TF);
16247 return SDValue(N, 0);
16248 }
16249 }
16250 break;
16251 case ISD::INTRINSIC_WO_CHAIN: {
16252 bool isLittleEndian = Subtarget.isLittleEndian();
16253 unsigned IID = N->getConstantOperandVal(0);
16254 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16255 : Intrinsic::ppc_altivec_lvsl);
16256 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16257 SDValue Add = N->getOperand(1);
16258
16259 int Bits = 4 /* 16 byte alignment */;
16260
16261 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16262 APInt::getAllOnes(Bits /* alignment */)
16263 .zext(Add.getScalarValueSizeInBits()))) {
16264 SDNode *BasePtr = Add->getOperand(0).getNode();
16265 for (SDNode *U : BasePtr->uses()) {
16266 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16267 U->getConstantOperandVal(0) == IID) {
16268 // We've found another LVSL/LVSR, and this address is an aligned
16269 // multiple of that one. The results will be the same, so use the
16270 // one we've just found instead.
16271
16272 return SDValue(U, 0);
16273 }
16274 }
16275 }
16276
16277 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16278 SDNode *BasePtr = Add->getOperand(0).getNode();
16279 for (SDNode *U : BasePtr->uses()) {
16280 if (U->getOpcode() == ISD::ADD &&
16281 isa<ConstantSDNode>(U->getOperand(1)) &&
16282 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16283 (1ULL << Bits) ==
16284 0) {
16285 SDNode *OtherAdd = U;
16286 for (SDNode *V : OtherAdd->uses()) {
16287 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16288 V->getConstantOperandVal(0) == IID) {
16289 return SDValue(V, 0);
16290 }
16291 }
16292 }
16293 }
16294 }
16295 }
16296
16297 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
16298 // Expose the vabsduw/h/b opportunity for downstream combines.
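    // For example (illustrative), vmaxsw((sub 0, %a), %a) computes the
    // per-element absolute value of %a, so it is replaced with (abs %a).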
16299 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16300 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16301 IID == Intrinsic::ppc_altivec_vmaxsh ||
16302 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16303 SDValue V1 = N->getOperand(1);
16304 SDValue V2 = N->getOperand(2);
16305 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16306 V1.getSimpleValueType() == MVT::v8i16 ||
16307 V1.getSimpleValueType() == MVT::v16i8) &&
16308 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16309 // (0-a, a)
16310 if (V1.getOpcode() == ISD::SUB &&
16311 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16312 V1.getOperand(1) == V2) {
16313 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16314 }
16315 // (a, 0-a)
16316 if (V2.getOpcode() == ISD::SUB &&
16317 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16318 V2.getOperand(1) == V1) {
16319 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16320 }
16321 // (x-y, y-x)
16322 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16323 V1.getOperand(0) == V2.getOperand(1) &&
16324 V1.getOperand(1) == V2.getOperand(0)) {
16325 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16326 }
16327 }
16328 }
16329 }
16330
16331 break;
16332 case ISD::INTRINSIC_W_CHAIN:
16333 switch (N->getConstantOperandVal(1)) {
16334 default:
16335 break;
16336 case Intrinsic::ppc_altivec_vsum4sbs:
16337 case Intrinsic::ppc_altivec_vsum4shs:
16338 case Intrinsic::ppc_altivec_vsum4ubs: {
16339 // These sum-across intrinsics only have a chain due to the side effect
16340 // that they may set the SAT bit. If we know the SAT bit will not be set
16341 // for some inputs, we can replace any uses of their chain with the
16342 // input chain.
16343 if (BuildVectorSDNode *BVN =
16344 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16345 APInt APSplatBits, APSplatUndef;
16346 unsigned SplatBitSize;
16347 bool HasAnyUndefs;
16348 bool BVNIsConstantSplat = BVN->isConstantSplat(
16349 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16350 !Subtarget.isLittleEndian());
16351 // If the constant splat vector is 0, the SAT bit will not be set.
16352 if (BVNIsConstantSplat && APSplatBits == 0)
16353 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16354 }
16355 return SDValue();
16356 }
16357 case Intrinsic::ppc_vsx_lxvw4x:
16358 case Intrinsic::ppc_vsx_lxvd2x:
16359 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16360 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16361 if (Subtarget.needsSwapsForVSXMemOps())
16362 return expandVSXLoadForLE(N, DCI);
16363 break;
16364 }
16365 break;
16366 case ISD::INTRINSIC_VOID:
16367 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16368 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16369 if (Subtarget.needsSwapsForVSXMemOps()) {
16370 switch (N->getConstantOperandVal(1)) {
16371 default:
16372 break;
16373 case Intrinsic::ppc_vsx_stxvw4x:
16374 case Intrinsic::ppc_vsx_stxvd2x:
16375 return expandVSXStoreForLE(N, DCI);
16376 }
16377 }
16378 break;
16379 case ISD::BSWAP: {
16380 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16381 // For subtargets without LDBRX, we can still do better than the default
16382 // expansion even for 64-bit BSWAP (LOAD).
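    // For example (illustrative), (bswap (load i32 %p)) becomes a single
    // byte-reversed load (lwbrx) from %p, and the now-dead original load is
    // combined away below.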
16383 bool Is64BitBswapOn64BitTgt =
16384 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16385 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16386 N->getOperand(0).hasOneUse();
16387 if (IsSingleUseNormalLd &&
16388 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16389 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16390 SDValue Load = N->getOperand(0);
16391 LoadSDNode *LD = cast<LoadSDNode>(Load);
16392 // Create the byte-swapping load.
16393 SDValue Ops[] = {
16394 LD->getChain(), // Chain
16395 LD->getBasePtr(), // Ptr
16396 DAG.getValueType(N->getValueType(0)) // VT
16397 };
16398 SDValue BSLoad =
16399 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16400 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16401 MVT::i64 : MVT::i32, MVT::Other),
16402 Ops, LD->getMemoryVT(), LD->getMemOperand());
16403
16404 // If this is an i16 load, insert the truncate.
16405 SDValue ResVal = BSLoad;
16406 if (N->getValueType(0) == MVT::i16)
16407 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16408
16409 // First, combine the bswap away. This makes the value produced by the
16410 // load dead.
16411 DCI.CombineTo(N, ResVal);
16412
16413 // Next, combine the load away, we give it a bogus result value but a real
16414 // chain result. The result value is dead because the bswap is dead.
16415 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16416
16417 // Return N so it doesn't get rechecked!
16418 return SDValue(N, 0);
16419 }
16420 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16421 // before legalization so that the BUILD_PAIR is handled correctly.
16422 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16423 !IsSingleUseNormalLd)
16424 return SDValue();
16425 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16426
16427 // Can't split volatile or atomic loads.
16428 if (!LD->isSimple())
16429 return SDValue();
16430 SDValue BasePtr = LD->getBasePtr();
16431 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16432 LD->getPointerInfo(), LD->getAlign());
16433 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16434 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16435 DAG.getIntPtrConstant(4, dl));
16436 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16437 LD->getMemOperand(), 4, 4);
16438 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16439 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16440 SDValue Res;
16441 if (Subtarget.isLittleEndian())
16442 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16443 else
16444 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16445 SDValue TF =
16446 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16447 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16448 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16449 return Res;
16450 }
16451 case PPCISD::VCMP:
16452 // If a VCMP_rec node already exists with exactly the same operands as this
16453 // node, use its result instead of this node (VCMP_rec computes both a CR6
16454 // and a normal output).
16455 //
16456 if (!N->getOperand(0).hasOneUse() &&
16457 !N->getOperand(1).hasOneUse() &&
16458 !N->getOperand(2).hasOneUse()) {
16459
16460 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16461 SDNode *VCMPrecNode = nullptr;
16462
16463 SDNode *LHSN = N->getOperand(0).getNode();
16464 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16465 UI != E; ++UI)
16466 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16467 UI->getOperand(1) == N->getOperand(1) &&
16468 UI->getOperand(2) == N->getOperand(2) &&
16469 UI->getOperand(0) == N->getOperand(0)) {
16470 VCMPrecNode = *UI;
16471 break;
16472 }
16473
16474 // If there is no VCMP_rec node, or if the flag value has a single use,
16475 // don't transform this.
16476 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16477 break;
16478
16479 // Look at the (necessarily single) use of the flag value. If it has a
16480 // chain, this transformation is more complex. Note that multiple things
16481 // could use the value result, which we should ignore.
16482 SDNode *FlagUser = nullptr;
16483 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16484 FlagUser == nullptr; ++UI) {
16485 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16486 SDNode *User = *UI;
16487 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16488 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16489 FlagUser = User;
16490 break;
16491 }
16492 }
16493 }
16494
16495 // If the user is a MFOCRF instruction, we know this is safe.
16496 // Otherwise we give up for right now.
16497 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16498 return SDValue(VCMPrecNode, 0);
16499 }
16500 break;
16501 case ISD::BR_CC: {
16502 // If this is a branch on an altivec predicate comparison, lower this so
16503 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16504 // lowering is done pre-legalize, because the legalizer lowers the predicate
16505 // compare down to code that is difficult to reassemble.
16506 // This code also handles branches that depend on the result of a store
16507 // conditional.
16508 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16509 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16510
16511 int CompareOpc;
16512 bool isDot;
16513
16514 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16515 break;
16516
16517 // Since we are doing this pre-legalize, the RHS can be a constant of
16518 // arbitrary bitwidth which may cause issues when trying to get the value
16519 // from the underlying APInt.
16520 auto RHSAPInt = RHS->getAsAPIntVal();
16521 if (!RHSAPInt.isIntN(64))
16522 break;
16523
16524 unsigned Val = RHSAPInt.getZExtValue();
16525 auto isImpossibleCompare = [&]() {
16526 // If this is a comparison against something other than 0/1, then we know
16527 // that the condition is never/always true.
16528 if (Val != 0 && Val != 1) {
16529 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16530 return N->getOperand(0);
16531 // Always !=, turn it into an unconditional branch.
16532 return DAG.getNode(ISD::BR, dl, MVT::Other,
16533 N->getOperand(0), N->getOperand(4));
16534 }
16535 return SDValue();
16536 };
16537 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16538 unsigned StoreWidth = 0;
16539 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16540 isStoreConditional(LHS, StoreWidth)) {
16541 if (SDValue Impossible = isImpossibleCompare())
16542 return Impossible;
16543 PPC::Predicate CompOpc;
16544 // eq 0 => ne
16545 // ne 0 => eq
16546 // eq 1 => eq
16547 // ne 1 => ne
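      // For example (illustrative), a branch on (stwcx. == 0), i.e. on the
      // store conditional failing, becomes a branch on CR0 with the NE
      // predicate, matching the "eq 0 => ne" row above.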
16548 if (Val == 0)
16549 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16550 else
16551 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16552
16553 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16554 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16555 auto *MemNode = cast<MemSDNode>(LHS);
16556 SDValue ConstSt = DAG.getMemIntrinsicNode(
16557 PPCISD::STORE_COND, dl,
16558 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16559 MemNode->getMemoryVT(), MemNode->getMemOperand());
16560
16561 SDValue InChain;
16562 // Unchain the branch from the original store conditional.
16563 if (N->getOperand(0) == LHS.getValue(1))
16564 InChain = LHS.getOperand(0);
16565 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16566 SmallVector<SDValue, 4> InChains;
16567 SDValue InTF = N->getOperand(0);
16568 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16569 if (InTF.getOperand(i) != LHS.getValue(1))
16570 InChains.push_back(InTF.getOperand(i));
16571 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16572 }
16573
16574 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16575 DAG.getConstant(CompOpc, dl, MVT::i32),
16576 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16577 ConstSt.getValue(2));
16578 }
16579
16580 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16581 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16582 assert(isDot && "Can't compare against a vector result!");
16583
16584 if (SDValue Impossible = isImpossibleCompare())
16585 return Impossible;
16586
16587 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16588 // Create the PPCISD altivec 'dot' comparison node.
16589 SDValue Ops[] = {
16590 LHS.getOperand(2), // LHS of compare
16591 LHS.getOperand(3), // RHS of compare
16592 DAG.getConstant(CompareOpc, dl, MVT::i32)
16593 };
16594 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16595 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16596
16597 // Unpack the result based on how the target uses it.
16598 PPC::Predicate CompOpc;
16599 switch (LHS.getConstantOperandVal(1)) {
16600 default: // Can't happen, don't crash on invalid number though.
16601 case 0: // Branch on the value of the EQ bit of CR6.
16602 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16603 break;
16604 case 1: // Branch on the inverted value of the EQ bit of CR6.
16605 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16606 break;
16607 case 2: // Branch on the value of the LT bit of CR6.
16608 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16609 break;
16610 case 3: // Branch on the inverted value of the LT bit of CR6.
16611 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16612 break;
16613 }
16614
16615 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16616 DAG.getConstant(CompOpc, dl, MVT::i32),
16617 DAG.getRegister(PPC::CR6, MVT::i32),
16618 N->getOperand(4), CompNode.getValue(1));
16619 }
16620 break;
16621 }
16622 case ISD::BUILD_VECTOR:
16623 return DAGCombineBuildVector(N, DCI);
16624 }
16625
16626 return SDValue();
16627}
16628
16629SDValue
16630PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16631 SelectionDAG &DAG,
16632 SmallVectorImpl<SDNode *> &Created) const {
16633 // fold (sdiv X, pow2)
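  // For example (illustrative), on a 64-bit subtarget X sdiv 8 becomes
  // (sra_addze X, 3), i.e. an arithmetic shift right followed by addze to
  // round toward zero, and X sdiv -8 additionally negates that result with a
  // subtract from zero.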
16634 EVT VT = N->getValueType(0);
16635 if (VT == MVT::i64 && !Subtarget.isPPC64())
16636 return SDValue();
16637 if ((VT != MVT::i32 && VT != MVT::i64) ||
16638 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16639 return SDValue();
16640
16641 SDLoc DL(N);
16642 SDValue N0 = N->getOperand(0);
16643
16644 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16645 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16646 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16647
16648 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16649 Created.push_back(Op.getNode());
16650
16651 if (IsNegPow2) {
16652 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16653 Created.push_back(Op.getNode());
16654 }
16655
16656 return Op;
16657}
16658
16659//===----------------------------------------------------------------------===//
16660// Inline Assembly Support
16661//===----------------------------------------------------------------------===//
16662
16663void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16664 KnownBits &Known,
16665 const APInt &DemandedElts,
16666 const SelectionDAG &DAG,
16667 unsigned Depth) const {
16668 Known.resetAll();
16669 switch (Op.getOpcode()) {
16670 default: break;
16671 case PPCISD::LBRX: {
16672 // lhbrx is known to have the top bits cleared out.
16673 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16674 Known.Zero = 0xFFFF0000;
16675 break;
16676 }
16677 case ISD::INTRINSIC_WO_CHAIN: {
16678 switch (Op.getConstantOperandVal(0)) {
16679 default: break;
16680 case Intrinsic::ppc_altivec_vcmpbfp_p:
16681 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16682 case Intrinsic::ppc_altivec_vcmpequb_p:
16683 case Intrinsic::ppc_altivec_vcmpequh_p:
16684 case Intrinsic::ppc_altivec_vcmpequw_p:
16685 case Intrinsic::ppc_altivec_vcmpequd_p:
16686 case Intrinsic::ppc_altivec_vcmpequq_p:
16687 case Intrinsic::ppc_altivec_vcmpgefp_p:
16688 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16689 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16690 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16691 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16692 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16693 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16694 case Intrinsic::ppc_altivec_vcmpgtub_p:
16695 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16696 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16697 case Intrinsic::ppc_altivec_vcmpgtud_p:
16698 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16699 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16700 break;
16701 }
16702 break;
16703 }
16704 case ISD::INTRINSIC_W_CHAIN: {
16705 switch (Op.getConstantOperandVal(1)) {
16706 default:
16707 break;
16708 case Intrinsic::ppc_load2r:
16709 // Top bits are cleared for load2r (which is the same as lhbrx).
16710 Known.Zero = 0xFFFF0000;
16711 break;
16712 }
16713 break;
16714 }
16715 }
16716}
16717
16718Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16719 switch (Subtarget.getCPUDirective()) {
16720 default: break;
16721 case PPC::DIR_970:
16722 case PPC::DIR_PWR4:
16723 case PPC::DIR_PWR5:
16724 case PPC::DIR_PWR5X:
16725 case PPC::DIR_PWR6:
16726 case PPC::DIR_PWR6X:
16727 case PPC::DIR_PWR7:
16728 case PPC::DIR_PWR8:
16729 case PPC::DIR_PWR9:
16730 case PPC::DIR_PWR10:
16731 case PPC::DIR_PWR11:
16732 case PPC::DIR_PWR_FUTURE: {
16733 if (!ML)
16734 break;
16735
16736 if (!DisableInnermostLoopAlign32) {
16737 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
16738 // so that we can decrease cache misses and branch-prediction misses.
16739 // Actual alignment of the loop will depend on the hotness check and other
16740 // logic in alignBlocks.
16741 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16742 return Align(32);
16743 }
16744
16745 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16746
16747 // For small loops (between 5 and 8 instructions), align to a 32-byte
16748 // boundary so that the entire loop fits in one instruction-cache line.
16749 uint64_t LoopSize = 0;
16750 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16751 for (const MachineInstr &J : **I) {
16752 LoopSize += TII->getInstSizeInBytes(J);
16753 if (LoopSize > 32)
16754 break;
16755 }
16756
16757 if (LoopSize > 16 && LoopSize <= 32)
16758 return Align(32);
16759
16760 break;
16761 }
16762 }
16763
16764 return TargetLowering::getPrefLoopAlignment(ML);
16765}
16766
16767/// getConstraintType - Given a constraint, return the type of
16768/// constraint it is for this target.
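/// For example (illustrative), in
///   asm("add %0, %1, %2" : "=r"(sum) : "r"(a), "b"(b));
/// both 'r' and 'b' are register-class constraints, while 'Z' would request
/// an indexed (r+r) memory operand.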
16769PPCTargetLowering::ConstraintType
16770PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16771 if (Constraint.size() == 1) {
16772 switch (Constraint[0]) {
16773 default: break;
16774 case 'b':
16775 case 'r':
16776 case 'f':
16777 case 'd':
16778 case 'v':
16779 case 'y':
16780 return C_RegisterClass;
16781 case 'Z':
16782 // FIXME: While Z does indicate a memory constraint, it specifically
16783 // indicates an r+r address (used in conjunction with the 'y' modifier
16784 // in the replacement string). Currently, we're forcing the base
16785 // register to be r0 in the asm printer (which is interpreted as zero)
16786 // and forming the complete address in the second register. This is
16787 // suboptimal.
16788 return C_Memory;
16789 }
16790 } else if (Constraint == "wc") { // individual CR bits.
16791 return C_RegisterClass;
16792 } else if (Constraint == "wa" || Constraint == "wd" ||
16793 Constraint == "wf" || Constraint == "ws" ||
16794 Constraint == "wi" || Constraint == "ww") {
16795 return C_RegisterClass; // VSX registers.
16796 }
16797 return TargetLowering::getConstraintType(Constraint);
16798}
16799
16800/// Examine constraint type and operand type and determine a weight value.
16801/// This object must already have been set up with the operand type
16802/// and the current alternative constraint selected.
16803TargetLowering::ConstraintWeight
16804PPCTargetLowering::getSingleConstraintMatchWeight(
16805 AsmOperandInfo &info, const char *constraint) const {
16806 ConstraintWeight weight = CW_Invalid;
16807 Value *CallOperandVal = info.CallOperandVal;
16808 // If we don't have a value, we can't do a match,
16809 // but allow it at the lowest weight.
16810 if (!CallOperandVal)
16811 return CW_Default;
16812 Type *type = CallOperandVal->getType();
16813
16814 // Look at the constraint type.
16815 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16816 return CW_Register; // an individual CR bit.
16817 else if ((StringRef(constraint) == "wa" ||
16818 StringRef(constraint) == "wd" ||
16819 StringRef(constraint) == "wf") &&
16820 type->isVectorTy())
16821 return CW_Register;
16822 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16823 return CW_Register; // 'wi' holds 64-bit integer data.
16824 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16825 return CW_Register;
16826 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16827 return CW_Register;
16828
16829 switch (*constraint) {
16830 default:
16831 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16832 break;
16833 case 'b':
16834 if (type->isIntegerTy())
16835 weight = CW_Register;
16836 break;
16837 case 'f':
16838 if (type->isFloatTy())
16839 weight = CW_Register;
16840 break;
16841 case 'd':
16842 if (type->isDoubleTy())
16843 weight = CW_Register;
16844 break;
16845 case 'v':
16846 if (type->isVectorTy())
16847 weight = CW_Register;
16848 break;
16849 case 'y':
16850 weight = CW_Register;
16851 break;
16852 case 'Z':
16853 weight = CW_Memory;
16854 break;
16855 }
16856 return weight;
16857}
16858
16859std::pair<unsigned, const TargetRegisterClass *>
16860PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16861 StringRef Constraint,
16862 MVT VT) const {
16863 if (Constraint.size() == 1) {
16864 // GCC RS6000 Constraint Letters
16865 switch (Constraint[0]) {
16866 case 'b': // R1-R31
16867 if (VT == MVT::i64 && Subtarget.isPPC64())
16868 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16869 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16870 case 'r': // R0-R31
16871 if (VT == MVT::i64 && Subtarget.isPPC64())
16872 return std::make_pair(0U, &PPC::G8RCRegClass);
16873 return std::make_pair(0U, &PPC::GPRCRegClass);
16874 // 'd' and 'f' constraints are both defined to be "the floating point
16875 // registers", where one is for 32-bit and the other for 64-bit. We don't
16876 // really care overly much here so just give them all the same reg classes.
16877 case 'd':
16878 case 'f':
16879 if (Subtarget.hasSPE()) {
16880 if (VT == MVT::f32 || VT == MVT::i32)
16881 return std::make_pair(0U, &PPC::GPRCRegClass);
16882 if (VT == MVT::f64 || VT == MVT::i64)
16883 return std::make_pair(0U, &PPC::SPERCRegClass);
16884 } else {
16885 if (VT == MVT::f32 || VT == MVT::i32)
16886 return std::make_pair(0U, &PPC::F4RCRegClass);
16887 if (VT == MVT::f64 || VT == MVT::i64)
16888 return std::make_pair(0U, &PPC::F8RCRegClass);
16889 }
16890 break;
16891 case 'v':
16892 if (Subtarget.hasAltivec() && VT.isVector())
16893 return std::make_pair(0U, &PPC::VRRCRegClass);
16894 else if (Subtarget.hasVSX())
16895 // Scalars in Altivec registers only make sense with VSX.
16896 return std::make_pair(0U, &PPC::VFRCRegClass);
16897 break;
16898 case 'y': // crrc
16899 return std::make_pair(0U, &PPC::CRRCRegClass);
16900 }
16901 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16902 // An individual CR bit.
16903 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16904 } else if ((Constraint == "wa" || Constraint == "wd" ||
16905 Constraint == "wf" || Constraint == "wi") &&
16906 Subtarget.hasVSX()) {
16907 // A VSX register for either a scalar (FP) or vector. There is no
16908 // support for single precision scalars on subtargets prior to Power8.
16909 if (VT.isVector())
16910 return std::make_pair(0U, &PPC::VSRCRegClass);
16911 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16912 return std::make_pair(0U, &PPC::VSSRCRegClass);
16913 return std::make_pair(0U, &PPC::VSFRCRegClass);
16914 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16915 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16916 return std::make_pair(0U, &PPC::VSSRCRegClass);
16917 else
16918 return std::make_pair(0U, &PPC::VSFRCRegClass);
16919 } else if (Constraint == "lr") {
16920 if (VT == MVT::i64)
16921 return std::make_pair(0U, &PPC::LR8RCRegClass);
16922 else
16923 return std::make_pair(0U, &PPC::LRRCRegClass);
16924 }
16925
16926 // Handle special cases of physical registers that are not properly handled
16927 // by the base class.
16928 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16929 // If we name a VSX register, we can't defer to the base class because it
16930 // will not recognize the correct register (their names will be VSL{0-31}
16931 // and V{0-31} so they won't match). So we match them here.
16932 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16933 int VSNum = atoi(Constraint.data() + 3);
16934 assert(VSNum >= 0 && VSNum <= 63 &&
16935 "Attempted to access a vsr out of range");
16936 if (VSNum < 32)
16937 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16938 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16939 }
16940
16941 // For float registers, we can't defer to the base class as it will match
16942 // the SPILLTOVSRRC class.
16943 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16944 int RegNum = atoi(Constraint.data() + 2);
16945 if (RegNum > 31 || RegNum < 0)
16946 report_fatal_error("Invalid floating point register number");
16947 if (VT == MVT::f32 || VT == MVT::i32)
16948 return Subtarget.hasSPE()
16949 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16950 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16951 if (VT == MVT::f64 || VT == MVT::i64)
16952 return Subtarget.hasSPE()
16953 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16954 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16955 }
16956 }
16957
16958 std::pair<unsigned, const TargetRegisterClass *> R =
16959 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16960
16961 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16962 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16963 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16964 // register.
16965 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16966 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16967 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16968 PPC::GPRCRegClass.contains(R.first))
16969 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16970 PPC::sub_32, &PPC::G8RCRegClass),
16971 &PPC::G8RCRegClass);
16972
16973 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16974 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16975 R.first = PPC::CR0;
16976 R.second = &PPC::CRRCRegClass;
16977 }
16978 // FIXME: This warning should ideally be emitted in the front end.
16979 const auto &TM = getTargetMachine();
16980 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16981 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16982 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16983 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16984 errs() << "warning: vector registers 20 to 32 are reserved in the "
16985 "default AIX AltiVec ABI and cannot be used\n";
16986 }
16987
16988 return R;
16989}
16990
16991/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16992/// vector. If it is invalid, don't add anything to Ops.
16993 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16994 StringRef Constraint,
16995 std::vector<SDValue> &Ops,
16996 SelectionDAG &DAG) const {
16997 SDValue Result;
16998
16999 // Only support length 1 constraints.
17000 if (Constraint.size() > 1)
17001 return;
17002
17003 char Letter = Constraint[0];
17004 switch (Letter) {
17005 default: break;
17006 case 'I':
17007 case 'J':
17008 case 'K':
17009 case 'L':
17010 case 'M':
17011 case 'N':
17012 case 'O':
17013 case 'P': {
17014 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
17015 if (!CST) return; // Must be an immediate to match.
17016 SDLoc dl(Op);
17017 int64_t Value = CST->getSExtValue();
17018 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17019 // numbers are printed as such.
17020 switch (Letter) {
17021 default: llvm_unreachable("Unknown constraint letter!");
17022 case 'I': // "I" is a signed 16-bit constant.
17023 if (isInt<16>(Value))
17024 Result = DAG.getTargetConstant(Value, dl, TCVT);
17025 break;
17026 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
17027 if (isShiftedUInt<16, 16>(Value))
17028 Result = DAG.getTargetConstant(Value, dl, TCVT);
17029 break;
17030 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
17031 if (isShiftedInt<16, 16>(Value))
17032 Result = DAG.getTargetConstant(Value, dl, TCVT);
17033 break;
17034 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
17035 if (isUInt<16>(Value))
17036 Result = DAG.getTargetConstant(Value, dl, TCVT);
17037 break;
17038 case 'M': // "M" is a constant that is greater than 31.
17039 if (Value > 31)
17040 Result = DAG.getTargetConstant(Value, dl, TCVT);
17041 break;
17042 case 'N': // "N" is a positive constant that is an exact power of two.
17043 if (Value > 0 && isPowerOf2_64(Value))
17044 Result = DAG.getTargetConstant(Value, dl, TCVT);
17045 break;
17046 case 'O': // "O" is the constant zero.
17047 if (Value == 0)
17048 Result = DAG.getTargetConstant(Value, dl, TCVT);
17049 break;
17050 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17051 if (isInt<16>(-Value))
17052 Result = DAG.getTargetConstant(Value, dl, TCVT);
17053 break;
17054 }
17055 break;
17056 }
17057 }
17058
17059 if (Result.getNode()) {
17060 Ops.push_back(Result);
17061 return;
17062 }
17063
17064 // Handle standard constraint letters.
17065 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17066}
17067
17068 void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17069 SmallVectorImpl<SDValue> &Ops,
17070 SelectionDAG &DAG) const {
17071 if (I.getNumOperands() <= 1)
17072 return;
17073 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17074 return;
17075 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17076 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17077 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17078 return;
17079
17080 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17081 Ops.push_back(DAG.getMDNode(MDN));
17082}
17083
17084// isLegalAddressingMode - Return true if the addressing mode represented
17085// by AM is legal for this target, for a load/store of the specified type.
17086 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17087 const AddrMode &AM, Type *Ty,
17088 unsigned AS,
17089 Instruction *I) const {
17090 // Vector type r+i form is supported since power9 as DQ form. We don't check
17091 // the offset matching DQ form requirement (off % 16 == 0), because on PowerPC,
17092 // imm form is preferred and the offset can be adjusted to use imm form later
17093 // in pass PPCLoopInstrFormPrep. Also, LSR uses the min and max offset of an
17094 // LSRUse to check for a legal addressing mode, so we should be a little
17095 // aggressive here to accommodate other offsets for that LSRUse.
17096 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17097 return false;
17098
17099 // PPC allows a sign-extended 16-bit immediate field.
17100 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17101 return false;
17102
17103 // No global is ever allowed as a base.
17104 if (AM.BaseGV)
17105 return false;
17106
17107 // PPC only supports r+r,
17108 switch (AM.Scale) {
17109 case 0: // "r+i" or just "i", depending on HasBaseReg.
17110 break;
17111 case 1:
17112 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17113 return false;
17114 // Otherwise we have r+r or r+i.
17115 break;
17116 case 2:
17117 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17118 return false;
17119 // Allow 2*r as r+r.
17120 break;
17121 default:
17122 // No other scales are supported.
17123 return false;
17124 }
17125
17126 return true;
17127}
17128
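// Lowering note: for a non-zero depth, the return address lives in the
// caller's frame, so the code below first obtains the caller's frame address
// via LowerFRAMEADDR and then loads the saved LR at the ABI's return-save
// offset.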
17129SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17130 SelectionDAG &DAG) const {
17131 MachineFunction &MF = DAG.getMachineFunction();
17132 MachineFrameInfo &MFI = MF.getFrameInfo();
17133 MFI.setReturnAddressIsTaken(true);
17134
17135 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17136 return SDValue();
17137
17138 SDLoc dl(Op);
17139 unsigned Depth = Op.getConstantOperandVal(0);
17140
17141 // Make sure the function does not optimize away the store of the RA to
17142 // the stack.
17143 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17144 FuncInfo->setLRStoreRequired();
17145 bool isPPC64 = Subtarget.isPPC64();
17146 auto PtrVT = getPointerTy(MF.getDataLayout());
17147
17148 if (Depth > 0) {
17149 // The link register (return address) is saved in the caller's frame
17150 // not the callee's stack frame. So we must get the caller's frame
17151 // address and load the return address at the LR offset from there.
17152 SDValue FrameAddr =
17153 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17154 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17155 SDValue Offset =
17156 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17157 isPPC64 ? MVT::i64 : MVT::i32);
17158 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17159 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17160 MachinePointerInfo());
17161 }
17162
17163 // Just load the return address off the stack.
17164 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17165 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17166 MachinePointerInfo());
17167}
17168
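// Lowering note: FRAMEADDR starts from the frame pointer (or R1/X1 in naked
// functions) and walks the stack back-chain once per requested depth by
// loading through the saved stack pointer.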
17169SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17170 SelectionDAG &DAG) const {
17171 SDLoc dl(Op);
17172 unsigned Depth = Op.getConstantOperandVal(0);
17173
17174 MachineFunction &MF = DAG.getMachineFunction();
17175 MachineFrameInfo &MFI = MF.getFrameInfo();
17176 MFI.setFrameAddressIsTaken(true);
17177
17178 EVT PtrVT = getPointerTy(MF.getDataLayout());
17179 bool isPPC64 = PtrVT == MVT::i64;
17180
17181 // Naked functions never have a frame pointer, and so we use r1. For all
17182 // other functions, this decision must be delayed until during PEI.
17183 unsigned FrameReg;
17184 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17185 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17186 else
17187 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17188
17189 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17190 PtrVT);
17191 while (Depth--)
17192 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17193 FrameAddr, MachinePointerInfo());
17194 return FrameAddr;
17195}
17196
17197// FIXME? Maybe this could be a TableGen attribute on some registers and
17198// this table could be generated automatically from RegInfo.
17199 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
17200 const MachineFunction &MF) const {
17201 bool isPPC64 = Subtarget.isPPC64();
17202
17203 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17204 if (!is64Bit && VT != LLT::scalar(32))
17205 report_fatal_error("Invalid register global variable type");
17206
17207 Register Reg = StringSwitch<Register>(RegName)
17208 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17209 .Case("r2", isPPC64 ? Register() : PPC::R2)
17210 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17211 .Default(Register());
17212
17213 if (Reg)
17214 return Reg;
17215 report_fatal_error("Invalid register name global variable");
17216}
17217
17218 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17219 // The 32-bit SVR4 ABI accesses everything as got-indirect.
17220 if (Subtarget.is32BitELFABI())
17221 return true;
17222
17223 // AIX accesses everything indirectly through the TOC, which is similar to
17224 // the GOT.
17225 if (Subtarget.isAIXABI())
17226 return true;
17227
17228 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17229 // If it is small or large code model, module locals are accessed
17230 // indirectly by loading their address from .toc/.got.
17231 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17232 return true;
17233
17234 // JumpTable and BlockAddress are accessed as got-indirect.
17235 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17236 return true;
17237
17238 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17239 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17240
17241 return false;
17242}
17243
17244bool
17245 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17246 // The PowerPC target isn't yet aware of offsets.
17247 return false;
17248}
17249
17250 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17251 const CallInst &I,
17252 MachineFunction &MF,
17253 unsigned Intrinsic) const {
17254 switch (Intrinsic) {
17255 case Intrinsic::ppc_atomicrmw_xchg_i128:
17256 case Intrinsic::ppc_atomicrmw_add_i128:
17257 case Intrinsic::ppc_atomicrmw_sub_i128:
17258 case Intrinsic::ppc_atomicrmw_nand_i128:
17259 case Intrinsic::ppc_atomicrmw_and_i128:
17260 case Intrinsic::ppc_atomicrmw_or_i128:
17261 case Intrinsic::ppc_atomicrmw_xor_i128:
17262 case Intrinsic::ppc_cmpxchg_i128:
17263 Info.opc = ISD::INTRINSIC_W_CHAIN;
17264 Info.memVT = MVT::i128;
17265 Info.ptrVal = I.getArgOperand(0);
17266 Info.offset = 0;
17267 Info.align = Align(16);
17268 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
17269 MachineMemOperand::MOVolatile;
17270 return true;
17271 case Intrinsic::ppc_atomic_load_i128:
17272 Info.opc = ISD::INTRINSIC_W_CHAIN;
17273 Info.memVT = MVT::i128;
17274 Info.ptrVal = I.getArgOperand(0);
17275 Info.offset = 0;
17276 Info.align = Align(16);
17277 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
17278 return true;
17279 case Intrinsic::ppc_atomic_store_i128:
17280 Info.opc = ISD::INTRINSIC_VOID;
17281 Info.memVT = MVT::i128;
17282 Info.ptrVal = I.getArgOperand(2);
17283 Info.offset = 0;
17284 Info.align = Align(16);
17285 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17286 return true;
17287 case Intrinsic::ppc_altivec_lvx:
17288 case Intrinsic::ppc_altivec_lvxl:
17289 case Intrinsic::ppc_altivec_lvebx:
17290 case Intrinsic::ppc_altivec_lvehx:
17291 case Intrinsic::ppc_altivec_lvewx:
17292 case Intrinsic::ppc_vsx_lxvd2x:
17293 case Intrinsic::ppc_vsx_lxvw4x:
17294 case Intrinsic::ppc_vsx_lxvd2x_be:
17295 case Intrinsic::ppc_vsx_lxvw4x_be:
17296 case Intrinsic::ppc_vsx_lxvl:
17297 case Intrinsic::ppc_vsx_lxvll: {
17298 EVT VT;
17299 switch (Intrinsic) {
17300 case Intrinsic::ppc_altivec_lvebx:
17301 VT = MVT::i8;
17302 break;
17303 case Intrinsic::ppc_altivec_lvehx:
17304 VT = MVT::i16;
17305 break;
17306 case Intrinsic::ppc_altivec_lvewx:
17307 VT = MVT::i32;
17308 break;
17309 case Intrinsic::ppc_vsx_lxvd2x:
17310 case Intrinsic::ppc_vsx_lxvd2x_be:
17311 VT = MVT::v2f64;
17312 break;
17313 default:
17314 VT = MVT::v4i32;
17315 break;
17316 }
17317
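// These Altivec/VSX load intrinsics ignore the low bits of the address, so
// the accessed memory is modeled conservatively as starting
// (store-size - 1) bytes before the pointer and spanning
// (2 * store-size - 1) bytes.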
17318 Info.opc = ISD::INTRINSIC_W_CHAIN;
17319 Info.memVT = VT;
17320 Info.ptrVal = I.getArgOperand(0);
17321 Info.offset = -VT.getStoreSize()+1;
17322 Info.size = 2*VT.getStoreSize()-1;
17323 Info.align = Align(1);
17324 Info.flags = MachineMemOperand::MOLoad;
17325 return true;
17326 }
17327 case Intrinsic::ppc_altivec_stvx:
17328 case Intrinsic::ppc_altivec_stvxl:
17329 case Intrinsic::ppc_altivec_stvebx:
17330 case Intrinsic::ppc_altivec_stvehx:
17331 case Intrinsic::ppc_altivec_stvewx:
17332 case Intrinsic::ppc_vsx_stxvd2x:
17333 case Intrinsic::ppc_vsx_stxvw4x:
17334 case Intrinsic::ppc_vsx_stxvd2x_be:
17335 case Intrinsic::ppc_vsx_stxvw4x_be:
17336 case Intrinsic::ppc_vsx_stxvl:
17337 case Intrinsic::ppc_vsx_stxvll: {
17338 EVT VT;
17339 switch (Intrinsic) {
17340 case Intrinsic::ppc_altivec_stvebx:
17341 VT = MVT::i8;
17342 break;
17343 case Intrinsic::ppc_altivec_stvehx:
17344 VT = MVT::i16;
17345 break;
17346 case Intrinsic::ppc_altivec_stvewx:
17347 VT = MVT::i32;
17348 break;
17349 case Intrinsic::ppc_vsx_stxvd2x:
17350 case Intrinsic::ppc_vsx_stxvd2x_be:
17351 VT = MVT::v2f64;
17352 break;
17353 default:
17354 VT = MVT::v4i32;
17355 break;
17356 }
17357
17358 Info.opc = ISD::INTRINSIC_VOID;
17359 Info.memVT = VT;
17360 Info.ptrVal = I.getArgOperand(1);
17361 Info.offset = -VT.getStoreSize()+1;
17362 Info.size = 2*VT.getStoreSize()-1;
17363 Info.align = Align(1);
17364 Info.flags = MachineMemOperand::MOStore;
17365 return true;
17366 }
17367 case Intrinsic::ppc_stdcx:
17368 case Intrinsic::ppc_stwcx:
17369 case Intrinsic::ppc_sthcx:
17370 case Intrinsic::ppc_stbcx: {
17371 EVT VT;
17372 auto Alignment = Align(8);
17373 switch (Intrinsic) {
17374 case Intrinsic::ppc_stdcx:
17375 VT = MVT::i64;
17376 break;
17377 case Intrinsic::ppc_stwcx:
17378 VT = MVT::i32;
17379 Alignment = Align(4);
17380 break;
17381 case Intrinsic::ppc_sthcx:
17382 VT = MVT::i16;
17383 Alignment = Align(2);
17384 break;
17385 case Intrinsic::ppc_stbcx:
17386 VT = MVT::i8;
17387 Alignment = Align(1);
17388 break;
17389 }
17390 Info.opc = ISD::INTRINSIC_W_CHAIN;
17391 Info.memVT = VT;
17392 Info.ptrVal = I.getArgOperand(0);
17393 Info.offset = 0;
17394 Info.align = Alignment;
17395 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17396 return true;
17397 }
17398 default:
17399 break;
17400 }
17401
17402 return false;
17403}
17404
17405/// It returns EVT::Other if the type should be determined using generic
17406/// target-independent logic.
17407 EVT PPCTargetLowering::getOptimalMemOpType(
17408 const MemOp &Op, const AttributeList &FuncAttributes) const {
17409 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17410 // We should use Altivec/VSX loads and stores when available. For unaligned
17411 // addresses, unaligned VSX loads are only fast starting with the P8.
17412 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17413 if (Op.isMemset() && Subtarget.hasVSX()) {
17414 uint64_t TailSize = Op.size() % 16;
17415 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
17416 // element if the vector element type matches the tail store. For a tail
17417 // size of 3 or 4 the tail store is i32, so v4i32 cannot be used; use v8i16.
17418 if (TailSize > 2 && TailSize <= 4) {
17419 return MVT::v8i16;
17420 }
17421 return MVT::v4i32;
17422 }
17423 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17424 return MVT::v4i32;
17425 }
17426 }
17427
17428 if (Subtarget.isPPC64()) {
17429 return MVT::i64;
17430 }
17431
17432 return MVT::i32;
17433}
17434
17435/// Returns true if it is beneficial to convert a load of a constant
17436/// to just the constant itself.
17437 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17438 Type *Ty) const {
17439 assert(Ty->isIntegerTy());
17440
17441 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17442 return !(BitSize == 0 || BitSize > 64);
17443}
17444
17445 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17446 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17447 return false;
17448 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17449 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17450 return NumBits1 == 64 && NumBits2 == 32;
17451}
17452
17453 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17454 if (!VT1.isInteger() || !VT2.isInteger())
17455 return false;
17456 unsigned NumBits1 = VT1.getSizeInBits();
17457 unsigned NumBits2 = VT2.getSizeInBits();
17458 return NumBits1 == 64 && NumBits2 == 32;
17459}
17460
17461 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17462 // Generally speaking, zexts are not free, but they are free when they can be
17463 // folded with other operations.
17464 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17465 EVT MemVT = LD->getMemoryVT();
17466 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17467 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17468 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17469 LD->getExtensionType() == ISD::ZEXTLOAD))
17470 return true;
17471 }
17472
17473 // FIXME: Add other cases...
17474 // - 32-bit shifts with a zext to i64
17475 // - zext after ctlz, bswap, etc.
17476 // - zext after and by a constant mask
17477
17478 return TargetLowering::isZExtFree(Val, VT2);
17479}
17480
17481bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17482 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17483 "invalid fpext types");
17484 // Extending to float128 is not free.
17485 if (DestVT == MVT::f128)
17486 return false;
17487 return true;
17488}
17489
17490 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17491 return isInt<16>(Imm) || isUInt<16>(Imm);
17492}
17493
17494 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17495 return isInt<16>(Imm) || isUInt<16>(Imm);
17496}
17497
17498 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17499 MachineMemOperand::Flags,
17500 unsigned *Fast) const {
17501 if (DisablePPCUnaligned)
17502 return false;
17503
17504 // PowerPC supports unaligned memory access for simple non-vector types.
17505 // Although accessing unaligned addresses is not as efficient as accessing
17506 // aligned addresses, it is generally more efficient than manual expansion,
17507 // and generally only traps for software emulation when crossing page
17508 // boundaries.
17509
17510 if (!VT.isSimple())
17511 return false;
17512
17513 if (VT.isFloatingPoint() && !VT.isVector() &&
17514 !Subtarget.allowsUnalignedFPAccess())
17515 return false;
17516
17517 if (VT.getSimpleVT().isVector()) {
17518 if (Subtarget.hasVSX()) {
17519 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17520 VT != MVT::v4f32 && VT != MVT::v4i32)
17521 return false;
17522 } else {
17523 return false;
17524 }
17525 }
17526
17527 if (VT == MVT::ppcf128)
17528 return false;
17529
17530 if (Fast)
17531 *Fast = 1;
17532
17533 return true;
17534}
17535
17536 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17537 SDValue C) const {
17538 // Check integral scalar types.
17539 if (!VT.isScalarInteger())
17540 return false;
17541 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17542 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17543 return false;
17544 // This transformation will generate >= 2 operations. But the following
17545 // cases will generate <= 2 instructions during ISel, so exclude them:
17546 // 1. If the constant multiplier fits in 16 bits, it can be handled by a
17547 // single HW instruction, i.e. MULLI.
17548 // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
17549 // only one extra shift is needed, i.e. MULLI plus RLDICR.
17550 int64_t Imm = ConstNode->getSExtValue();
17551 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17552 Imm >>= Shift;
17553 if (isInt<16>(Imm))
17554 return false;
17555 uint64_t UImm = static_cast<uint64_t>(Imm);
17556 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17557 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17558 return true;
17559 }
17560 return false;
17561}
17562
17563 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17564 EVT VT) const {
17565 return isFMAFasterThanFMulAndFAdd(
17566 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
17567}
17568
17569 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17570 Type *Ty) const {
17571 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17572 return false;
17573 switch (Ty->getScalarType()->getTypeID()) {
17574 case Type::FloatTyID:
17575 case Type::DoubleTyID:
17576 return true;
17577 case Type::FP128TyID:
17578 return Subtarget.hasP9Vector();
17579 default:
17580 return false;
17581 }
17582}
17583
17584// FIXME: add more patterns which are not profitable to hoist.
17585 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17586 if (!I->hasOneUse())
17587 return true;
17588
17589 Instruction *User = I->user_back();
17590 assert(User && "A single use instruction with no uses.");
17591
17592 switch (I->getOpcode()) {
17593 case Instruction::FMul: {
17594 // Don't break FMA, PowerPC prefers FMA.
17595 if (User->getOpcode() != Instruction::FSub &&
17596 User->getOpcode() != Instruction::FAdd)
17597 return true;
17598
17599 const TargetOptions &Options = getTargetMachine().Options;
17600 const Function *F = I->getFunction();
17601 const DataLayout &DL = F->getDataLayout();
17602 Type *Ty = User->getOperand(0)->getType();
17603
17604 return !(
17605 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17606 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17607 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17608 }
17609 case Instruction::Load: {
17610 // Don't break the "store (load float*)" pattern; it will be combined
17611 // to "store (load int32)" in a later InstCombine pass. See function
17612 // combineLoadToOperationType. On PowerPC, loading a floating-point value
17613 // takes more cycles than loading a 32-bit integer.
17614 LoadInst *LI = cast<LoadInst>(I);
17615 // For loads that combineLoadToOperationType leaves alone, such as
17616 // ordered loads, it should be profitable to hoist them.
17617 // A swifterror load can only be of pointer-to-pointer type, so the
17618 // later type check gets rid of this case.
17619 if (!LI->isUnordered())
17620 return true;
17621
17622 if (User->getOpcode() != Instruction::Store)
17623 return true;
17624
17625 if (I->getType()->getTypeID() != Type::FloatTyID)
17626 return true;
17627
17628 return false;
17629 }
17630 default:
17631 return true;
17632 }
17633 return true;
17634}
17635
17636const MCPhysReg *
17637 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17638 // LR is a callee-save register, but we must treat it as clobbered by any call
17639 // site. Hence we include LR in the scratch registers, which are in turn added
17640 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17641 // to CTR, which is used by any indirect call.
17642 static const MCPhysReg ScratchRegs[] = {
17643 PPC::X12, PPC::LR8, PPC::CTR8, 0
17644 };
17645
17646 return ScratchRegs;
17647}
17648
17649 Register PPCTargetLowering::getExceptionPointerRegister(
17650 const Constant *PersonalityFn) const {
17651 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17652}
17653
17654 Register PPCTargetLowering::getExceptionSelectorRegister(
17655 const Constant *PersonalityFn) const {
17656 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17657}
17658
17659bool
17660 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17661 EVT VT , unsigned DefinedValues) const {
17662 if (VT == MVT::v2i64)
17663 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17664
17665 if (Subtarget.hasVSX())
17666 return true;
17667
17668 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17669}
17670
17671 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17672 if (DisableILPPref || Subtarget.enableMachineScheduler())
17673 return TargetLowering::getSchedulingPreference(N);
17674
17675 return Sched::ILP;
17676}
17677
17678// Create a fast isel object.
17679FastISel *
17680 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17681 const TargetLibraryInfo *LibInfo) const {
17682 return PPC::createFastISel(FuncInfo, LibInfo);
17683}
17684
17685// 'Inverted' means the FMA opcode after negating one multiplicand.
17686// For example, (fma -a b c) = (fnmsub a b c)
17687static unsigned invertFMAOpcode(unsigned Opc) {
17688 switch (Opc) {
17689 default:
17690 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17691 case ISD::FMA:
17692 return PPCISD::FNMSUB;
17693 case PPCISD::FNMSUB:
17694 return ISD::FMA;
17695 }
17696}
17697
17698 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17699 bool LegalOps, bool OptForSize,
17700 NegatibleCost &Cost,
17701 unsigned Depth) const {
17702 if (Depth > SelectionDAG::MaxRecursionDepth)
17703 return SDValue();
17704
17705 unsigned Opc = Op.getOpcode();
17706 EVT VT = Op.getValueType();
17707 SDNodeFlags Flags = Op.getNode()->getFlags();
17708
17709 switch (Opc) {
17710 case PPCISD::FNMSUB:
17711 if (!Op.hasOneUse() || !isTypeLegal(VT))
17712 break;
17713
17714 const TargetOptions &Options = getTargetMachine().Options;
17715 SDValue N0 = Op.getOperand(0);
17716 SDValue N1 = Op.getOperand(1);
17717 SDValue N2 = Op.getOperand(2);
17718 SDLoc Loc(Op);
17719
17720 NegatibleCost N2Cost = NegatibleCost::Expensive;
17721 SDValue NegN2 =
17722 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17723
17724 if (!NegN2)
17725 return SDValue();
17726
17727 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17728 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17729 // These transformations may change sign of zeroes. For example,
17730 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17731 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17732 // Try and choose the cheaper one to negate.
17733 NegatibleCost N0Cost = NegatibleCost::Expensive;
17734 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17735 N0Cost, Depth + 1);
17736
17737 NegatibleCost N1Cost = NegatibleCost::Expensive;
17738 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17739 N1Cost, Depth + 1);
17740
17741 if (NegN0 && N0Cost <= N1Cost) {
17742 Cost = std::min(N0Cost, N2Cost);
17743 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17744 } else if (NegN1) {
17745 Cost = std::min(N1Cost, N2Cost);
17746 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17747 }
17748 }
17749
17750 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17751 if (isOperationLegal(ISD::FMA, VT)) {
17752 Cost = N2Cost;
17753 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17754 }
17755
17756 break;
17757 }
17758
17759 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17760 Cost, Depth);
17761}
17762
17763// Override to enable LOAD_STACK_GUARD lowering on Linux.
17765 if (!Subtarget.isTargetLinux())
17767 return true;
17768}
17769
17770// Override to disable global variable loading on Linux and insert AIX canary
17771// word declaration.
17772 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17773 if (Subtarget.isAIXABI()) {
17774 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17775 PointerType::getUnqual(M.getContext()));
17776 return;
17777 }
17778 if (!Subtarget.isTargetLinux())
17779 return TargetLowering::insertSSPDeclarations(M);
17780}
17781
17782 Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17783 if (Subtarget.isAIXABI())
17784 return M.getGlobalVariable(AIXSSPCanaryWordName);
17785 return TargetLowering::getSDagStackGuard(M);
17786}
17787
17788 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17789 bool ForCodeSize) const {
17790 if (!VT.isSimple() || !Subtarget.hasVSX())
17791 return false;
17792
17793 switch(VT.getSimpleVT().SimpleTy) {
17794 default:
17795 // For FP types that are currently not supported by PPC backend, return
17796 // false. Examples: f16, f80.
17797 return false;
17798 case MVT::f32:
17799 case MVT::f64: {
17800 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
17801 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17802 return true;
17803 }
17804 bool IsExact;
17805 APSInt IntResult(16, false);
17806 // The rounding mode doesn't really matter because we only care about floats
17807 // that can be converted to integers exactly.
17808 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17809 // For exact values in the range [-16, 15] we can materialize the float.
17810 if (IsExact && IntResult <= 15 && IntResult >= -16)
17811 return true;
17812 return Imm.isZero();
17813 }
17814 case MVT::ppcf128:
17815 return Imm.isPosZero();
17816 }
17817}
17818
17819// For vector shift operation op, fold
17820// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17821 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17822 SelectionDAG &DAG) {
17823 SDValue N0 = N->getOperand(0);
17824 SDValue N1 = N->getOperand(1);
17825 EVT VT = N0.getValueType();
17826 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17827 unsigned Opcode = N->getOpcode();
17828 unsigned TargetOpcode;
17829
17830 switch (Opcode) {
17831 default:
17832 llvm_unreachable("Unexpected shift operation");
17833 case ISD::SHL:
17834 TargetOpcode = PPCISD::SHL;
17835 break;
17836 case ISD::SRL:
17837 TargetOpcode = PPCISD::SRL;
17838 break;
17839 case ISD::SRA:
17840 TargetOpcode = PPCISD::SRA;
17841 break;
17842 }
17843
17844 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17845 N1->getOpcode() == ISD::AND)
17846 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17847 if (Mask->getZExtValue() == OpSizeInBits - 1)
17848 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17849
17850 return SDValue();
17851}
17852
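// combineSHL: besides stripping the modulo on the shift amount, fold
// (shl (sext i32 x), c) into EXTSWSLI on 64-bit ISA 3.0 subtargets so the
// sign extension and the shift become a single instruction.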
17853SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17854 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17855 return Value;
17856
17857 SDValue N0 = N->getOperand(0);
17858 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17859 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17860 N0.getOpcode() != ISD::SIGN_EXTEND ||
17861 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17862 N->getValueType(0) != MVT::i64)
17863 return SDValue();
17864
17865 // We can't save an operation here if the value is already extended, and
17866 // the existing shift is easier to combine.
17867 SDValue ExtsSrc = N0.getOperand(0);
17868 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17869 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17870 return SDValue();
17871
17872 SDLoc DL(N0);
17873 SDValue ShiftBy = SDValue(CN1, 0);
17874 // We want the shift amount to be i32 on the extswli, but the shift amount
17875 // could be an i64.
17876 if (ShiftBy.getValueType() == MVT::i64)
17877 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17878
17879 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17880 ShiftBy);
17881}
17882
17883SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17884 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17885 return Value;
17886
17887 return SDValue();
17888}
17889
17890SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17891 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17892 return Value;
17893
17894 return SDValue();
17895}
17896
17897// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17898// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17899// When C is zero, the equation (addi Z, -C) can be simplified to Z
17900// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17901 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17902 const PPCSubtarget &Subtarget) {
17903 if (!Subtarget.isPPC64())
17904 return SDValue();
17905
17906 SDValue LHS = N->getOperand(0);
17907 SDValue RHS = N->getOperand(1);
17908
17909 auto isZextOfCompareWithConstant = [](SDValue Op) {
17910 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17911 Op.getValueType() != MVT::i64)
17912 return false;
17913
17914 SDValue Cmp = Op.getOperand(0);
17915 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17916 Cmp.getOperand(0).getValueType() != MVT::i64)
17917 return false;
17918
17919 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17920 int64_t NegConstant = 0 - Constant->getSExtValue();
17921 // Due to the limitations of the addi instruction,
17922 // -C is required to be in [-32768, 32767].
17923 return isInt<16>(NegConstant);
17924 }
17925
17926 return false;
17927 };
17928
17929 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17930 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17931
17932 // If there is a pattern, canonicalize a zext operand to the RHS.
17933 if (LHSHasPattern && !RHSHasPattern)
17934 std::swap(LHS, RHS);
17935 else if (!LHSHasPattern && !RHSHasPattern)
17936 return SDValue();
17937
17938 SDLoc DL(N);
17939 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17940 SDValue Cmp = RHS.getOperand(0);
17941 SDValue Z = Cmp.getOperand(0);
17942 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17943 int64_t NegConstant = 0 - Constant->getSExtValue();
17944
17945 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17946 default: break;
17947 case ISD::SETNE: {
17948 // when C == 0
17949 // --> addze X, (addic Z, -1).carry
17950 // /
17951 // add X, (zext(setne Z, C))--
17952 // \ when -32768 <= -C <= 32767 && C != 0
17953 // --> addze X, (addic (addi Z, -C), -1).carry
17954 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17955 DAG.getConstant(NegConstant, DL, MVT::i64));
17956 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17957 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17958 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17959 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17960 SDValue(Addc.getNode(), 1));
17961 }
17962 case ISD::SETEQ: {
17963 // when C == 0
17964 // --> addze X, (subfic Z, 0).carry
17965 // /
17966 // add X, (zext(sete Z, C))--
17967 // \ when -32768 <= -C <= 32767 && C != 0
17968 // --> addze X, (subfic (addi Z, -C), 0).carry
17969 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17970 DAG.getConstant(NegConstant, DL, MVT::i64));
17971 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17972 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17973 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17974 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17975 SDValue(Subc.getNode(), 1));
17976 }
17977 }
17978
17979 return SDValue();
17980}
17981
17982// Transform
17983// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17984// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17985// In this case both C1 and C2 must be known constants.
17986// C1+C2 must fit into a 34 bit signed integer.
17987 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17988 const PPCSubtarget &Subtarget) {
17989 if (!Subtarget.isUsingPCRelativeCalls())
17990 return SDValue();
17991
17992 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17993 // If we find that node try to cast the Global Address and the Constant.
17994 SDValue LHS = N->getOperand(0);
17995 SDValue RHS = N->getOperand(1);
17996
17997 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17998 std::swap(LHS, RHS);
17999
18000 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18001 return SDValue();
18002
18003 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
18004 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
18005 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
18006
18007 // Check that both casts succeeded.
18008 if (!GSDN || !ConstNode)
18009 return SDValue();
18010
18011 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
18012 SDLoc DL(GSDN);
18013
18014 // The signed int offset needs to fit in 34 bits.
18015 if (!isInt<34>(NewOffset))
18016 return SDValue();
18017
18018 // The new global address is a copy of the old global address except
18019 // that it has the updated Offset.
18020 SDValue GA =
18021 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
18022 NewOffset, GSDN->getTargetFlags());
18023 SDValue MatPCRel =
18024 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
18025 return MatPCRel;
18026}
18027
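// combineADD: try the ADDZE-based lowering of (add X, zext(setcc Z, C)) and
// the folding of a constant offset into MAT_PCREL_ADDR implemented above.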
18028SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
18029 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
18030 return Value;
18031
18032 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
18033 return Value;
18034
18035 return SDValue();
18036}
18037
18038// Detect TRUNCATE operations on bitcasts of float128 values.
18039 // What we are looking for here is the situation where we extract a subset
18040// of bits from a 128 bit float.
18041// This can be of two forms:
18042// 1) BITCAST of f128 feeding TRUNCATE
18043// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
18044// The reason this is required is because we do not have a legal i128 type
18045// and so we want to prevent having to store the f128 and then reload part
18046// of it.
18047SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
18048 DAGCombinerInfo &DCI) const {
18049 // If we are using CRBits then try that first.
18050 if (Subtarget.useCRBits()) {
18051 // Check if CRBits did anything and return that if it did.
18052 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
18053 return CRTruncValue;
18054 }
18055
18056 SDLoc dl(N);
18057 SDValue Op0 = N->getOperand(0);
18058
18059 // Looking for a truncate of i128 to i64.
18060 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18061 return SDValue();
18062
18063 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18064
18065 // SRL feeding TRUNCATE.
18066 if (Op0.getOpcode() == ISD::SRL) {
18067 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18068 // The right shift has to be by 64 bits.
18069 if (!ConstNode || ConstNode->getZExtValue() != 64)
18070 return SDValue();
18071
18072 // Switch the element number to extract.
18073 EltToExtract = EltToExtract ? 0 : 1;
18074 // Update Op0 past the SRL.
18075 Op0 = Op0.getOperand(0);
18076 }
18077
18078 // BITCAST feeding a TRUNCATE possibly via SRL.
18079 if (Op0.getOpcode() == ISD::BITCAST &&
18080 Op0.getValueType() == MVT::i128 &&
18081 Op0.getOperand(0).getValueType() == MVT::f128) {
18082 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18083 return DCI.DAG.getNode(
18084 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18085 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18086 }
18087 return SDValue();
18088}
18089
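// combineMUL: rewrite multiplications by constants of the form +/-(2^N +/- 1)
// into shift-and-add/sub sequences when the subtarget's cycle counts make
// that profitable (see IsProfitable below).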
18090SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18091 SelectionDAG &DAG = DCI.DAG;
18092
18093 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18094 if (!ConstOpOrElement)
18095 return SDValue();
18096
18097 // An imul is usually smaller than the alternative sequence for a legal type.
18098 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18099 isOperationLegal(ISD::MUL, N->getValueType(0)))
18100 return SDValue();
18101
18102 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18103 switch (this->Subtarget.getCPUDirective()) {
18104 default:
18105 // TODO: enhance the condition for subtarget before pwr8
18106 return false;
18107 case PPC::DIR_PWR8:
18108 // type mul add shl
18109 // scalar 4 1 1
18110 // vector 7 2 2
18111 return true;
18112 case PPC::DIR_PWR9:
18113 case PPC::DIR_PWR10:
18114 case PPC::DIR_PWR11:
18115 case PPC::DIR_PWR_FUTURE:
18116 // type mul add shl
18117 // scalar 5 2 2
18118 // vector 7 2 2
18119
18120 // The cycle ratios of the related operations are shown in the table above.
18121 // Because mul is 5 (scalar) / 7 (vector) while add/sub/shl are all 2 for
18122 // both scalar and vector types, the 2-instruction patterns (add/sub + shl,
18123 // cost 4) are always profitable; but for the 3-instruction pattern
18124 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl cost 6,
18125 // so we should only do it for vector types.
18126 return IsAddOne && IsNeg ? VT.isVector() : true;
18127 }
18128 };
18129
18130 EVT VT = N->getValueType(0);
18131 SDLoc DL(N);
18132
18133 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18134 bool IsNeg = MulAmt.isNegative();
18135 APInt MulAmtAbs = MulAmt.abs();
18136
18137 if ((MulAmtAbs - 1).isPowerOf2()) {
18138 // (mul x, 2^N + 1) => (add (shl x, N), x)
18139 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
18140
18141 if (!IsProfitable(IsNeg, true, VT))
18142 return SDValue();
18143
18144 SDValue Op0 = N->getOperand(0);
18145 SDValue Op1 =
18146 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18147 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18148 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18149
18150 if (!IsNeg)
18151 return Res;
18152
18153 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18154 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18155 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18156 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18157
18158 if (!IsProfitable(IsNeg, false, VT))
18159 return SDValue();
18160
18161 SDValue Op0 = N->getOperand(0);
18162 SDValue Op1 =
18163 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18164 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18165
18166 if (!IsNeg)
18167 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18168 else
18169 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18170
18171 } else {
18172 return SDValue();
18173 }
18174}
18175
18176// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18177// in combiner since we need to check SD flags and other subtarget features.
18178SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18179 DAGCombinerInfo &DCI) const {
18180 SDValue N0 = N->getOperand(0);
18181 SDValue N1 = N->getOperand(1);
18182 SDValue N2 = N->getOperand(2);
18183 SDNodeFlags Flags = N->getFlags();
18184 EVT VT = N->getValueType(0);
18185 SelectionDAG &DAG = DCI.DAG;
18186 const TargetOptions &Options = getTargetMachine().Options;
18187 unsigned Opc = N->getOpcode();
18188 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18189 bool LegalOps = !DCI.isBeforeLegalizeOps();
18190 SDLoc Loc(N);
18191
18192 if (!isOperationLegal(ISD::FMA, VT))
18193 return SDValue();
18194
18195 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18196 // since (fnmsub a b c)=-0 while c-ab=+0.
18197 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18198 return SDValue();
18199
18200 // (fma (fneg a) b c) => (fnmsub a b c)
18201 // (fnmsub (fneg a) b c) => (fma a b c)
18202 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18203 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18204
18205 // (fma a (fneg b) c) => (fnmsub a b c)
18206 // (fnmsub a (fneg b) c) => (fma a b c)
18207 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18208 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18209
18210 return SDValue();
18211}
18212
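// Returns true if the call instruction CI is a candidate for tail-call
// duplication: 64-bit ELF ABI, an actual tail call, a directly called
// non-variadic callee, TCO-compatible calling conventions, and a callee we
// can assume to be DSO-local.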
18213bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18214 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
18215 if (!Subtarget.is64BitELFABI())
18216 return false;
18217
18218 // If not a tail call then no need to proceed.
18219 if (!CI->isTailCall())
18220 return false;
18221
18222 // If sibling calls have been disabled and tail-calls aren't guaranteed
18223 // there is no reason to duplicate.
18224 auto &TM = getTargetMachine();
18225 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18226 return false;
18227
18228 // Can't tail call a function called indirectly, or if it has variadic args.
18229 const Function *Callee = CI->getCalledFunction();
18230 if (!Callee || Callee->isVarArg())
18231 return false;
18232
18233 // Make sure the callee and caller calling conventions are eligible for tco.
18234 const Function *Caller = CI->getParent()->getParent();
18235 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18236 CI->getCallingConv()))
18237 return false;
18238
18239 // If the function is local then we have a good chance at tail-calling it
18240 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18241}
18242
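// Sinking the and is beneficial when the mask fits an andi./andis.
// record-form instruction, because the comparison against zero then comes
// for free from the CR0 result.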
18243bool PPCTargetLowering::
18244isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18245 const Value *Mask = AndI.getOperand(1);
18246 // If the mask is suitable for andi. or andis. we should sink the and.
18247 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18248 // Can't handle constants wider than 64-bits.
18249 if (CI->getBitWidth() > 64)
18250 return false;
18251 int64_t ConstVal = CI->getZExtValue();
18252 return isUInt<16>(ConstVal) ||
18253 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18254 }
18255
18256 // For non-constant masks, we can always use the record-form and.
18257 return true;
18258}
18259
18260/// getAddrModeForFlags - Based on the set of address flags, select the most
18261/// optimal instruction format to match by.
18262PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18263 // This is not a node we should be handling here.
18264 if (Flags == PPC::MOF_None)
18265 return PPC::AM_None;
18266 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18267 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18268 if ((Flags & FlagSet) == FlagSet)
18269 return PPC::AM_DForm;
18270 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18271 if ((Flags & FlagSet) == FlagSet)
18272 return PPC::AM_DSForm;
18273 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18274 if ((Flags & FlagSet) == FlagSet)
18275 return PPC::AM_DQForm;
18276 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18277 if ((Flags & FlagSet) == FlagSet)
18278 return PPC::AM_PrefixDForm;
18279 // If no other forms are selected, return an X-Form as it is the most
18280 // general addressing mode.
18281 return PPC::AM_XForm;
18282}
18283
18284/// Set alignment flags based on whether or not the Frame Index is aligned.
18285/// Utilized when computing flags for address computation when selecting
18286/// load and store instructions.
18287static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18288 SelectionDAG &DAG) {
18289 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18290 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18291 if (!FI)
18292 return;
18293 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18294 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18295 // If this is (add $FI, $S16Imm), the alignment flags are already set
18296 // based on the immediate. We just need to clear the alignment flags
18297 // if the FI alignment is weaker.
18298 if ((FrameIndexAlign % 4) != 0)
18299 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18300 if ((FrameIndexAlign % 16) != 0)
18301 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18302 // If the address is a plain FrameIndex, set alignment flags based on
18303 // FI alignment.
18304 if (!IsAdd) {
18305 if ((FrameIndexAlign % 4) == 0)
18306 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18307 if ((FrameIndexAlign % 16) == 0)
18308 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18309 }
18310}
18311
18312/// Given a node, compute flags that are used for address computation when
18313/// selecting load and store instructions. The flags computed are stored in
18314/// FlagSet. This function takes into account whether the node is a constant,
18315 /// an ADD or OR, or neither, and computes the address flags accordingly.
18316static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18317 SelectionDAG &DAG) {
18318 // Set the alignment flags for the node depending on if the node is
18319 // 4-byte or 16-byte aligned.
18320 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18321 if ((Imm & 0x3) == 0)
18322 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18323 if ((Imm & 0xf) == 0)
18324 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18325 };
18326
18327 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18328 // All 32-bit constants can be computed as LIS + Disp.
18329 const APInt &ConstImm = CN->getAPIntValue();
18330 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18331 FlagSet |= PPC::MOF_AddrIsSImm32;
18332 SetAlignFlagsForImm(ConstImm.getZExtValue());
18333 setAlignFlagsForFI(N, FlagSet, DAG);
18334 }
18335 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18336 FlagSet |= PPC::MOF_RPlusSImm34;
18337 else // Let constant materialization handle large constants.
18338 FlagSet |= PPC::MOF_NotAddNorCst;
18339 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18340 // This address can be represented as an addition of:
18341 // - Register + Imm16 (possibly a multiple of 4/16)
18342 // - Register + Imm34
18343 // - Register + PPCISD::Lo
18344 // - Register + Register
18345 // In any case, we won't have to match this as Base + Zero.
18346 SDValue RHS = N.getOperand(1);
18347 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18348 const APInt &ConstImm = CN->getAPIntValue();
18349 if (ConstImm.isSignedIntN(16)) {
18350 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18351 SetAlignFlagsForImm(ConstImm.getZExtValue());
18352 setAlignFlagsForFI(N, FlagSet, DAG);
18353 }
18354 if (ConstImm.isSignedIntN(34))
18355 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18356 else
18357 FlagSet |= PPC::MOF_RPlusR; // Register.
18358 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18359 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18360 else
18361 FlagSet |= PPC::MOF_RPlusR;
18362 } else { // The address computation is not a constant or an addition.
18363 setAlignFlagsForFI(N, FlagSet, DAG);
18364 FlagSet |= PPC::MOF_NotAddNorCst;
18365 }
18366}
18367
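// Returns true if N is an address that can be materialized PC-relatively:
// either an explicit MAT_PCREL_ADDR node or a constant-pool, global-address,
// jump-table or block-address node flagged for PC-relative access.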
18368static bool isPCRelNode(SDValue N) {
18369 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18370 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18371 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18372 isValidPCRelNode<JumpTableSDNode>(N) ||
18373 isValidPCRelNode<BlockAddressSDNode>(N));
18374}
18375
18376 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18377/// the address flags of the load/store instruction that is to be matched.
18378unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18379 SelectionDAG &DAG) const {
18380 unsigned FlagSet = PPC::MOF_None;
18381
18382 // Compute subtarget flags.
18383 if (!Subtarget.hasP9Vector())
18384 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18385 else
18386 FlagSet |= PPC::MOF_SubtargetP9;
18387
18388 if (Subtarget.hasPrefixInstrs())
18389 FlagSet |= PPC::MOF_SubtargetP10;
18390
18391 if (Subtarget.hasSPE())
18392 FlagSet |= PPC::MOF_SubtargetSPE;
18393
18394 // Check if we have a PCRel node and return early.
18395 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18396 return FlagSet;
18397
18398 // If the node is the paired load/store intrinsics, compute flags for
18399 // address computation and return early.
18400 unsigned ParentOp = Parent->getOpcode();
18401 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18402 (ParentOp == ISD::INTRINSIC_VOID))) {
18403 unsigned ID = Parent->getConstantOperandVal(1);
18404 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18405 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18406 ? Parent->getOperand(2)
18407 : Parent->getOperand(3);
18408 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18409 FlagSet |= PPC::MOF_Vector;
18410 return FlagSet;
18411 }
18412 }
18413
18414 // Mark this as something we don't want to handle here if it is atomic
18415 // or pre-increment instruction.
18416 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18417 if (LSB->isIndexed())
18418 return PPC::MOF_None;
18419
18420 // Compute in-memory type flags. This is based on if there are scalars,
18421 // floats or vectors.
18422 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18423 assert(MN && "Parent should be a MemSDNode!");
18424 EVT MemVT = MN->getMemoryVT();
18425 unsigned Size = MemVT.getSizeInBits();
18426 if (MemVT.isScalarInteger()) {
18427 assert(Size <= 128 &&
18428 "Not expecting scalar integers larger than 16 bytes!");
18429 if (Size < 32)
18430 FlagSet |= PPC::MOF_SubWordInt;
18431 else if (Size == 32)
18432 FlagSet |= PPC::MOF_WordInt;
18433 else
18434 FlagSet |= PPC::MOF_DoubleWordInt;
18435 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18436 if (Size == 128)
18437 FlagSet |= PPC::MOF_Vector;
18438 else if (Size == 256) {
18439 assert(Subtarget.pairedVectorMemops() &&
18440 "256-bit vectors are only available when paired vector memops is "
18441 "enabled!");
18442 FlagSet |= PPC::MOF_Vector;
18443 } else
18444 llvm_unreachable("Not expecting illegal vectors!");
18445 } else { // Floating point type: can be scalar, f128 or vector types.
18446 if (Size == 32 || Size == 64)
18447 FlagSet |= PPC::MOF_ScalarFloat;
18448 else if (MemVT == MVT::f128 || MemVT.isVector())
18449 FlagSet |= PPC::MOF_Vector;
18450 else
18451 llvm_unreachable("Not expecting illegal scalar floats!");
18452 }
18453
18454 // Compute flags for address computation.
18455 computeFlagsForAddressComputation(N, FlagSet, DAG);
18456
18457 // Compute type extension flags.
18458 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18459 switch (LN->getExtensionType()) {
18460 case ISD::SEXTLOAD:
18461 FlagSet |= PPC::MOF_SExt;
18462 break;
18463 case ISD::EXTLOAD:
18464 case ISD::ZEXTLOAD:
18465 FlagSet |= PPC::MOF_ZExt;
18466 break;
18467 case ISD::NON_EXTLOAD:
18468 FlagSet |= PPC::MOF_NoExt;
18469 break;
18470 }
18471 } else
18472 FlagSet |= PPC::MOF_NoExt;
18473
18474 // For integers, no extension is the same as zero extension.
18475 // We set the extension mode to zero extension so we don't have
18476 // to add separate entries in AddrModesMap for loads and stores.
18477 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18478 FlagSet |= PPC::MOF_ZExt;
18479 FlagSet &= ~PPC::MOF_NoExt;
18480 }
18481
18482 // If we don't have prefixed instructions, 34-bit constants should be
18483 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18484 bool IsNonP1034BitConst =
18485 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18486 FlagSet) == PPC::MOF_RPlusSImm34;
18487 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18488 IsNonP1034BitConst)
18489 FlagSet |= PPC::MOF_NotAddNorCst;
18490
18491 return FlagSet;
18492}
18493
18494/// SelectForceXFormMode - Given the specified address, force it to be
18495/// represented as an indexed [r+r] operation (an XForm instruction).
18496 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18497 SDValue &Base,
18498 SelectionDAG &DAG) const {
18499
18500 PPC::AddrMode Mode = PPC::AM_XForm;
18501 int16_t ForceXFormImm = 0;
18502 if (provablyDisjointOr(DAG, N) &&
18503 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18504 Disp = N.getOperand(0);
18505 Base = N.getOperand(1);
18506 return Mode;
18507 }
18508
18509 // If the address is the result of an add, we will utilize the fact that the
18510 // address calculation includes an implicit add. However, we can reduce
18511 // register pressure if we do not materialize a constant just for use as the
18512 // index register. We only get rid of the add if it is not an add of a
18513 // value and a 16-bit signed constant and both have a single use.
18514 if (N.getOpcode() == ISD::ADD &&
18515 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18516 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18517 Disp = N.getOperand(0);
18518 Base = N.getOperand(1);
18519 return Mode;
18520 }
18521
18522 // Otherwise, use R0 as the base register.
18523 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18524 N.getValueType());
18525 Base = N;
18526
18527 return Mode;
18528}
18529
18530 bool PPCTargetLowering::splitValueIntoRegisterParts(
18531 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18532 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18533 EVT ValVT = Val.getValueType();
18534 // If we are splitting a scalar integer into f64 parts (i.e. so they
18535 // can be placed into VFRC registers), we need to zero extend and
18536 // bitcast the values. This will ensure the value is placed into a
18537 // VSR using direct moves or stack operations as needed.
18538 if (PartVT == MVT::f64 &&
18539 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18540 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18541 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18542 Parts[0] = Val;
18543 return true;
18544 }
18545 return false;
18546}
18547
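// Build a call to the named library function with the operands of Op as
// arguments, emitting it as a tail call when the node is in tail position
// and the return types allow it.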
18548SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18549 SelectionDAG &DAG) const {
18550 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18551 TargetLowering::CallLoweringInfo CLI(DAG);
18552 EVT RetVT = Op.getValueType();
18553 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18554 SDValue Callee =
18555 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18556 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18557 TargetLowering::ArgListTy Args;
18558 TargetLowering::ArgListEntry Entry;
18559 for (const SDValue &N : Op->op_values()) {
18560 EVT ArgVT = N.getValueType();
18561 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18562 Entry.Node = N;
18563 Entry.Ty = ArgTy;
18564 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18565 Entry.IsZExt = !Entry.IsSExt;
18566 Args.push_back(Entry);
18567 }
18568
18569 SDValue InChain = DAG.getEntryNode();
18570 SDValue TCChain = InChain;
18571 const Function &F = DAG.getMachineFunction().getFunction();
18572 bool isTailCall =
18573 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18574 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18575 if (isTailCall)
18576 InChain = TCChain;
18577 CLI.setDebugLoc(SDLoc(Op))
18578 .setChain(InChain)
18579 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18580 .setTailCall(isTailCall)
18581 .setSExtResult(SignExtend)
18582 .setZExtResult(!SignExtend)
18583 .setIsPostTypeLegalization(true);
18584 return TLI.LowerCallTo(CLI).first;
18585}
18586
18587SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18588 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18589 SelectionDAG &DAG) const {
18590 if (Op.getValueType() == MVT::f32)
18591 return lowerToLibCall(LibCallFloatName, Op, DAG);
18592
18593 if (Op.getValueType() == MVT::f64)
18594 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18595
18596 return SDValue();
18597}
18598
18599bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18600 SDNodeFlags Flags = Op.getNode()->getFlags();
18601 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18602 Flags.hasNoNaNs() && Flags.hasNoInfs();
18603}
18604
18605bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18606 return Op.getNode()->getFlags().hasApproximateFuncs();
18607}
18608
18609bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18610 return getTargetMachine().Options.PPCGenScalarMASSEntries;
18611}
18612
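// Common helper for the MASS lowerings below: when the node's flags permit
// finite-only math the *_finite entry points are used, otherwise the regular
// ones; lowering only happens when scalar MASS conversion is enabled and the
// afn (approximate-functions) flag is set on the node.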
18613SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18614 const char *LibCallFloatName,
18615 const char *LibCallDoubleNameFinite,
18616 const char *LibCallFloatNameFinite,
18617 SDValue Op,
18618 SelectionDAG &DAG) const {
18619 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18620 return SDValue();
18621
18622 if (!isLowringToMASSFiniteSafe(Op))
18623 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18624 DAG);
18625
18626 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18627 LibCallDoubleNameFinite, Op, DAG);
18628}
18629
18630SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18631 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18632 "__xl_powf_finite", Op, DAG);
18633}
18634
18635SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18636 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18637 "__xl_sinf_finite", Op, DAG);
18638}
18639
18640SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18641 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18642 "__xl_cosf_finite", Op, DAG);
18643}
18644
18645SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18646 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18647 "__xl_logf_finite", Op, DAG);
18648}
18649
18650SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18651 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18652 "__xl_log10f_finite", Op, DAG);
18653}
18654
18655SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18656 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18657 "__xl_expf_finite", Op, DAG);
18658}
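
// A minimal standalone sketch (not LLVM API code) of the name selection that
// lowerLibCallBase and lowerLibCallBasedOnType perform above: f32 picks the
// "f"-suffixed MASS entry point, f64 the plain one, and the finite-safe case
// (nnan/ninf/nsz in addition to afn) switches to the "_finite" variants. The
// helper below is hypothetical; the strings mirror the lowerPow call above.
#include <string>

std::string selectMASSPowName(bool IsF32, bool FiniteSafe) {
  if (!FiniteSafe)
    return IsF32 ? "__xl_powf" : "__xl_pow";
  return IsF32 ? "__xl_powf_finite" : "__xl_pow_finite";
}
// selectMASSPowName(/*IsF32=*/true, /*FiniteSafe=*/true) == "__xl_powf_finite"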
18659
18660// If we happen to match to an aligned D-Form, check if the Frame Index is
18661// adequately aligned. If it is not, reset the mode to match to X-Form.
18662static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18663 PPC::AddrMode &Mode) {
18664 if (!isa<FrameIndexSDNode>(N))
18665 return;
18666 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18667 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18668 Mode = PPC::AM_XForm;
18669}
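
// A minimal standalone sketch (not LLVM API code) of the alignment rule the
// helper above enforces for frame-index operands: DS-form displacements must
// be a multiple of 4 and DQ-form displacements a multiple of 16 (matching the
// MOF_RPlusSImm16Mult4/Mult16 flags); anything else falls back to X-form
// (register + register). The helpers and sample values are hypothetical.
#include <cstdint>

bool dispFitsDSForm(int64_t Disp) { return Disp % 4 == 0; }   // e.g. ld/std
bool dispFitsDQForm(int64_t Disp) { return Disp % 16 == 0; }  // e.g. lxv/stxv
// dispFitsDSForm(8)  -> true   dispFitsDSForm(6)  -> false (use X-form)
// dispFitsDQForm(32) -> true   dispFitsDQForm(24) -> false (use X-form)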
18670
18671/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18672/// compute the address flags of the node, get the optimal address mode based
18673/// on the flags, and set the Base and Disp based on the address mode.
18674PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18675 SDValue N, SDValue &Disp,
18676 SDValue &Base,
18677 SelectionDAG &DAG,
18678 MaybeAlign Align) const {
18679 SDLoc DL(Parent);
18680
18681 // Compute the address flags.
18682 unsigned Flags = computeMOFlags(Parent, N, DAG);
18683
18684 // Get the optimal address mode based on the Flags.
18685 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18686
18687 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18688 // Select an X-Form load if it is not.
18689 setXFormForUnalignedFI(N, Flags, Mode);
18690
18691 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18692 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18693 assert(Subtarget.isUsingPCRelativeCalls() &&
18694 "Must be using PC-Relative calls when a valid PC-Relative node is "
18695 "present!");
18696 Mode = PPC::AM_PCRel;
18697 }
18698
18699 // Set Base and Disp accordingly depending on the address mode.
18700 switch (Mode) {
18701 case PPC::AM_DForm:
18702 case PPC::AM_DSForm:
18703 case PPC::AM_DQForm: {
18704 // This is a register plus a 16-bit immediate. The base will be the
18705 // register and the displacement will be the immediate unless it
18706 // isn't sufficiently aligned.
18707 if (Flags & PPC::MOF_RPlusSImm16) {
18708 SDValue Op0 = N.getOperand(0);
18709 SDValue Op1 = N.getOperand(1);
18710 int16_t Imm = Op1->getAsZExtVal();
18711 if (!Align || isAligned(*Align, Imm)) {
18712 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18713 Base = Op0;
18714 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18715 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18716 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18717 }
18718 break;
18719 }
18720 }
18721 // This is a register plus the @lo relocation. The base is the register
18722 // and the displacement is the global address.
18723 else if (Flags & PPC::MOF_RPlusLo) {
18724 Disp = N.getOperand(1).getOperand(0); // The global address.
18729 Base = N.getOperand(0);
18730 break;
18731 }
18732 // This is a constant address at most 32 bits. The base will be
18733 // zero or load-immediate-shifted and the displacement will be
18734 // the low 16 bits of the address.
18735 else if (Flags & PPC::MOF_AddrIsSImm32) {
18736 auto *CN = cast<ConstantSDNode>(N);
18737 EVT CNType = CN->getValueType(0);
18738 uint64_t CNImm = CN->getZExtValue();
18739 // If this address fits entirely in a 16-bit sext immediate field, codegen
18740 // this as "d, 0".
18741 int16_t Imm;
18742 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18743 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18744 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18745 CNType);
18746 break;
18747 }
18748 // Handle 32-bit sext immediate with LIS + Addr mode.
18749 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18750 (!Align || isAligned(*Align, CNImm))) {
18751 int32_t Addr = (int32_t)CNImm;
18752 // Otherwise, break this down into LIS + Disp.
18753 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18754 Base =
18755 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18756 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18757 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18758 break;
18759 }
18760 }
18761 // Otherwise, the PPC::MOF_NotAdd flag is set; the load/store is non-foldable.
18762 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18763 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18764 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18765 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18766 } else
18767 Base = N;
18768 break;
18769 }
18770 case PPC::AM_PrefixDForm: {
18771 int64_t Imm34 = 0;
18772 unsigned Opcode = N.getOpcode();
18773 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18774 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18775 // N is an Add/OR node, and its operand is a 34-bit signed immediate.
18776 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18777 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18778 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18779 else
18780 Base = N.getOperand(0);
18781 } else if (isIntS34Immediate(N, Imm34)) {
18782 // The address is a 34-bit signed immediate.
18783 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18784 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18785 }
18786 break;
18787 }
18788 case PPC::AM_PCRel: {
18789 // When selecting PC-Relative instructions, "Base" is not utilized as
18790 // we select the address as [PC+imm].
18791 Disp = N;
18792 break;
18793 }
18794 case PPC::AM_None:
18795 break;
18796 default: { // By default, X-Form is always available to be selected.
18797 // When a frame index is not aligned, we also match by XForm.
18798 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18799 Base = FI ? N : N.getOperand(1);
18800 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18801 N.getValueType())
18802 : N.getOperand(0);
18803 break;
18804 }
18805 }
18806 return Mode;
18807}
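
// A minimal standalone sketch (not LLVM API code) of the "LIS + Disp" split
// used in the MOF_AddrIsSImm32 path above: the low 16 bits become a
// sign-extended displacement, and (Addr - (int16_t)Addr) >> 16, which
// compensates for that sign extension, becomes the LIS operand. The helper
// and the worked value are hypothetical.
#include <cstdint>

struct LISDisp {
  int32_t LisImm; // immediate materialized by LIS into the high halfword
  int16_t Disp;   // signed 16-bit displacement of the D-form access
};

LISDisp splitSImm32Address(int32_t Addr) {
  int16_t Disp = static_cast<int16_t>(Addr); // low 16 bits, sign-extended
  int32_t LisImm = (Addr - Disp) >> 16;      // high part, sign-compensated
  return {LisImm, Disp};
}
// Example: Addr = 0x12348000 gives Disp = -32768 and LisImm = 0x1235, and
// (0x1235 << 16) + (-32768) reconstructs 0x12348000.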
18808
18809CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18810 bool Return,
18811 bool IsVarArg) const {
18812 switch (CC) {
18813 case CallingConv::Cold:
18814 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18815 default:
18816 return CC_PPC64_ELF;
18817 }
18818}
18819
18820bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18821 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18822}
18823
18824TargetLowering::AtomicExpansionKind
18825PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18826 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18827 if (shouldInlineQuadwordAtomics() && Size == 128)
18828 return AtomicExpansionKind::MaskedIntrinsic;
18829
18830 switch (AI->getOperation()) {
18831 case AtomicRMWInst::UIncWrap:
18832 case AtomicRMWInst::UDecWrap:
18833 return AtomicExpansionKind::CmpXChg;
18834 default:
18835 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18836 }
18837
18838 llvm_unreachable("unreachable atomicrmw operation");
18839}
18840
18841TargetLowering::AtomicExpansionKind
18842PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18843 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18844 if (shouldInlineQuadwordAtomics() && Size == 128)
18845 return AtomicExpansionKind::MaskedIntrinsic;
18846 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18847}
18848
18849static Intrinsic::ID
18850getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18851 switch (BinOp) {
18852 default:
18853 llvm_unreachable("Unexpected AtomicRMW BinOp");
18854 case AtomicRMWInst::Xchg:
18855 return Intrinsic::ppc_atomicrmw_xchg_i128;
18856 case AtomicRMWInst::Add:
18857 return Intrinsic::ppc_atomicrmw_add_i128;
18858 case AtomicRMWInst::Sub:
18859 return Intrinsic::ppc_atomicrmw_sub_i128;
18860 case AtomicRMWInst::And:
18861 return Intrinsic::ppc_atomicrmw_and_i128;
18862 case AtomicRMWInst::Or:
18863 return Intrinsic::ppc_atomicrmw_or_i128;
18864 case AtomicRMWInst::Xor:
18865 return Intrinsic::ppc_atomicrmw_xor_i128;
18866 case AtomicRMWInst::Nand:
18867 return Intrinsic::ppc_atomicrmw_nand_i128;
18868 }
18869}
18870
18871Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18872 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18873 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18874 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18875 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18876 Type *ValTy = Incr->getType();
18877 assert(ValTy->getPrimitiveSizeInBits() == 128);
18878 Function *RMW = Intrinsic::getDeclaration(
18879 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18880 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18881 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18882 Value *IncrHi =
18883 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18884 Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
18885 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18886 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18887 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18888 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18889 return Builder.CreateOr(
18890 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18891}
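
// A minimal standalone sketch (not LLVM API code) of the value plumbing that
// the builder calls above emit around ppc_atomicrmw_*_i128: the 128-bit
// operand is split into two 64-bit halves for the intrinsic, and the (lo, hi)
// result pair is reassembled with zext/shl/or. Relies on the GCC/Clang
// unsigned __int128 extension; the helper names are hypothetical.
#include <cstdint>

using u128 = unsigned __int128;

void splitI128(u128 V, uint64_t &Lo, uint64_t &Hi) {
  Lo = static_cast<uint64_t>(V);        // CreateTrunc(V, i64)
  Hi = static_cast<uint64_t>(V >> 64);  // CreateTrunc(CreateLShr(V, 64), i64)
}

u128 joinI128(uint64_t Lo, uint64_t Hi) {
  return (static_cast<u128>(Hi) << 64) | Lo;  // CreateZExt/CreateShl/CreateOr
}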
18892
18893Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18894 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18895 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18896 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18897 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18898 Type *ValTy = CmpVal->getType();
18899 assert(ValTy->getPrimitiveSizeInBits() == 128);
18900 Function *IntCmpXchg =
18901 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18902 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18903 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18904 Value *CmpHi =
18905 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18906 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18907 Value *NewHi =
18908 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18909 emitLeadingFence(Builder, CI, Ord);
18910 Value *LoHi =
18911 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
18912 emitTrailingFence(Builder, CI, Ord);
18913 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18914 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18915 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18916 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18917 return Builder.CreateOr(
18918 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18919}
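
// A hedged, standalone sketch of the kind of source that reaches the quadword
// cmpxchg path above: a 128-bit compare-and-swap, which on a ppc64 target with
// quadword atomics is expanded through the masked-intrinsic hook rather than a
// libcall. Uses the GCC/Clang __int128 and __atomic builtins; the variable and
// function names are hypothetical, and whether it is inlined depends on the
// target features.
unsigned __int128 SharedCounter;

bool casQuadword(unsigned __int128 Expected, unsigned __int128 Desired) {
  return __atomic_compare_exchange_n(&SharedCounter, &Expected, Desired,
                                     /*weak=*/false, __ATOMIC_SEQ_CST,
                                     __ATOMIC_SEQ_CST);
}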
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
This defines the Use class.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5337
bool isDenormal() const
Definition: APFloat.h:1361
APInt bitcastToAPInt() const
Definition: APFloat.h:1266
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1387
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:429
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
APInt abs() const
Get the absolute value.
Definition: APInt.h:1753
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:309
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:415
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:451
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1680
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ Xor
*p = old ^ v
Definition: Instructions.h:722
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
BinOp getOperation() const
Definition: Instructions.h:787
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:178
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:890
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1971
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1523
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1385
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1391
unsigned arg_size() const
Definition: InstrTypes.h:1408
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:109
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:214
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:907
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:885
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:877
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:480
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:705
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:384
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:769
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:781
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:357
BasicBlockListType::const_iterator const_iterator
Definition: Function.h:70
arg_iterator arg_begin()
Definition: Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
size_t arg_size() const
Definition: Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:219
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:232
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:588
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:267
bool hasHiddenVisibility() const
Definition: GlobalValue.h:250
StringRef getSection() const
Definition: Globals.cpp:183
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:124
bool hasComdat() const
Definition: GlobalValue.h:241
Type * getValueType() const
Definition: GlobalValue.h:296
bool hasProtectedVisibility() const
Definition: GlobalValue.h:251
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2524
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1442
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1421
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2029
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2015
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1502
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2420
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:174
bool isUnordered() const
Definition: Instructions.h:247
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Metadata node.
Definition: Metadata.h:1069
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:300
bool is32BitELFABI() const
Definition: PPCSubtarget.h:220
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:260
bool isAIXABI() const
Definition: PPCSubtarget.h:215
bool useSoftFloat() const
Definition: PPCSubtarget.h:175
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:143
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:203
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:254
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:272
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:146
bool isSVR4ABI() const
Definition: PPCSubtarget.h:216
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:135
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:207
bool isLittleEndian() const
Definition: PPCSubtarget.h:182
bool isTargetLinux() const
Definition: PPCSubtarget.h:213
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:278
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:290
bool is64BitELFABI() const
Definition: PPCSubtarget.h:219
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:156
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:296
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:153
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:266
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1); add (add x, 1), y. The variant with two add's is IR...
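A minimal standalone check of the equivalence stated above (purely illustrative; the variable names are made up): since xor x, -1 is ~x, which equals -x - 1 in two's complement, both expressions always produce the same value.

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int32_t X : {0, 1, -1, 42, -1000})
      for (int32_t Y : {0, 7, -7, 12345}) {
        int32_t SubForm = Y - (X ^ -1); // sub y, (xor x, -1)
        int32_t AddForm = (X + 1) + Y;  // add (add x, 1), y
        assert(SubForm == AddForm);
      }
    return 0;
  }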
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:738
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:491
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:495
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:453
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:844
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:489
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:743
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:490
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:789
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:692
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
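To make the notion concrete, the following standalone sketch (illustrative only; numSignBits is a made-up helper, not the LLVM API) counts how many of the top bits of a 32-bit value are copies of its sign bit, which is the quantity ComputeNumSignBits reports:

  #include <cassert>
  #include <cstdint>

  static unsigned numSignBits(int32_t V) {
    unsigned N = 1; // the sign bit itself always counts
    for (int Bit = 30; Bit >= 0 && (((V >> Bit) & 1) == (int)((uint32_t)V >> 31)); --Bit)
      ++N;
    return N;
  }

  int main() {
    assert(numSignBits(0) == 32);  // every bit equals the (zero) sign bit
    assert(numSignBits(-1) == 32); // every bit is one
    assert(numSignBits(5) == 29);  // 0x00000005: top 29 bits are zero
    assert(numSignBits(-6) == 29); // 0xFFFFFFFA: top 29 bits are one
    return 0;
  }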
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:784
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:484
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:815
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:861
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
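For intuition (illustrative only, not how the DAG query is implemented): a value known to come from zero-extending an i8 has every bit above bit 7 known zero, so an AND with a mask covering only those bits is provably zero, which is the kind of fact MaskedValueIsZero reports.

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t Byte = 0; Byte <= 0xFF; ++Byte) {
      uint32_t ZExt = Byte;              // i8 zero-extended into an i32
      assert((ZExt & 0xFFFFFF00u) == 0); // the known-zero bits AND away to 0
    }
    return 0;
  }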
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:502
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:755
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:572
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:96
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:436
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
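As a standalone illustration (not the target hook itself) of what "which part comes first" means, here is an i64 value split into two i32 register parts under big-endian versus little-endian part ordering:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t V = 0x1122334455667788ull;
    uint32_t Lo = (uint32_t)V;
    uint32_t Hi = (uint32_t)(V >> 32);

    uint32_t BEParts[2] = {Hi, Lo}; // big-endian ordering: Hi part first
    uint32_t LEParts[2] = {Lo, Hi}; // little-endian ordering: Lo part first

    assert((((uint64_t)BEParts[0] << 32) | BEParts[1]) == V);
    assert((((uint64_t)LEParts[1] << 32) | LEParts[0]) == V);
    return 0;
  }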
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:298
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:242
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:826
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1169
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1165
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
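Purely as an illustration of the value pair such a node produces (this is ordinary C++ arithmetic, not DAG code), the unsigned 32-bit case looks like:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t A = 0xDEADBEEF, B = 0x12345678;
    uint64_t Full = (uint64_t)A * B;      // the full i64 product
    uint32_t Lo = (uint32_t)Full;         // result 0 of UMUL_LOHI
    uint32_t Hi = (uint32_t)(Full >> 32); // result 1 of UMUL_LOHI
    assert((((uint64_t)Hi << 32) | Lo) == Full);
    return 0;
  }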
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1198
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1284
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1074
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:820
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:943
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1242
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:976
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1090
@ TargetJumpTable
Definition: ISDOpcodes.h:173
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1264
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1031
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:960
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1120
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1099
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1280
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1194
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:910
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1021
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:438
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:439
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1291
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1008
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1084
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:828
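The SHL/SRA pair mentioned above can be shown with plain C++ (illustrative only; signExtendInReg8 is a made-up helper): sign-extending an i8 value that lives in an i32 register shifts the small value's sign bit up to bit 31 and arithmetically shifts it back down.

  #include <cassert>
  #include <cstdint>

  static int32_t signExtendInReg8(uint32_t V) {
    uint32_t Shifted = V << 24;    // SHL: move bit 7 of the i8 up to bit 31
    return (int32_t)Shifted >> 24; // SRA: replicate it down through the top bits
  }

  int main() {
    assert(signExtendInReg8(0x000000FFu) == -1);
    assert(signExtendInReg8(0x0000017Fu) == 127);
    assert(signExtendInReg8(0x00000080u) == -128);
    return 0;
  }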
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:918
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1140
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:866
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1225
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1251
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:899
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1137
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1189
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1113
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1183
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1363
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1248
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1611
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1527
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1578
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1558
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1617
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:92
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:200
@ MO_TPREL_HA
Definition: PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:160
@ MO_HA
Definition: PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed block by block, and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting memory load instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
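As a worked illustration of this idiom (a sketch, not LLVM code), the C++ below emulates srawi followed by addze: the arithmetic shift alone rounds toward negative infinity, and adding the carry, which srawi sets when the dividend is negative and nonzero bits were shifted out, corrects the result to the truncating semantics of signed division. The helper name sdiv_pow2 is invented for the example, and the code relies on arithmetic right shift of negative values (guaranteed since C++20).

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Hypothetical helper emulating srawi (arithmetic shift right, setting CA) and
// addze (add the carry) to compute x / (1 << k) for signed 32-bit x.
static int32_t sdiv_pow2(int32_t x, unsigned k) {
  int32_t shifted = x >> k;                        // srawi: rounds toward -infinity
  uint32_t lost = uint32_t(x) & ((1u << k) - 1);   // bits shifted out
  bool carry = (x < 0) && lost != 0;               // CA as srawi would set it
  return shifted + (carry ? 1 : 0);                // addze: fix up to truncation
}

int main() {
  for (int32_t x : {-9, -8, -1, 0, 1, 7, 9})
    assert(sdiv_pow2(x, 3) == x / 8);              // matches C++ truncating division
  return 0;
}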
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY; G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents a v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
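For readers unfamiliar with the split, the sketch below illustrates (with invented helper names lo16/hi16/ha16, not LLVM APIs) how a 32-bit address decomposes into the halves referred to here, and how the high-adjusted form used with MO_HA compensates for the sign extension that addi applies to the low half.

#include <cassert>
#include <cstdint>

// Illustrative helpers (invented names, not LLVM code) for the 16-bit halves
// used when materializing a 32-bit address.
static uint32_t lo16(uint32_t Addr) { return Addr & 0xFFFF; }
static uint32_t hi16(uint32_t Addr) { return Addr >> 16; }
static uint32_t ha16(uint32_t Addr) {
  // addi sign-extends its 16-bit operand, so bump the high half whenever the
  // low half will be interpreted as negative.
  return ((Addr >> 16) + ((Addr & 0x8000) ? 1 : 0)) & 0xFFFF;
}

int main() {
  uint32_t Addr = 0x1234ABCD;
  uint32_t ViaOri  = (hi16(Addr) << 16) | lo16(Addr);                    // lis + ori
  uint32_t ViaAddi = (ha16(Addr) << 16) + uint32_t(int16_t(lo16(Addr))); // @ha + signed @l
  assert(ViaOri == Addr && ViaAddi == Addr);
  return 0;
}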
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:65
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
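As an illustration of the rounding this performs (a sketch, not the code in Alignment.h), the usual power-of-two round-up is (Size + A - 1) & ~(A - 1); Align guarantees A is a nonzero power of two. The helper name roundUpTo is invented for the example.

#include <cassert>
#include <cstdint>

// Illustrative round-up to a power-of-two alignment: the next multiple of A
// at or above Size.
static uint64_t roundUpTo(uint64_t Size, uint64_t A) {
  return (Size + A - 1) & ~(A - 1);
}

int main() {
  assert(roundUpTo(0, 16) == 0);
  assert(roundUpTo(1, 16) == 16);
  assert(roundUpTo(16, 16) == 16);
  assert(roundUpTo(17, 16) == 32);
  return 0;
}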
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:555
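The common shift-based formulation of this operation is sketched below; it is illustrative rather than a copy of MathExtras.h, the helper name is invented, and it relies on arithmetic right shift of negative values (guaranteed since C++20).

#include <cassert>
#include <cstdint>

// Illustrative sign extension of the low B bits of X to 32 bits. Assumes 0 < B <= 32.
static int32_t signExtendLowBits(uint32_t X, unsigned B) {
  return int32_t(X << (32 - B)) >> (32 - B);
}

int main() {
  assert(signExtendLowBits(0x0000FFFF, 16) == -1);      // low half is all ones
  assert(signExtendLowBits(0x00007FFF, 16) == 0x7FFF);  // positive values unchanged
  assert(signExtendLowBits(0x000000FF, 8) == -1);
  return 0;
}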
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:573
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
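One way to test the property described above is sketched below; the helper name is invented, the code uses std::countr_zero (so C++20), and the actual routine additionally reports the mask-begin/mask-end positions (MB, ME) and handles further cases, so this is only an illustration of the basic predicate.

#include <bit>
#include <cassert>
#include <cstdint>

// Illustrative predicate (not the LLVM routine): true when Val contains
// exactly one contiguous run of 1s.
static bool hasSingleRunOfOnes(uint32_t Val) {
  if (Val == 0)
    return false;
  uint32_t Shifted = Val >> std::countr_zero(Val); // strip trailing zeros
  // A block of trailing ones plus one is a power of two, so nothing may remain.
  return (Shifted & (Shifted + 1)) == 0;
}

int main() {
  assert(hasSingleRunOfOnes(0x00FF0000));
  assert(hasSingleRunOfOnes(0x00000001));
  assert(hasSingleRunOfOnes(0xFFFFFFFF));
  assert(!hasSingleRunOfOnes(0x00FF00FF)); // two separate runs
  assert(!hasSingleRunOfOnes(0));
  return 0;
}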
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:281
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:284
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:258
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:56
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)