1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/CallingConv.h"
61#include "llvm/IR/Constant.h"
62#include "llvm/IR/Constants.h"
63#include "llvm/IR/DataLayout.h"
64#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/Function.h"
67#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/IRBuilder.h"
70#include "llvm/IR/Intrinsics.h"
71#include "llvm/IR/IntrinsicsPowerPC.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/Value.h"
76#include "llvm/MC/MCContext.h"
77#include "llvm/MC/MCExpr.h"
87#include "llvm/Support/Debug.h"
89#include "llvm/Support/Format.h"
95#include <algorithm>
96#include <cassert>
97#include <cstdint>
98#include <iterator>
99#include <list>
100#include <optional>
101#include <utility>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "ppc-lowering"
107
108static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
109cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
110
111static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
112cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
113
114static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
115cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
116
117static cl::opt<bool> DisableSCO("disable-ppc-sco",
118cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
119
120static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
121cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
122
123static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
124cl::desc("use absolute jump tables on ppc"), cl::Hidden);
125
126static cl::opt<bool>
127 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
128 cl::desc("disable vector permute decomposition"),
129 cl::init(true), cl::Hidden);
130
132 "disable-auto-paired-vec-st",
133 cl::desc("disable automatically generated 32byte paired vector stores"),
134 cl::init(true), cl::Hidden);
135
137 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
138 cl::desc("Set minimum number of entries to use a jump table on PPC"));
139
140STATISTIC(NumTailCalls, "Number of tail calls");
141STATISTIC(NumSiblingCalls, "Number of sibling calls");
142STATISTIC(ShufflesHandledWithVPERM,
143 "Number of shuffles lowered to a VPERM or XXPERM");
144STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
145
146static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
147
148static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
149
150static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
151
152// A faster local-exec TLS access sequence (enabled with the
153// -maix-small-local-exec-tls option) can be produced for TLS variables;
154// consistent with the IBM XL compiler, we apply a max size of slightly under
155// 32KB.
157
158// FIXME: Remove this once the bug has been fixed!
160extern cl::opt<bool> ANDIGlueBug;
161PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
162 const PPCSubtarget &STI)
163 : TargetLowering(TM), Subtarget(STI) {
164 // Initialize the map that relates the PPC addressing modes to the computed
165 // flags of a load/store instruction. The map is used to determine the optimal
166 // addressing mode when selecting loads and stores.
167 initializeAddrModeMap();
168 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
169 // arguments are at least 4/8 bytes aligned.
170 bool isPPC64 = Subtarget.isPPC64();
171 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
172
173 // Set up the register classes.
174 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
175 if (!useSoftFloat()) {
176 if (hasSPE()) {
177 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
178 // EFPU2 APU only supports f32
179 if (!Subtarget.hasEFPU2())
180 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
181 } else {
182 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
183 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
184 }
185 }
186
187 // Match BITREVERSE to customized fast code sequence in the td file.
190
191 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
193
194 // Custom lower inline assembly to check for special registers.
197
198 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
199 for (MVT VT : MVT::integer_valuetypes()) {
202 }
203
204 if (Subtarget.isISA3_0()) {
205 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
206 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
207 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
208 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
209 } else {
210 // No extending loads from f16 or HW conversions back and forth.
211 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
214 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
217 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 }
220
221 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
222
223 // PowerPC has pre-inc loads and stores.
234 if (!Subtarget.hasSPE()) {
239 }
240
241 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
242 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
243 for (MVT VT : ScalarIntVTs) {
248 }
249
250 if (Subtarget.useCRBits()) {
252
253 if (isPPC64 || Subtarget.hasFPCVT()) {
256 isPPC64 ? MVT::i64 : MVT::i32);
259 isPPC64 ? MVT::i64 : MVT::i32);
260
263 isPPC64 ? MVT::i64 : MVT::i32);
266 isPPC64 ? MVT::i64 : MVT::i32);
267
270 isPPC64 ? MVT::i64 : MVT::i32);
273 isPPC64 ? MVT::i64 : MVT::i32);
274
277 isPPC64 ? MVT::i64 : MVT::i32);
280 isPPC64 ? MVT::i64 : MVT::i32);
281 } else {
286 }
287
288 // PowerPC does not support direct load/store of condition registers.
291
292 // FIXME: Remove this once the ANDI glue bug is fixed:
293 if (ANDIGlueBug)
295
296 for (MVT VT : MVT::integer_valuetypes()) {
299 setTruncStoreAction(VT, MVT::i1, Expand);
300 }
301
302 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
303 }
304
305 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
306 // PPC (the libcall is not available).
311
312 // We do not currently implement these libm ops for PowerPC.
313 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
314 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
315 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
316 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
318 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
319
320 // PowerPC has no SREM/UREM instructions unless we are on P9
321 // On P9 we may use a hardware instruction to compute the remainder.
322 // When the result of both the remainder and the division is required it is
323 // more efficient to compute the remainder from the result of the division
324 // rather than use the remainder instruction. The instructions are legalized
325 // directly because the DivRemPairsPass performs the transformation at the IR
326 // level.
327 if (Subtarget.isISA3_0()) {
332 } else {
337 }
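  // For example, when both "a / b" and "a % b" are needed, the remainder can
  // be recovered from the quotient with one multiply and one subtract:
  //   q = a / b;
  //   r = a - q * b;   // same value as a % b, no remainder instruction needed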
338
339 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
348
349 // Handle constrained floating-point operations of scalar.
350 // TODO: Handle SPE specific operation.
356
361
362 if (!Subtarget.hasSPE()) {
365 }
366
367 if (Subtarget.hasVSX()) {
370 }
371
372 if (Subtarget.hasFSQRT()) {
375 }
376
377 if (Subtarget.hasFPRND()) {
382
387 }
388
389 // We don't support sin/cos/sqrt/fmod/pow
400
401 // MASS transformation for LLVM intrinsics with the replicating fast-math flag,
402 // kept consistent with the PPCGenScalarMASSEntries pass.
403 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
416 }
417
418 if (Subtarget.hasSPE()) {
421 } else {
422 setOperationAction(ISD::FMA , MVT::f64, Legal);
423 setOperationAction(ISD::FMA , MVT::f32, Legal);
424 }
425
426 if (Subtarget.hasSPE())
427 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
428
430
431 // If we're enabling GP optimizations, use hardware square root
432 if (!Subtarget.hasFSQRT() &&
433 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
434 Subtarget.hasFRE()))
436
437 if (!Subtarget.hasFSQRT() &&
438 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
439 Subtarget.hasFRES()))
441
442 if (Subtarget.hasFCPSGN()) {
445 } else {
448 }
449
450 if (Subtarget.hasFPRND()) {
455
460 }
461
462 // Prior to P10, PowerPC does not have BSWAP, but we can use the vector BSWAP
463 // instruction xxbrd to speed up scalar BSWAP64.
464 if (Subtarget.isISA3_1()) {
467 } else {
470 ISD::BSWAP, MVT::i64,
471 (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
472 }
473
474 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
475 if (Subtarget.isISA3_0()) {
476 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
477 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
478 } else {
479 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
480 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
481 }
482
483 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
486 } else {
489 }
490
491 // PowerPC does not have ROTR
494
495 if (!Subtarget.useCRBits()) {
496 // PowerPC does not have Select
501 }
502
503 // PowerPC wants to turn select_cc of FP into fsel when possible.
506
507 // PowerPC wants to optimize integer setcc a bit
508 if (!Subtarget.useCRBits())
510
511 if (Subtarget.hasFPU()) {
515
519 }
520
521 // PowerPC does not have BRCOND which requires SetCC
522 if (!Subtarget.useCRBits())
524
526
527 if (Subtarget.hasSPE()) {
528 // SPE has built-in conversions
535
536 // SPE supports signaling compare of f32/f64.
539 } else {
540 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
543
544 // PowerPC does not have [U|S]INT_TO_FP
549 }
550
551 if (Subtarget.hasDirectMove() && isPPC64) {
556 if (TM.Options.UnsafeFPMath) {
565 }
566 } else {
571 }
572
573 // We cannot sextinreg(i1). Expand to shifts.
575
576 // NOTE: The EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
577 // SjLj exception handling, but rather a lightweight setjmp/longjmp replacement
578 // for continuations, user-level threading, etc. As a result, no other SjLj
579 // exception interfaces are implemented, so please don't build your own
580 // exception handling on top of them.
581 // LLVM/Clang supports zero-cost DWARF exception handling.
584
585 // We want to legalize GlobalAddress and ConstantPool nodes into the
586 // appropriate instructions to materialize the address.
597
598 // TRAP is legal.
599 setOperationAction(ISD::TRAP, MVT::Other, Legal);
600
601 // TRAMPOLINE is custom lowered.
604
605 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
607
608 if (Subtarget.is64BitELFABI()) {
609 // VAARG always uses double-word chunks, so promote anything smaller.
611 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
613 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
615 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
617 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
619 } else if (Subtarget.is32BitELFABI()) {
620 // VAARG is custom lowered with the 32-bit SVR4 ABI.
623 } else
625
626 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
627 if (Subtarget.is32BitELFABI())
629 else
631
632 // Use the default implementation.
633 setOperationAction(ISD::VAEND , MVT::Other, Expand);
642
643 // We want to custom lower some of our intrinsics.
649
650 // To handle counter-based loop conditions.
652
657
658 // Comparisons that require checking two conditions.
659 if (Subtarget.hasSPE()) {
664 }
677
680
681 if (Subtarget.has64BitSupport()) {
682 // They also have instructions for converting between i64 and fp.
691 // This is just the low 32 bits of a (signed) fp->i64 conversion.
692 // We cannot do this with Promote because i64 is not a legal type.
695
696 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
699 }
700 } else {
701 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
702 if (Subtarget.hasSPE()) {
705 } else {
708 }
709 }
710
711 // With the instructions enabled under FPCVT, we can do everything.
712 if (Subtarget.hasFPCVT()) {
713 if (Subtarget.has64BitSupport()) {
722 }
723
732 }
733
734 if (Subtarget.use64BitRegs()) {
735 // 64-bit PowerPC implementations can support i64 types directly
736 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
737 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
739 // 64-bit PowerPC wants to expand i128 shifts itself.
743 } else {
744 // 32-bit PowerPC wants to expand i64 shifts itself.
748 }
749
750 // PowerPC has better expansions for funnel shifts than the generic
751 // TargetLowering::expandFunnelShift.
752 if (Subtarget.has64BitSupport()) {
755 }
758
759 if (Subtarget.hasVSX()) {
764 }
765
766 if (Subtarget.hasAltivec()) {
767 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
772 }
773 // First set operation action for all vector types to expand. Then we
774 // will selectively turn on ones that can be effectively codegen'd.
776 // add/sub are legal for all supported vector VT's.
779
780 // For v2i64, these are only valid with P8Vector. This is corrected after
781 // the loop.
782 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
787 }
788 else {
793 }
794
795 if (Subtarget.hasVSX()) {
798 }
799
800 // Vector instructions introduced in P8
801 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
804 }
805 else {
808 }
809
810 // Vector instructions introduced in P9
811 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
813 else
815
816 // We promote all shuffles to v16i8.
818 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
819
820 // We promote all non-typed operations to v4i32.
822 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
824 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
826 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
828 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
830 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
833 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
835 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
836
837 // No other operations are legal.
876
877 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
878 setTruncStoreAction(VT, InnerVT, Expand);
881 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
882 }
883 }
885 if (!Subtarget.hasP8Vector()) {
886 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
887 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
888 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
889 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
890 }
891
892 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
893 // with merges, splats, etc.
895
896 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
897 // are cheap, so handle them before they get expanded to scalar.
903
904 setOperationAction(ISD::AND , MVT::v4i32, Legal);
905 setOperationAction(ISD::OR , MVT::v4i32, Legal);
906 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
907 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
909 Subtarget.useCRBits() ? Legal : Expand);
910 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
920 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
923
924 // Custom lower ROTL v1i128 to VECTOR_SHUFFLE v16i8.
925 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
926 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
927 if (Subtarget.hasAltivec())
928 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
930 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
931 if (Subtarget.hasP8Altivec())
932 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
933
934 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
935 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
936 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
937 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
938
939 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
940 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
941
942 if (Subtarget.hasVSX()) {
943 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
944 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
946 }
947
948 if (Subtarget.hasP8Altivec())
949 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
950 else
951 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
952
953 if (Subtarget.isISA3_1()) {
954 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
955 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
956 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
957 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
958 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
959 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
960 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
961 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
962 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
963 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
964 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
965 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
966 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
967 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
968 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
969 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
970 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
971 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
972 }
973
974 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
975 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
976
979
984
985 // Altivec does not contain unordered floating-point compare instructions
986 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
988 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
990
991 if (Subtarget.hasVSX()) {
994 if (Subtarget.hasP8Vector()) {
997 }
998 if (Subtarget.hasDirectMove() && isPPC64) {
1007 }
1009
1010 // The nearbyint variants are not allowed to raise the inexact exception
1011 // so we can only code-gen them with unsafe math.
1012 if (TM.Options.UnsafeFPMath) {
1015 }
1016
1017 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1018 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1019 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1021 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1022 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1025
1027 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1028 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1031
1032 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1033 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1034
1035 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1036 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1037
1038 // Share the Altivec comparison restrictions.
1039 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1040 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1041 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1042 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1043
1044 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1045 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1046
1048
1049 if (Subtarget.hasP8Vector())
1050 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1051
1052 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1053
1054 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1055 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1056 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1057
1058 if (Subtarget.hasP8Altivec()) {
1059 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1060 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1061 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1062
1063 // 128-bit shifts can be accomplished via 3 instructions for SHL and
1064 // SRL, but not for SRA because of the instructions available:
1065 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1066 // doing.
1067 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1068 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1069 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1070
1071 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1072 }
1073 else {
1074 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1075 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1076 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1077
1078 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1079
1080 // VSX v2i64 only supports non-arithmetic operations.
1081 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1082 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1083 }
1084
1085 if (Subtarget.isISA3_1())
1086 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1087 else
1088 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1089
1090 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1091 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1093 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1094
1096
1105
1106 // Custom handling for partial vectors of integers converted to
1107 // floating point. We already have optimal handling for v2i32 through
1108 // the DAG combine, so those aren't necessary.
1125
1126 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1127 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1128 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1129 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1132
1135
1136 // Handle constrained floating-point operations of vector.
1137 // The predicate is `hasVSX` because Altivec instructions do not raise
1138 // exceptions but VSX vector instructions do.
1152
1166
1167 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1168 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1169
1170 for (MVT FPT : MVT::fp_valuetypes())
1171 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1172
1173 // Expand the SELECT to SELECT_CC
1175
1176 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1177 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1178
1179 // No implementation for these ops for PowerPC.
1181 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1182 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1183 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1185 setOperationAction(ISD::FREM, MVT::f128, Expand);
1186 }
1187
1188 if (Subtarget.hasP8Altivec()) {
1189 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1190 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1191 }
1192
1193 if (Subtarget.hasP9Vector()) {
1196
1197 // Test data class instructions store results in CR bits.
1198 if (Subtarget.useCRBits()) {
1202 }
1203
1204 // 128-bit shifts can be accomplished via 3 instructions for SHL and
1205 // SRL, but not for SRA because of the instructions available:
1206 // VS{RL} and VS{RL}O.
1207 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1208 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1209 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1210
1211 setOperationAction(ISD::FADD, MVT::f128, Legal);
1212 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1213 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1214 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1216
1217 setOperationAction(ISD::FMA, MVT::f128, Legal);
1224
1226 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1228 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1231
1235
1236 // Handle constrained floating-point operations of fp128
1253 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1254 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1255 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1256 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1257 } else if (Subtarget.hasVSX()) {
1260
1261 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1262 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1263
1264 // Set FADD/FSUB as libcalls to avoid having the legalizer expand the
1265 // fp_to_uint and int_to_fp.
1268
1269 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1270 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1271 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1272 setOperationAction(ISD::FABS, MVT::f128, Expand);
1274 setOperationAction(ISD::FMA, MVT::f128, Expand);
1276
1277 // Expand the fp_extend if the target type is fp128.
1280
1281 // Expand the fp_round if the source type is fp128.
1282 for (MVT VT : {MVT::f32, MVT::f64}) {
1285 }
1286
1291
1292 // Lower following f128 select_cc pattern:
1293 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1295
1296 // We need to handle f128 SELECT_CC with integer result type.
1298 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1299 }
1300
1301 if (Subtarget.hasP9Altivec()) {
1302 if (Subtarget.isISA3_1()) {
1307 } else {
1310 }
1318
1319 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1320 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1321 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1322 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1323 }
1324
1325 if (Subtarget.hasP10Vector()) {
1327 }
1328 }
1329
1330 if (Subtarget.pairedVectorMemops()) {
1331 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1332 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1333 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1334 }
1335 if (Subtarget.hasMMA()) {
1336 if (Subtarget.isISAFuture())
1337 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1338 else
1339 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1340 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1341 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1343 }
1344
1345 if (Subtarget.has64BitSupport())
1347
1348 if (Subtarget.isISA3_1())
1349 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1350
1351 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1352
1353 if (!isPPC64) {
1356 }
1357
1362 }
1363
1365
1366 if (Subtarget.hasAltivec()) {
1367 // Altivec instructions set fields to all zeros or all ones.
1369 }
1370
1371 setLibcallName(RTLIB::MULO_I128, nullptr);
1372 if (!isPPC64) {
1373 // These libcalls are not available in 32-bit.
1374 setLibcallName(RTLIB::SHL_I128, nullptr);
1375 setLibcallName(RTLIB::SRL_I128, nullptr);
1376 setLibcallName(RTLIB::SRA_I128, nullptr);
1377 setLibcallName(RTLIB::MUL_I128, nullptr);
1378 setLibcallName(RTLIB::MULO_I64, nullptr);
1379 }
1380
1383 else if (isPPC64)
1385 else
1387
1388 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1389
1390 // We have target-specific dag combine patterns for the following nodes:
1393 if (Subtarget.hasFPCVT())
1396 if (Subtarget.useCRBits())
1400
1402
1404
1405 if (Subtarget.useCRBits()) {
1407 }
1408
1409 setLibcallName(RTLIB::LOG_F128, "logf128");
1410 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1411 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1412 setLibcallName(RTLIB::EXP_F128, "expf128");
1413 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1414 setLibcallName(RTLIB::SIN_F128, "sinf128");
1415 setLibcallName(RTLIB::COS_F128, "cosf128");
1416 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1417 setLibcallName(RTLIB::POW_F128, "powf128");
1418 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1419 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1420 setLibcallName(RTLIB::REM_F128, "fmodf128");
1421 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1422 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1423 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1424 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1425 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1426 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1427 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1428 setLibcallName(RTLIB::RINT_F128, "rintf128");
1429 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1430 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1431 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1432 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1433 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1434
1435 if (Subtarget.isAIXABI()) {
1436 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1437 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1438 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1439 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1440 }
1441
1442 // With 32 condition bits, we don't need to sink (and duplicate) compares
1443 // aggressively in CodeGenPrep.
1444 if (Subtarget.useCRBits()) {
1447 }
1448
1449 // TODO: The default entry number is set to 64. This stops most jump table
1450 // generation on PPC. But it is good for current PPC HW because an indirect
1451 // branch to the jump table via mtctr may lead to bad branch prediction.
1452 // Re-evaluate this value on future HW that can do better with mtctr.
1454
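  // For example (illustrative invocation): the "ppc-min-jump-table-entries"
  // threshold declared above can be lowered on the command line, e.g.
  //   llc -ppc-min-jump-table-entries=16 ...
  // (or via -mllvm when driving the backend from clang), so that smaller
  // switches are again considered for a jump table.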
1456
1457 switch (Subtarget.getCPUDirective()) {
1458 default: break;
1459 case PPC::DIR_970:
1460 case PPC::DIR_A2:
1461 case PPC::DIR_E500:
1462 case PPC::DIR_E500mc:
1463 case PPC::DIR_E5500:
1464 case PPC::DIR_PWR4:
1465 case PPC::DIR_PWR5:
1466 case PPC::DIR_PWR5X:
1467 case PPC::DIR_PWR6:
1468 case PPC::DIR_PWR6X:
1469 case PPC::DIR_PWR7:
1470 case PPC::DIR_PWR8:
1471 case PPC::DIR_PWR9:
1472 case PPC::DIR_PWR10:
1476 break;
1477 }
1478
1479 if (Subtarget.enableMachineScheduler())
1481 else
1483
1485
1486 // The Freescale cores do better with aggressive inlining of memcpy and
1487 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1488 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1489 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1490 MaxStoresPerMemset = 32;
1492 MaxStoresPerMemcpy = 32;
1496 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1497 // The A2 also benefits from (very) aggressive inlining of memcpy and
1498 // friends. The overhead of the function call, even when warm, can be
1499 // over one hundred cycles.
1500 MaxStoresPerMemset = 128;
1501 MaxStoresPerMemcpy = 128;
1502 MaxStoresPerMemmove = 128;
1503 MaxLoadsPerMemcmp = 128;
1504 } else {
1507 }
1508
1509 IsStrictFPEnabled = true;
1510
1511 // Let the subtarget (CPU) decide if a predictable select is more expensive
1512 // than the corresponding branch. This information is used in CGP to decide
1513 // when to convert selects into branches.
1515}
1516
1517// *********************************** NOTE ************************************
1518// For selecting load and store instructions, the addressing modes are defined
1519// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1520 // patterns to match the load and store instructions.
1521//
1522// The TD definitions for the addressing modes correspond to their respective
1523// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1524// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1525// address mode flags of a particular node. Afterwards, the computed address
1526// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1527// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1528// accordingly, based on the preferred addressing mode.
1529//
1530// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1531// MemOpFlags contains all the possible flags that can be used to compute the
1532// optimal addressing mode for load and store instructions.
1533// AddrMode contains all the possible load and store addressing modes available
1534// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1535//
1536// When adding new load and store instructions, it is possible that new address
1537// flags may need to be added into MemOpFlags, and a new addressing mode will
1538// need to be added to AddrMode. An entry of the new addressing mode (consisting
1539// of the minimal and main distinguishing address flags for the new load/store
1540// instructions) will need to be added into initializeAddrModeMap() below.
1541 // Finally, when adding new addressing modes, getAddrModeForFlags() will
1542// need to be updated to account for selecting the optimal addressing mode.
1543// *****************************************************************************
1544/// Initialize the map that relates the different addressing modes of the load
1545/// and store instructions to a set of flags. This ensures the load/store
1546/// instruction is correctly matched during instruction selection.
1547void PPCTargetLowering::initializeAddrModeMap() {
1548 AddrModesMap[PPC::AM_DForm] = {
1549 // LWZ, STW
1554 // LBZ, LHZ, STB, STH
1559 // LHA
1564 // LFS, LFD, STFS, STFD
1569 };
1570 AddrModesMap[PPC::AM_DSForm] = {
1571 // LWA
1575 // LD, STD
1579 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1583 };
1584 AddrModesMap[PPC::AM_DQForm] = {
1585 // LXV, STXV
1589 };
1590 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1592 // TODO: Add mapping for quadword load/store.
1593}
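// As a rough illustration of the flow described in the NOTE above (exact flag
// names omitted): for a simple word load such as "lwz r3, 8(r4)", the flags
// computed by computeMOFlags() describe a register-plus-signed-16-bit-immediate
// address with a word-sized integer access. getAddrModeForFlags() matches that
// flag set against the AM_DForm entries registered above (the "LWZ, STW"
// group), so SelectOptimalAddrMode() produces a D-Form base + displacement
// access.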
1594
1595/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1596/// the desired ByVal argument alignment.
1597static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1598 if (MaxAlign == MaxMaxAlign)
1599 return;
1600 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1601 if (MaxMaxAlign >= 32 &&
1602 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1603 MaxAlign = Align(32);
1604 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1605 MaxAlign < 16)
1606 MaxAlign = Align(16);
1607 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1608 Align EltAlign;
1609 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1610 if (EltAlign > MaxAlign)
1611 MaxAlign = EltAlign;
1612 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1613 for (auto *EltTy : STy->elements()) {
1614 Align EltAlign;
1615 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1616 if (EltAlign > MaxAlign)
1617 MaxAlign = EltAlign;
1618 if (MaxAlign == MaxMaxAlign)
1619 break;
1620 }
1621 }
1622}
1623
1624/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1625/// function arguments in the caller parameter area.
1627 const DataLayout &DL) const {
1628 // 16-byte and wider vectors are passed on a 16-byte boundary.
1629 // The rest are on an 8-byte (PPC64) or 4-byte (PPC32) boundary.
1630 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1631 if (Subtarget.hasAltivec())
1632 getMaxByValAlign(Ty, Alignment, Align(16));
1633 return Alignment.value();
1634}
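// For example, on PPC64 with Altivec available, a by-value struct containing a
// 128-bit vector member (e.g. a <4 x i32>) is aligned to 16 bytes in the
// parameter area, while an all-integer struct keeps the default 8-byte (PPC64)
// or 4-byte (PPC32) boundary.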
1635
1636bool PPCTargetLowering::useSoftFloat() const {
1637 return Subtarget.useSoftFloat();
1638}
1639
1640bool PPCTargetLowering::hasSPE() const {
1641 return Subtarget.hasSPE();
1642}
1643
1645 return VT.isScalarInteger();
1646}
1647
1649 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1650 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1651 return false;
1652
1653 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1654 if (VTy->getScalarType()->isIntegerTy()) {
1655 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1656 if (ElemSizeInBits == 32) {
1657 Index = Subtarget.isLittleEndian() ? 2 : 1;
1658 return true;
1659 }
1660 if (ElemSizeInBits == 64) {
1661 Index = Subtarget.isLittleEndian() ? 1 : 0;
1662 return true;
1663 }
1664 }
1665 }
1666 return false;
1667}
1668
1669const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1670 switch ((PPCISD::NodeType)Opcode) {
1671 case PPCISD::FIRST_NUMBER: break;
1672 case PPCISD::FSEL: return "PPCISD::FSEL";
1673 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1674 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1675 case PPCISD::FCFID: return "PPCISD::FCFID";
1676 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1677 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1678 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1679 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1680 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1681 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1682 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1683 case PPCISD::FRE: return "PPCISD::FRE";
1684 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1685 case PPCISD::FTSQRT:
1686 return "PPCISD::FTSQRT";
1687 case PPCISD::FSQRT:
1688 return "PPCISD::FSQRT";
1689 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1690 case PPCISD::VPERM: return "PPCISD::VPERM";
1691 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1692 case PPCISD::XXSPLTI_SP_TO_DP:
1693 return "PPCISD::XXSPLTI_SP_TO_DP";
1694 case PPCISD::XXSPLTI32DX:
1695 return "PPCISD::XXSPLTI32DX";
1696 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1697 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1698 case PPCISD::XXPERM:
1699 return "PPCISD::XXPERM";
1700 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1701 case PPCISD::CMPB: return "PPCISD::CMPB";
1702 case PPCISD::Hi: return "PPCISD::Hi";
1703 case PPCISD::Lo: return "PPCISD::Lo";
1704 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1705 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1706 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1707 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1708 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1709 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1710 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1711 case PPCISD::SRL: return "PPCISD::SRL";
1712 case PPCISD::SRA: return "PPCISD::SRA";
1713 case PPCISD::SHL: return "PPCISD::SHL";
1714 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1715 case PPCISD::CALL: return "PPCISD::CALL";
1716 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1717 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1718 case PPCISD::CALL_RM:
1719 return "PPCISD::CALL_RM";
1720 case PPCISD::CALL_NOP_RM:
1721 return "PPCISD::CALL_NOP_RM";
1722 case PPCISD::CALL_NOTOC_RM:
1723 return "PPCISD::CALL_NOTOC_RM";
1724 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1725 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1726 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1727 case PPCISD::BCTRL_RM:
1728 return "PPCISD::BCTRL_RM";
1729 case PPCISD::BCTRL_LOAD_TOC_RM:
1730 return "PPCISD::BCTRL_LOAD_TOC_RM";
1731 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1732 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1733 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1734 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1735 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1736 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1737 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1738 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1739 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1740 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1741 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1742 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1743 case PPCISD::ANDI_rec_1_EQ_BIT:
1744 return "PPCISD::ANDI_rec_1_EQ_BIT";
1745 case PPCISD::ANDI_rec_1_GT_BIT:
1746 return "PPCISD::ANDI_rec_1_GT_BIT";
1747 case PPCISD::VCMP: return "PPCISD::VCMP";
1748 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1749 case PPCISD::LBRX: return "PPCISD::LBRX";
1750 case PPCISD::STBRX: return "PPCISD::STBRX";
1751 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1752 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1753 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1754 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1755 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1756 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1757 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1758 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1759 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1760 case PPCISD::ST_VSR_SCAL_INT:
1761 return "PPCISD::ST_VSR_SCAL_INT";
1762 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1763 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1764 case PPCISD::BDZ: return "PPCISD::BDZ";
1765 case PPCISD::MFFS: return "PPCISD::MFFS";
1766 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1767 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1768 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1769 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1770 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1771 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1772 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1773 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1774 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1775 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1776 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1777 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1778 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1779 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1780 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1781 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1782 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1783 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1784 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1785 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1786 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1787 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1788 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1789 case PPCISD::PADDI_DTPREL:
1790 return "PPCISD::PADDI_DTPREL";
1791 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1792 case PPCISD::SC: return "PPCISD::SC";
1793 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1794 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1795 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1796 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1797 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1798 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1799 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1800 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1801 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1802 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1803 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1804 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1805 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1806 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1807 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1808 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1809 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1810 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1811 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1812 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1813 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1814 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1815 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1816 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1817 case PPCISD::STRICT_FADDRTZ:
1818 return "PPCISD::STRICT_FADDRTZ";
1819 case PPCISD::STRICT_FCTIDZ:
1820 return "PPCISD::STRICT_FCTIDZ";
1821 case PPCISD::STRICT_FCTIWZ:
1822 return "PPCISD::STRICT_FCTIWZ";
1823 case PPCISD::STRICT_FCTIDUZ:
1824 return "PPCISD::STRICT_FCTIDUZ";
1825 case PPCISD::STRICT_FCTIWUZ:
1826 return "PPCISD::STRICT_FCTIWUZ";
1827 case PPCISD::STRICT_FCFID:
1828 return "PPCISD::STRICT_FCFID";
1829 case PPCISD::STRICT_FCFIDU:
1830 return "PPCISD::STRICT_FCFIDU";
1831 case PPCISD::STRICT_FCFIDS:
1832 return "PPCISD::STRICT_FCFIDS";
1833 case PPCISD::STRICT_FCFIDUS:
1834 return "PPCISD::STRICT_FCFIDUS";
1835 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1836 case PPCISD::STORE_COND:
1837 return "PPCISD::STORE_COND";
1838 }
1839 return nullptr;
1840}
1841
1843 EVT VT) const {
1844 if (!VT.isVector())
1845 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1846
1848}
1849
1851 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1852 return true;
1853}
1854
1855//===----------------------------------------------------------------------===//
1856// Node matching predicates, for use by the tblgen matching code.
1857//===----------------------------------------------------------------------===//
1858
1859/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1860static bool isFloatingPointZero(SDValue Op) {
1861 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1862 return CFP->getValueAPF().isZero();
1863 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1864 // Maybe this has already been legalized into the constant pool?
1865 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1866 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1867 return CFP->getValueAPF().isZero();
1868 }
1869 return false;
1870}
1871
1872/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1873/// true if Op is undef or if it matches the specified value.
1874static bool isConstantOrUndef(int Op, int Val) {
1875 return Op < 0 || Op == Val;
1876}
1877
1878/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1879/// VPKUHUM instruction.
1880/// The ShuffleKind distinguishes between big-endian operations with
1881/// two different inputs (0), either-endian operations with two identical
1882/// inputs (1), and little-endian operations with two different inputs (2).
1883/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1884bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1885 SelectionDAG &DAG) {
1886 bool IsLE = DAG.getDataLayout().isLittleEndian();
1887 if (ShuffleKind == 0) {
1888 if (IsLE)
1889 return false;
1890 for (unsigned i = 0; i != 16; ++i)
1891 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1892 return false;
1893 } else if (ShuffleKind == 2) {
1894 if (!IsLE)
1895 return false;
1896 for (unsigned i = 0; i != 16; ++i)
1897 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1898 return false;
1899 } else if (ShuffleKind == 1) {
1900 unsigned j = IsLE ? 0 : 1;
1901 for (unsigned i = 0; i != 8; ++i)
1902 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1903 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1904 return false;
1905 }
1906 return true;
1907}
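// Worked example: with two different inputs on a big-endian target
// (ShuffleKind 0), the vpkuhum pattern corresponds to the byte mask
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>,
// i.e. the low-order byte of each halfword of the concatenated inputs. On a
// little-endian target with swapped inputs (ShuffleKind 2) the accepted mask
// is the even byte indices <0, 2, 4, ..., 30>.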
1908
1909/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1910/// VPKUWUM instruction.
1911/// The ShuffleKind distinguishes between big-endian operations with
1912/// two different inputs (0), either-endian operations with two identical
1913/// inputs (1), and little-endian operations with two different inputs (2).
1914/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1915bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1916 SelectionDAG &DAG) {
1917 bool IsLE = DAG.getDataLayout().isLittleEndian();
1918 if (ShuffleKind == 0) {
1919 if (IsLE)
1920 return false;
1921 for (unsigned i = 0; i != 16; i += 2)
1922 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1923 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1924 return false;
1925 } else if (ShuffleKind == 2) {
1926 if (!IsLE)
1927 return false;
1928 for (unsigned i = 0; i != 16; i += 2)
1929 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1930 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1931 return false;
1932 } else if (ShuffleKind == 1) {
1933 unsigned j = IsLE ? 0 : 2;
1934 for (unsigned i = 0; i != 8; i += 2)
1935 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1936 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1937 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1938 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1939 return false;
1940 }
1941 return true;
1942}
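// Worked example: with two different inputs on a big-endian target
// (ShuffleKind 0), the vpkuwum pattern corresponds to the byte mask
//   <2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31>,
// i.e. the low-order halfword of each word of the concatenated inputs.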
1943
1944/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1945/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1946/// current subtarget.
1947///
1948/// The ShuffleKind distinguishes between big-endian operations with
1949/// two different inputs (0), either-endian operations with two identical
1950/// inputs (1), and little-endian operations with two different inputs (2).
1951/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1952bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1953 SelectionDAG &DAG) {
1954 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1955 if (!Subtarget.hasP8Vector())
1956 return false;
1957
1958 bool IsLE = DAG.getDataLayout().isLittleEndian();
1959 if (ShuffleKind == 0) {
1960 if (IsLE)
1961 return false;
1962 for (unsigned i = 0; i != 16; i += 4)
1963 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1964 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1965 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1966 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1967 return false;
1968 } else if (ShuffleKind == 2) {
1969 if (!IsLE)
1970 return false;
1971 for (unsigned i = 0; i != 16; i += 4)
1972 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1973 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1974 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1975 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1976 return false;
1977 } else if (ShuffleKind == 1) {
1978 unsigned j = IsLE ? 0 : 4;
1979 for (unsigned i = 0; i != 8; i += 4)
1980 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1981 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1982 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1983 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1984 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1985 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1986 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1987 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1988 return false;
1989 }
1990 return true;
1991}
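// Worked example: with two different inputs on a big-endian target
// (ShuffleKind 0), the vpkudum pattern corresponds to the byte mask
//   <4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31>,
// i.e. the low-order word of each doubleword of the concatenated inputs.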
1992
1993/// isVMerge - Common function, used to match vmrg* shuffles.
1994///
1995static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1996 unsigned LHSStart, unsigned RHSStart) {
1997 if (N->getValueType(0) != MVT::v16i8)
1998 return false;
1999 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2000 "Unsupported merge size!");
2001
2002 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2003 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2004 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2005 LHSStart+j+i*UnitSize) ||
2006 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2007 RHSStart+j+i*UnitSize))
2008 return false;
2009 }
2010 return true;
2011}
2012
2013/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2014/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2015/// The ShuffleKind distinguishes between big-endian merges with two
2016/// different inputs (0), either-endian merges with two identical inputs (1),
2017/// and little-endian merges with two different inputs (2). For the latter,
2018/// the input operands are swapped (see PPCInstrAltivec.td).
2019bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2020 unsigned ShuffleKind, SelectionDAG &DAG) {
2021 if (DAG.getDataLayout().isLittleEndian()) {
2022 if (ShuffleKind == 1) // unary
2023 return isVMerge(N, UnitSize, 0, 0);
2024 else if (ShuffleKind == 2) // swapped
2025 return isVMerge(N, UnitSize, 0, 16);
2026 else
2027 return false;
2028 } else {
2029 if (ShuffleKind == 1) // unary
2030 return isVMerge(N, UnitSize, 8, 8);
2031 else if (ShuffleKind == 0) // normal
2032 return isVMerge(N, UnitSize, 8, 24);
2033 else
2034 return false;
2035 }
2036}
2037
2038/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2039/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2040/// The ShuffleKind distinguishes between big-endian merges with two
2041/// different inputs (0), either-endian merges with two identical inputs (1),
2042/// and little-endian merges with two different inputs (2). For the latter,
2043/// the input operands are swapped (see PPCInstrAltivec.td).
2044bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2045 unsigned ShuffleKind, SelectionDAG &DAG) {
2046 if (DAG.getDataLayout().isLittleEndian()) {
2047 if (ShuffleKind == 1) // unary
2048 return isVMerge(N, UnitSize, 8, 8);
2049 else if (ShuffleKind == 2) // swapped
2050 return isVMerge(N, UnitSize, 8, 24);
2051 else
2052 return false;
2053 } else {
2054 if (ShuffleKind == 1) // unary
2055 return isVMerge(N, UnitSize, 0, 0);
2056 else if (ShuffleKind == 0) // normal
2057 return isVMerge(N, UnitSize, 0, 16);
2058 else
2059 return false;
2060 }
2061}
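// Worked example: a big-endian byte merge of two different inputs
// (ShuffleKind 0, UnitSize 1) is accepted by isVMRGLShuffleMask for the mask
//   <8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>,
// which interleaves the low halves of the two inputs byte by byte (vmrglb);
// isVMRGHShuffleMask accepts the analogous mask starting at bytes 0 and 16
// (vmrghb).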
2062
2063/**
2064 * Common function used to match vmrgew and vmrgow shuffles
2065 *
2066 * The indexOffset determines whether to look for even or odd words in
2067 * the shuffle mask. This is based on the endianness of the target
2068 * machine.
2069 * - Little Endian:
2070 * - Use offset of 0 to check for odd elements
2071 * - Use offset of 4 to check for even elements
2072 * - Big Endian:
2073 * - Use offset of 0 to check for even elements
2074 * - Use offset of 4 to check for odd elements
2075 * A detailed description of the vector element ordering for little endian and
2076 * big endian can be found at
2077 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2078 * Targeting your applications - what little endian and big endian IBM XL C/C++
2079 * compiler differences mean to you
2080 *
2081 * The mask to the shuffle vector instruction specifies the indices of the
2082 * elements from the two input vectors to place in the result. The elements are
2083 * numbered in array-access order, starting with the first vector. These vectors
2084 * are always of type v16i8, thus each vector will contain 16 byte-sized
2085 * elements. More info on the shuffle vector can be found in the
2086 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2087 * Language Reference.
2088 *
2089 * The RHSStartValue indicates whether the same input vectors are used (unary)
2090 * or two different input vectors are used, based on the following:
2091 * - If the instruction uses the same vector for both inputs, the range of the
2092 * indices will be 0 to 15. In this case, the RHSStart value passed should
2093 * be 0.
2094 * - If the instruction has two different vectors then the range of the
2095 * indices will be 0 to 31. In this case, the RHSStart value passed should
2096 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2097 * to 31 specify elements in the second vector).
2098 *
2099 * \param[in] N The shuffle vector SD Node to analyze
2100 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2101 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2102 * vector to the shuffle_vector instruction
2103 * \return true iff this shuffle vector represents an even or odd word merge
2104 */
2105static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2106 unsigned RHSStartValue) {
2107 if (N->getValueType(0) != MVT::v16i8)
2108 return false;
2109
2110 for (unsigned i = 0; i < 2; ++i)
2111 for (unsigned j = 0; j < 4; ++j)
2112 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2113 i*RHSStartValue+j+IndexOffset) ||
2114 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2115 i*RHSStartValue+j+IndexOffset+8))
2116 return false;
2117 return true;
2118}
2119
2120/**
2121 * Determine if the specified shuffle mask is suitable for the vmrgew or
2122 * vmrgow instructions.
2123 *
2124 * \param[in] N The shuffle vector SD Node to analyze
2125 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2126 * \param[in] ShuffleKind Identify the type of merge:
2127 * - 0 = big-endian merge with two different inputs;
2128 * - 1 = either-endian merge with two identical inputs;
2129 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2130 * little-endian merges).
2131 * \param[in] DAG The current SelectionDAG
2132 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
2133 */
2134bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2135 unsigned ShuffleKind, SelectionDAG &DAG) {
2136 if (DAG.getDataLayout().isLittleEndian()) {
2137 unsigned indexOffset = CheckEven ? 4 : 0;
2138 if (ShuffleKind == 1) // Unary
2139 return isVMerge(N, indexOffset, 0);
2140 else if (ShuffleKind == 2) // swapped
2141 return isVMerge(N, indexOffset, 16);
2142 else
2143 return false;
2144 }
2145 else {
2146 unsigned indexOffset = CheckEven ? 0 : 4;
2147 if (ShuffleKind == 1) // Unary
2148 return isVMerge(N, indexOffset, 0);
2149 else if (ShuffleKind == 0) // Normal
2150 return isVMerge(N, indexOffset, 16);
2151 else
2152 return false;
2153 }
2154 return false;
2155}
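// Worked example: a big-endian even-word merge of two different inputs
// (vmrgew, CheckEven == true, ShuffleKind 0) corresponds to the byte mask
//   <0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27>,
// i.e. words 0 and 2 of the first input interleaved with words 0 and 2 of the
// second input.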
2156
2157/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2158/// amount, otherwise return -1.
2159/// The ShuffleKind distinguishes between big-endian operations with two
2160/// different inputs (0), either-endian operations with two identical inputs
2161/// (1), and little-endian operations with two different inputs (2). For the
2162/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2163int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2164 SelectionDAG &DAG) {
2165 if (N->getValueType(0) != MVT::v16i8)
2166 return -1;
2167
2168 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2169
2170 // Find the first non-undef value in the shuffle mask.
2171 unsigned i;
2172 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2173 /*search*/;
2174
2175 if (i == 16) return -1; // all undef.
2176
2177 // Otherwise, check to see if the rest of the elements are consecutively
2178 // numbered from this value.
2179 unsigned ShiftAmt = SVOp->getMaskElt(i);
2180 if (ShiftAmt < i) return -1;
2181
2182 ShiftAmt -= i;
2183 bool isLE = DAG.getDataLayout().isLittleEndian();
2184
2185 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2186 // Check the rest of the elements to see if they are consecutive.
2187 for (++i; i != 16; ++i)
2188 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2189 return -1;
2190 } else if (ShuffleKind == 1) {
2191 // Check the rest of the elements to see if they are consecutive.
2192 for (++i; i != 16; ++i)
2193 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2194 return -1;
2195 } else
2196 return -1;
2197
2198 if (isLE)
2199 ShiftAmt = 16 - ShiftAmt;
2200
2201 return ShiftAmt;
2202}
2203
2204/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2205/// specifies a splat of a single element that is suitable for input to
2206/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
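/// As an illustrative example, with EltSize = 4 the mask
/// <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7> is a splat of word element 1.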
2207bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2208 EVT VT = N->getValueType(0);
2209 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2210 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2211
2212 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2213 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2214
2215 // The consecutive indices need to specify an element, not part of two
2216 // different elements. So abandon ship early if this isn't the case.
2217 if (N->getMaskElt(0) % EltSize != 0)
2218 return false;
2219
2220 // This is a splat operation if each element of the permute is the same, and
2221 // if the value doesn't reference the second vector.
2222 unsigned ElementBase = N->getMaskElt(0);
2223
2224 // FIXME: Handle UNDEF elements too!
2225 if (ElementBase >= 16)
2226 return false;
2227
2228 // Check that the indices are consecutive, in the case of a multi-byte element
2229 // splatted with a v16i8 mask.
2230 for (unsigned i = 1; i != EltSize; ++i)
2231 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2232 return false;
2233
2234 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2235 if (N->getMaskElt(i) < 0) continue;
2236 for (unsigned j = 0; j != EltSize; ++j)
2237 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2238 return false;
2239 }
2240 return true;
2241}
2242
2243/// Check that the mask is shuffling N byte elements. Within each N byte
2244/// element of the mask, the indices could be either in increasing or
2245/// decreasing order as long as they are consecutive.
2246/// \param[in] N the shuffle vector SD Node to analyze
2247/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2248/// Word/DoubleWord/QuadWord).
2249/// \param[in] StepLen the index delta between adjacent mask entries within
2250/// each N byte element: 1 if the mask is in increasing order, -1 if decreasing.
2251/// \return true iff the mask is shuffling N byte elements.
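/// As an illustrative example, with Width = 4 and StepLen = -1 the mask
/// <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12> (a per-word byte reverse)
/// is accepted.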
2252static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2253 int StepLen) {
2254 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2255 "Unexpected element width.");
2256 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2257
2258 unsigned NumOfElem = 16 / Width;
2259 unsigned MaskVal[16]; // Width is never greater than 16
2260 for (unsigned i = 0; i < NumOfElem; ++i) {
2261 MaskVal[0] = N->getMaskElt(i * Width);
2262 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2263 return false;
2264 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2265 return false;
2266 }
2267
2268 for (unsigned int j = 1; j < Width; ++j) {
2269 MaskVal[j] = N->getMaskElt(i * Width + j);
2270 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2271 return false;
2272 }
2273 }
2274 }
2275
2276 return true;
2277}
2278
2279bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2280 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2281 if (!isNByteElemShuffleMask(N, 4, 1))
2282 return false;
2283
2284 // Now we look at mask elements 0,4,8,12
2285 unsigned M0 = N->getMaskElt(0) / 4;
2286 unsigned M1 = N->getMaskElt(4) / 4;
2287 unsigned M2 = N->getMaskElt(8) / 4;
2288 unsigned M3 = N->getMaskElt(12) / 4;
2289 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2290 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2291
2292 // Below, let H and L be arbitrary elements of the shuffle mask
2293 // where H is in the range [4,7] and L is in the range [0,3].
2294 // H, 1, 2, 3 or L, 5, 6, 7
2295 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2296 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2297 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2298 InsertAtByte = IsLE ? 12 : 0;
2299 Swap = M0 < 4;
2300 return true;
2301 }
2302 // 0, H, 2, 3 or 4, L, 6, 7
2303 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2304 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2305 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2306 InsertAtByte = IsLE ? 8 : 4;
2307 Swap = M1 < 4;
2308 return true;
2309 }
2310 // 0, 1, H, 3 or 4, 5, L, 7
2311 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2312 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2313 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2314 InsertAtByte = IsLE ? 4 : 8;
2315 Swap = M2 < 4;
2316 return true;
2317 }
2318 // 0, 1, 2, H or 4, 5, 6, L
2319 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2320 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2321 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2322 InsertAtByte = IsLE ? 0 : 12;
2323 Swap = M3 < 4;
2324 return true;
2325 }
2326
2327 // If both vector operands for the shuffle are the same vector, the mask will
2328 // contain only elements from the first one and the second one will be undef.
2329 if (N->getOperand(1).isUndef()) {
2330 ShiftElts = 0;
2331 Swap = true;
2332 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2333 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2334 InsertAtByte = IsLE ? 12 : 0;
2335 return true;
2336 }
2337 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2338 InsertAtByte = IsLE ? 8 : 4;
2339 return true;
2340 }
2341 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2342 InsertAtByte = IsLE ? 4 : 8;
2343 return true;
2344 }
2345 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2346 InsertAtByte = IsLE ? 0 : 12;
2347 return true;
2348 }
2349 }
2350
2351 return false;
2352}
2353
2354bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2355 bool &Swap, bool IsLE) {
2356 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2357 // Ensure each byte index of the word is consecutive.
2358 if (!isNByteElemShuffleMask(N, 4, 1))
2359 return false;
2360
2361 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2362 unsigned M0 = N->getMaskElt(0) / 4;
2363 unsigned M1 = N->getMaskElt(4) / 4;
2364 unsigned M2 = N->getMaskElt(8) / 4;
2365 unsigned M3 = N->getMaskElt(12) / 4;
2366
2367 // If both vector operands for the shuffle are the same vector, the mask will
2368 // contain only elements from the first one and the second one will be undef.
2369 if (N->getOperand(1).isUndef()) {
2370 assert(M0 < 4 && "Indexing into an undef vector?");
2371 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2372 return false;
2373
2374 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2375 Swap = false;
2376 return true;
2377 }
2378
2379 // Ensure each word index of the ShuffleVector Mask is consecutive.
2380 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2381 return false;
2382
2383 if (IsLE) {
2384 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2385 // Input vectors don't need to be swapped if the leading element
2386 // of the result is one of the 3 left elements of the second vector
2387 // (or if there is no shift to be done at all).
2388 Swap = false;
2389 ShiftElts = (8 - M0) % 8;
2390 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2391 // Input vectors need to be swapped if the leading element
2392 // of the result is one of the 3 left elements of the first vector
2393 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2394 Swap = true;
2395 ShiftElts = (4 - M0) % 4;
2396 }
2397
2398 return true;
2399 } else { // BE
2400 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2401 // Input vectors don't need to be swapped if the leading element
2402 // of the result is one of the 4 elements of the first vector.
2403 Swap = false;
2404 ShiftElts = M0;
2405 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2406 // Input vectors need to be swapped if the leading element
2407 // of the result is one of the 4 elements of the right vector.
2408 Swap = true;
2409 ShiftElts = M0 - 4;
2410 }
2411
2412 return true;
2413 }
2414}
2415
2416static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2417 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2418
2419 if (!isNByteElemShuffleMask(N, Width, -1))
2420 return false;
2421
2422 for (int i = 0; i < 16; i += Width)
2423 if (N->getMaskElt(i) != i + Width - 1)
2424 return false;
2425
2426 return true;
2427}
2428
2429bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2430 return isXXBRShuffleMaskHelper(N, 2);
2431}
2432
2433bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2434 return isXXBRShuffleMaskHelper(N, 4);
2435}
2436
2437bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2438 return isXXBRShuffleMaskHelper(N, 8);
2439}
2440
2441bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2442 return isXXBRShuffleMaskHelper(N, 16);
2443}
2444
2445/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2446/// if the inputs to the instruction should be swapped and set \p DM to the
2447/// value for the immediate.
2448/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2449/// AND element 0 of the result comes from the first input (LE) or second input
2450/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2451/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2452/// mask.
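/// As an illustrative example, on big-endian with two different inputs the
/// byte mask <0,1,...,7, 24,25,...,31> (doubleword 0 of the first input
/// followed by doubleword 1 of the second) yields DM = 1 and Swap = false.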
2453bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2454 bool &Swap, bool IsLE) {
2455 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2456
2457 // Ensure each byte index of the double word is consecutive.
2458 if (!isNByteElemShuffleMask(N, 8, 1))
2459 return false;
2460
2461 unsigned M0 = N->getMaskElt(0) / 8;
2462 unsigned M1 = N->getMaskElt(8) / 8;
2463 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2464
2465 // If both vector operands for the shuffle are the same vector, the mask will
2466 // contain only elements from the first one and the second one will be undef.
2467 if (N->getOperand(1).isUndef()) {
2468 if ((M0 | M1) < 2) {
2469 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2470 Swap = false;
2471 return true;
2472 } else
2473 return false;
2474 }
2475
2476 if (IsLE) {
2477 if (M0 > 1 && M1 < 2) {
2478 Swap = false;
2479 } else if (M0 < 2 && M1 > 1) {
2480 M0 = (M0 + 2) % 4;
2481 M1 = (M1 + 2) % 4;
2482 Swap = true;
2483 } else
2484 return false;
2485
2486 // Note: if control flow comes here that means Swap is already set above
2487 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2488 return true;
2489 } else { // BE
2490 if (M0 < 2 && M1 > 1) {
2491 Swap = false;
2492 } else if (M0 > 1 && M1 < 2) {
2493 M0 = (M0 + 2) % 4;
2494 M1 = (M1 + 2) % 4;
2495 Swap = true;
2496 } else
2497 return false;
2498
2499 // Note: if control flow comes here that means Swap is already set above
2500 DM = (M0 << 1) + (M1 & 1);
2501 return true;
2502 }
2503}
2504
2505
2506/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2507/// appropriate for PPC mnemonics (which have a big endian bias - namely
2508/// elements are counted from the left of the vector register).
2509unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2510 SelectionDAG &DAG) {
2511 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2512 assert(isSplatShuffleMask(SVOp, EltSize));
2513 EVT VT = SVOp->getValueType(0);
2514
2515 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2516 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2517 : SVOp->getMaskElt(0);
2518
2519 if (DAG.getDataLayout().isLittleEndian())
2520 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2521 else
2522 return SVOp->getMaskElt(0) / EltSize;
2523}
2524
2525/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2526/// by using a vspltis[bhw] instruction of the specified element size, return
2527/// the constant being splatted. The ByteSize field indicates the number of
2528/// bytes of each element [124] -> [bhw].
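/// As an illustrative example, a v16i8 build_vector of sixteen copies of the
/// constant 5 queried with ByteSize = 1 returns the target constant 5 (the
/// immediate for a vspltisb 5).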
2529SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2530 SDValue OpVal;
2531
2532 // If ByteSize of the splat is bigger than the element size of the
2533 // build_vector, then we have a case where we are checking for a splat where
2534 // multiple elements of the buildvector are folded together into a single
2535 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2536 unsigned EltSize = 16/N->getNumOperands();
2537 if (EltSize < ByteSize) {
2538 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2539 SDValue UniquedVals[4];
2540 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2541
2542 // See if all of the elements in the buildvector agree across.
2543 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2544 if (N->getOperand(i).isUndef()) continue;
2545 // If the element isn't a constant, bail fully out.
2546 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2547
2548 if (!UniquedVals[i&(Multiple-1)].getNode())
2549 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2550 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2551 return SDValue(); // no match.
2552 }
2553
2554 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2555 // either constant or undef values that are identical for each chunk. See
2556 // if these chunks can form into a larger vspltis*.
2557
2558 // Check to see if all of the leading entries are either 0 or -1. If
2559 // neither, then this won't fit into the immediate field.
2560 bool LeadingZero = true;
2561 bool LeadingOnes = true;
2562 for (unsigned i = 0; i != Multiple-1; ++i) {
2563 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2564
2565 LeadingZero &= isNullConstant(UniquedVals[i]);
2566 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2567 }
2568 // Finally, check the least significant entry.
2569 if (LeadingZero) {
2570 if (!UniquedVals[Multiple-1].getNode())
2571 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2572 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2573 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2574 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2575 }
2576 if (LeadingOnes) {
2577 if (!UniquedVals[Multiple-1].getNode())
2578 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2579 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2580 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2581 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2582 }
2583
2584 return SDValue();
2585 }
2586
2587 // Check to see if this buildvec has a single non-undef value in its elements.
2588 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2589 if (N->getOperand(i).isUndef()) continue;
2590 if (!OpVal.getNode())
2591 OpVal = N->getOperand(i);
2592 else if (OpVal != N->getOperand(i))
2593 return SDValue();
2594 }
2595
2596 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2597
2598 unsigned ValSizeInBytes = EltSize;
2599 uint64_t Value = 0;
2600 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2601 Value = CN->getZExtValue();
2602 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2603 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2604 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2605 }
2606
2607 // If the splat value is larger than the element value, then we can never do
2608 // this splat. The only case that we could fit the replicated bits into our
2609 // immediate field for would be zero, and we prefer to use vxor for it.
2610 if (ValSizeInBytes < ByteSize) return SDValue();
2611
2612 // If the element value is larger than the splat value, check if it consists
2613 // of a repeated bit pattern of size ByteSize.
2614 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2615 return SDValue();
2616
2617 // Properly sign extend the value.
2618 int MaskVal = SignExtend32(Value, ByteSize * 8);
2619
2620 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2621 if (MaskVal == 0) return SDValue();
2622
2623 // Finally, if this value fits in a 5 bit sext field, return it
2624 if (SignExtend32<5>(MaskVal) == MaskVal)
2625 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2626 return SDValue();
2627}
2628
2629//===----------------------------------------------------------------------===//
2630// Addressing Mode Selection
2631//===----------------------------------------------------------------------===//
2632
2633/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2634/// or 64-bit immediate, and if the value can be accurately represented as a
2635/// sign extension from a 16-bit value. If so, this returns true and the
2636/// immediate.
2637bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2638 if (!isa<ConstantSDNode>(N))
2639 return false;
2640
2641 Imm = (int16_t)N->getAsZExtVal();
2642 if (N->getValueType(0) == MVT::i32)
2643 return Imm == (int32_t)N->getAsZExtVal();
2644 else
2645 return Imm == (int64_t)N->getAsZExtVal();
2646}
2647bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2648 return isIntS16Immediate(Op.getNode(), Imm);
2649}
2650
2651/// Used when computing address flags for selecting loads and stores.
2652/// If we have an OR, check if the LHS and RHS are provably disjoint.
2653/// An OR of two provably disjoint values is equivalent to an ADD.
2654/// Most PPC load/store instructions compute the effective address as a sum,
2655/// so doing this conversion is useful.
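/// As an illustrative example, if the low two bits of the LHS are known to be
/// zero, then (or LHS, 3) computes the same value as (add LHS, 3).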
2656static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2657 if (N.getOpcode() != ISD::OR)
2658 return false;
2659 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2660 if (!LHSKnown.Zero.getBoolValue())
2661 return false;
2662 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2663 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2664}
2665
2666/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2667/// be represented as an indexed [r+r] operation.
2668bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2669 SDValue &Index,
2670 SelectionDAG &DAG) const {
2671 for (SDNode *U : N->uses()) {
2672 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2673 if (Memop->getMemoryVT() == MVT::f64) {
2674 Base = N.getOperand(0);
2675 Index = N.getOperand(1);
2676 return true;
2677 }
2678 }
2679 }
2680 return false;
2681}
2682
2683/// isIntS34Immediate - This method tests whether the value of the given node
2684/// can be accurately represented as a sign extension from a 34-bit value. If so,
2685/// this returns true and the immediate.
2686bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2687 if (!isa<ConstantSDNode>(N))
2688 return false;
2689
2690 Imm = (int64_t)N->getAsZExtVal();
2691 return isInt<34>(Imm);
2692}
2693bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2694 return isIntS34Immediate(Op.getNode(), Imm);
2695}
2696
2697/// SelectAddressRegReg - Given the specified address, check to see if it
2698/// can be represented as an indexed [r+r] operation. Returns false if it
2699/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2700/// non-zero and N can be represented by a base register plus a signed 16-bit
2701/// displacement, make a more precise judgement by checking (displacement % \p
2702/// EncodingAlignment).
2703bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2704 SDValue &Index, SelectionDAG &DAG,
2705 MaybeAlign EncodingAlignment) const {
2706 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2707 // a [pc+imm].
2708 if (SelectAddressPCRel(N, Base))
2709 return false;
2710
2711 int16_t Imm = 0;
2712 if (N.getOpcode() == ISD::ADD) {
2713 // Is this an SPE f64 load/store? SPE load/store instructions cannot
2714 // handle 16-bit offsets; they only support 8-bit offsets.
2715 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2716 return true;
2717 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2718 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2719 return false; // r+i
2720 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2721 return false; // r+i
2722
2723 Base = N.getOperand(0);
2724 Index = N.getOperand(1);
2725 return true;
2726 } else if (N.getOpcode() == ISD::OR) {
2727 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2728 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2729 return false; // r+i can fold it if we can.
2730
2731 // If this is an or of disjoint bitfields, we can codegen this as an add
2732 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2733 // disjoint.
2734 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2735
2736 if (LHSKnown.Zero.getBoolValue()) {
2737 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2738 // If all of the bits are known zero on the LHS or RHS, the add won't
2739 // carry.
2740 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2741 Base = N.getOperand(0);
2742 Index = N.getOperand(1);
2743 return true;
2744 }
2745 }
2746 }
2747
2748 return false;
2749}
2750
2751// If we happen to be doing an i64 load or store into a stack slot that has
2752// less than a 4-byte alignment, then the frame-index elimination may need to
2753// use an indexed load or store instruction (because the offset may not be a
2754// multiple of 4). The extra register needed to hold the offset comes from the
2755// register scavenger, and it is possible that the scavenger will need to use
2756// an emergency spill slot. As a result, we need to make sure that a spill slot
2757// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2758// stack slot.
2759static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2760 // FIXME: This does not handle the LWA case.
2761 if (VT != MVT::i64)
2762 return;
2763
2764 // NOTE: We'll exclude negative FIs here, which come from argument
2765 // lowering, because there are no known test cases triggering this problem
2766 // using packed structures (or similar). We can remove this exclusion if
2767 // we find such a test case. The reason why this is so test-case driven is
2768 // because this entire 'fixup' is only to prevent crashes (from the
2769 // register scavenger) on not-really-valid inputs. For example, if we have:
2770 // %a = alloca i1
2771 // %b = bitcast i1* %a to i64*
2772 // store i64 0, i64* %b
2773 // then the store should really be marked as 'align 1', but is not. If it
2774 // were marked as 'align 1' then the indexed form would have been
2775 // instruction-selected initially, and the problem this 'fixup' is preventing
2776 // won't happen regardless.
2777 if (FrameIdx < 0)
2778 return;
2779
2780 MachineFunction &MF = DAG.getMachineFunction();
2781 MachineFrameInfo &MFI = MF.getFrameInfo();
2782
2783 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2784 return;
2785
2786 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2787 FuncInfo->setHasNonRISpills();
2788}
2789
2790/// Returns true if the address N can be represented by a base register plus
2791/// a signed 16-bit displacement [r+imm], and if it is not better
2792/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2793/// displacements that are multiples of that value.
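/// As an illustrative example, (add %X, 40) with EncodingAlignment = 4 yields
/// Base = %X and Disp = 40, while an offset of 42 is rejected here and left
/// for the reg+reg form.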
2794bool PPCTargetLowering::SelectAddressRegImm(
2795 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2796 MaybeAlign EncodingAlignment) const {
2797 // FIXME dl should come from parent load or store, not from address
2798 SDLoc dl(N);
2799
2800 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2801 // a [pc+imm].
2802 if (SelectAddressPCRel(N, Base))
2803 return false;
2804
2805 // If this can be more profitably realized as r+r, fail.
2806 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2807 return false;
2808
2809 if (N.getOpcode() == ISD::ADD) {
2810 int16_t imm = 0;
2811 if (isIntS16Immediate(N.getOperand(1), imm) &&
2812 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2813 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2814 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2815 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2816 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2817 } else {
2818 Base = N.getOperand(0);
2819 }
2820 return true; // [r+i]
2821 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2822 // Match LOAD (ADD (X, Lo(G))).
2823 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2824 "Cannot handle constant offsets yet!");
2825 Disp = N.getOperand(1).getOperand(0); // The global address.
2826 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2827 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2828 Disp.getOpcode() == ISD::TargetConstantPool ||
2829 Disp.getOpcode() == ISD::TargetJumpTable);
2830 Base = N.getOperand(0);
2831 return true; // [&g+r]
2832 }
2833 } else if (N.getOpcode() == ISD::OR) {
2834 int16_t imm = 0;
2835 if (isIntS16Immediate(N.getOperand(1), imm) &&
2836 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2837 // If this is an or of disjoint bitfields, we can codegen this as an add
2838 // (for better address arithmetic) if the LHS and RHS of the OR are
2839 // provably disjoint.
2840 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2841
2842 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2843 // If all of the bits are known zero on the LHS or RHS, the add won't
2844 // carry.
2845 if (FrameIndexSDNode *FI =
2846 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2847 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2848 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2849 } else {
2850 Base = N.getOperand(0);
2851 }
2852 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2853 return true;
2854 }
2855 }
2856 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2857 // Loading from a constant address.
2858
2859 // If this address fits entirely in a 16-bit sext immediate field, codegen
2860 // this as "d, 0"
2861 int16_t Imm;
2862 if (isIntS16Immediate(CN, Imm) &&
2863 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2864 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2865 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2866 CN->getValueType(0));
2867 return true;
2868 }
2869
2870 // Handle 32-bit sext immediates with LIS + addr mode.
2871 if ((CN->getValueType(0) == MVT::i32 ||
2872 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2873 (!EncodingAlignment ||
2874 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2875 int Addr = (int)CN->getZExtValue();
2876
2877 // Otherwise, break this down into an LIS + disp.
2878 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2879
2880 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2881 MVT::i32);
2882 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2883 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2884 return true;
2885 }
2886 }
2887
2888 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2889 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2890 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2891 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2892 } else
2893 Base = N;
2894 return true; // [r+0]
2895}
2896
2897/// Similar to the 16-bit case but for instructions that take a 34-bit
2898/// displacement field (prefixed loads/stores).
2899bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2900 SDValue &Base,
2901 SelectionDAG &DAG) const {
2902 // Only on 64-bit targets.
2903 if (N.getValueType() != MVT::i64)
2904 return false;
2905
2906 SDLoc dl(N);
2907 int64_t Imm = 0;
2908
2909 if (N.getOpcode() == ISD::ADD) {
2910 if (!isIntS34Immediate(N.getOperand(1), Imm))
2911 return false;
2912 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2913 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2914 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2915 else
2916 Base = N.getOperand(0);
2917 return true;
2918 }
2919
2920 if (N.getOpcode() == ISD::OR) {
2921 if (!isIntS34Immediate(N.getOperand(1), Imm))
2922 return false;
2923 // If this is an or of disjoint bitfields, we can codegen this as an add
2924 // (for better address arithmetic) if the LHS and RHS of the OR are
2925 // provably disjoint.
2926 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2927 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2928 return false;
2929 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2930 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2931 else
2932 Base = N.getOperand(0);
2933 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2934 return true;
2935 }
2936
2937 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2938 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2939 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2940 return true;
2941 }
2942
2943 return false;
2944}
2945
2946/// SelectAddressRegRegOnly - Given the specified address, force it to be
2947/// represented as an indexed [r+r] operation.
2948bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2949 SDValue &Index,
2950 SelectionDAG &DAG) const {
2951 // Check to see if we can easily represent this as an [r+r] address. This
2952 // will fail if it thinks that the address is more profitably represented as
2953 // reg+imm, e.g. where imm = 0.
2954 if (SelectAddressRegReg(N, Base, Index, DAG))
2955 return true;
2956
2957 // If the address is the result of an add, we will utilize the fact that the
2958 // address calculation includes an implicit add. However, we can reduce
2959 // register pressure if we do not materialize a constant just for use as the
2960 // index register. We only get rid of the add if it is not an add of a
2961 // value and a 16-bit signed constant where both operands have a single use.
2962 int16_t imm = 0;
2963 if (N.getOpcode() == ISD::ADD &&
2964 (!isIntS16Immediate(N.getOperand(1), imm) ||
2965 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2966 Base = N.getOperand(0);
2967 Index = N.getOperand(1);
2968 return true;
2969 }
2970
2971 // Otherwise, do it the hard way, using R0 as the base register.
2972 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2973 N.getValueType());
2974 Index = N;
2975 return true;
2976}
2977
2978template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2979 Ty *PCRelCand = dyn_cast<Ty>(N);
2980 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2981}
2982
2983/// Returns true if this address is a PC Relative address.
2984/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2985/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2986bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2987 // This is a materialize PC Relative node. Always select this as PC Relative.
2988 Base = N;
2989 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2990 return true;
2991 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2992 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2993 isValidPCRelNode<JumpTableSDNode>(N) ||
2994 isValidPCRelNode<BlockAddressSDNode>(N))
2995 return true;
2996 return false;
2997}
2998
2999/// Returns true if we should use a direct load into vector instruction
3000/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3001static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3002
3003 // If there are any other uses other than scalar to vector, then we should
3004 // keep it as a scalar load -> direct move pattern to prevent multiple
3005 // loads.
3006 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3007 if (!LD)
3008 return false;
3009
3010 EVT MemVT = LD->getMemoryVT();
3011 if (!MemVT.isSimple())
3012 return false;
3013 switch(MemVT.getSimpleVT().SimpleTy) {
3014 case MVT::i64:
3015 break;
3016 case MVT::i32:
3017 if (!ST.hasP8Vector())
3018 return false;
3019 break;
3020 case MVT::i16:
3021 case MVT::i8:
3022 if (!ST.hasP9Vector())
3023 return false;
3024 break;
3025 default:
3026 return false;
3027 }
3028
3029 SDValue LoadedVal(N, 0);
3030 if (!LoadedVal.hasOneUse())
3031 return false;
3032
3033 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3034 UI != UE; ++UI)
3035 if (UI.getUse().get().getResNo() == 0 &&
3036 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3037 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3038 return false;
3039
3040 return true;
3041}
3042
3043/// getPreIndexedAddressParts - Returns true, and sets the base pointer, offset
3044/// pointer, and addressing mode by reference, if the node's address can be
3045/// legally represented as a pre-indexed load / store address.
3046bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3047 SDValue &Offset,
3048 ISD::MemIndexedMode &AM,
3049 SelectionDAG &DAG) const {
3050 if (DisablePPCPreinc) return false;
3051
3052 bool isLoad = true;
3053 SDValue Ptr;
3054 EVT VT;
3055 Align Alignment;
3056 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3057 Ptr = LD->getBasePtr();
3058 VT = LD->getMemoryVT();
3059 Alignment = LD->getAlign();
3060 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3061 Ptr = ST->getBasePtr();
3062 VT = ST->getMemoryVT();
3063 Alignment = ST->getAlign();
3064 isLoad = false;
3065 } else
3066 return false;
3067
3068 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3069 // instructions because we can fold these into a more efficient instruction
3070 // instead, (such as LXSD).
3071 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3072 return false;
3073 }
3074
3075 // PowerPC doesn't have preinc load/store instructions for vectors
3076 if (VT.isVector())
3077 return false;
3078
3079 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3080 // Common code will reject creating a pre-inc form if the base pointer
3081 // is a frame index, or if N is a store and the base pointer is either
3082 // the same as or a predecessor of the value being stored. Check for
3083 // those situations here, and try with swapped Base/Offset instead.
3084 bool Swap = false;
3085
3086 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3087 Swap = true;
3088 else if (!isLoad) {
3089 SDValue Val = cast<StoreSDNode>(N)->getValue();
3090 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3091 Swap = true;
3092 }
3093
3094 if (Swap)
3095 std::swap(Base, Offset);
3096
3097 AM = ISD::PRE_INC;
3098 return true;
3099 }
3100
3101 // LDU/STU can only handle immediates that are a multiple of 4.
3102 if (VT != MVT::i64) {
3103 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3104 return false;
3105 } else {
3106 // LDU/STU need an address with at least 4-byte alignment.
3107 if (Alignment < Align(4))
3108 return false;
3109
3110 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3111 return false;
3112 }
3113
3114 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3115 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3116 // sext i32 to i64 when addr mode is r+i.
3117 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3118 LD->getExtensionType() == ISD::SEXTLOAD &&
3119 isa<ConstantSDNode>(Offset))
3120 return false;
3121 }
3122
3123 AM = ISD::PRE_INC;
3124 return true;
3125}
3126
3127//===----------------------------------------------------------------------===//
3128// LowerOperation implementation
3129//===----------------------------------------------------------------------===//
3130
3131/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3132/// and LoOpFlags to the target MO flags.
3133static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3134 unsigned &HiOpFlags, unsigned &LoOpFlags,
3135 const GlobalValue *GV = nullptr) {
3136 HiOpFlags = PPCII::MO_HA;
3137 LoOpFlags = PPCII::MO_LO;
3138
3139 // Don't use the pic base if not in PIC relocation model.
3140 if (IsPIC) {
3141 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3142 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3143 }
3144}
3145
3146static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3147 SelectionDAG &DAG) {
3148 SDLoc DL(HiPart);
3149 EVT PtrVT = HiPart.getValueType();
3150 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3151
3152 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3153 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3154
3155 // With PIC, the first instruction is actually "GR+hi(&G)".
3156 if (isPIC)
3157 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3158 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3159
3160 // Generate non-pic code that has direct accesses to the constant pool.
3161 // The address of the global is just (hi(&g)+lo(&g)).
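  // An illustrative (assumed) materialization for the non-PIC case:
  //   lis  rT, sym@ha
  //   addi rT, rT, sym@l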
3162 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3163}
3164
3165static void setUsesTOCBasePtr(MachineFunction &MF) {
3166 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3167 FuncInfo->setUsesTOCBasePtr();
3168}
3169
3170static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3171 setUsesTOCBasePtr(DAG.getMachineFunction());
3172}
3173
3174SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3175 SDValue GA) const {
3176 const bool Is64Bit = Subtarget.isPPC64();
3177 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3178 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3179 : Subtarget.isAIXABI()
3180 ? DAG.getRegister(PPC::R2, VT)
3181 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3182 SDValue Ops[] = { GA, Reg };
3183 return DAG.getMemIntrinsicNode(
3184 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3185 MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3186 MachineMemOperand::MOLoad);
3187}
3188
3189SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3190 SelectionDAG &DAG) const {
3191 EVT PtrVT = Op.getValueType();
3192 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3193 const Constant *C = CP->getConstVal();
3194
3195 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3196 // The actual address of the GlobalValue is stored in the TOC.
3197 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3198 if (Subtarget.isUsingPCRelativeCalls()) {
3199 SDLoc DL(CP);
3200 EVT Ty = getPointerTy(DAG.getDataLayout());
3201 SDValue ConstPool = DAG.getTargetConstantPool(
3202 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3203 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3204 }
3205 setUsesTOCBasePtr(DAG);
3206 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3207 return getTOCEntry(DAG, SDLoc(CP), GA);
3208 }
3209
3210 unsigned MOHiFlag, MOLoFlag;
3211 bool IsPIC = isPositionIndependent();
3212 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3213
3214 if (IsPIC && Subtarget.isSVR4ABI()) {
3215 SDValue GA =
3216 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3217 return getTOCEntry(DAG, SDLoc(CP), GA);
3218 }
3219
3220 SDValue CPIHi =
3221 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3222 SDValue CPILo =
3223 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3224 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3225}
3226
3227// For 64-bit PowerPC, prefer the more compact relative encodings.
3228// This trades 32 bits per jump table entry for one or two instructions
3229// at the jump site.
3230unsigned PPCTargetLowering::getJumpTableEncoding() const {
3231 if (isJumpTableRelative())
3232 return MachineJumpTableInfo::EK_LabelDifference32;
3233
3234 return TargetLowering::getJumpTableEncoding();
3235}
3236
3237bool PPCTargetLowering::isJumpTableRelative() const {
3238 if (UseAbsoluteJumpTables)
3239 return false;
3240 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3241 return true;
3242 return TargetLowering::isJumpTableRelative();
3243}
3244
3245SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3246 SelectionDAG &DAG) const {
3247 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3248 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3249
3250 switch (getTargetMachine().getCodeModel()) {
3251 case CodeModel::Small:
3252 case CodeModel::Medium:
3253 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3254 default:
3255 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3256 getPointerTy(DAG.getDataLayout()));
3257 }
3258}
3259
3260const MCExpr *
3261PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3262 unsigned JTI,
3263 MCContext &Ctx) const {
3264 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3265 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3266
3267 switch (getTargetMachine().getCodeModel()) {
3268 case CodeModel::Small:
3269 case CodeModel::Medium:
3270 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3271 default:
3272 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3273 }
3274}
3275
3276SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3277 EVT PtrVT = Op.getValueType();
3278 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3279
3280 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3281 if (Subtarget.isUsingPCRelativeCalls()) {
3282 SDLoc DL(JT);
3283 EVT Ty = getPointerTy(DAG.getDataLayout());
3284 SDValue GA =
3285 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3286 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3287 return MatAddr;
3288 }
3289
3290 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3291 // The actual address of the GlobalValue is stored in the TOC.
3292 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3293 setUsesTOCBasePtr(DAG);
3294 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3295 return getTOCEntry(DAG, SDLoc(JT), GA);
3296 }
3297
3298 unsigned MOHiFlag, MOLoFlag;
3299 bool IsPIC = isPositionIndependent();
3300 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3301
3302 if (IsPIC && Subtarget.isSVR4ABI()) {
3303 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3304 PPCII::MO_PIC_FLAG);
3305 return getTOCEntry(DAG, SDLoc(GA), GA);
3306 }
3307
3308 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3309 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3310 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3311}
3312
3313SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3314 SelectionDAG &DAG) const {
3315 EVT PtrVT = Op.getValueType();
3316 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3317 const BlockAddress *BA = BASDN->getBlockAddress();
3318
3319 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3320 if (Subtarget.isUsingPCRelativeCalls()) {
3321 SDLoc DL(BASDN);
3322 EVT Ty = getPointerTy(DAG.getDataLayout());
3323 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3324 PPCII::MO_PCREL_FLAG);
3325 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3326 return MatAddr;
3327 }
3328
3329 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3330 // The actual BlockAddress is stored in the TOC.
3331 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3332 setUsesTOCBasePtr(DAG);
3333 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3334 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3335 }
3336
3337 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3338 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3339 return getTOCEntry(
3340 DAG, SDLoc(BASDN),
3341 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3342
3343 unsigned MOHiFlag, MOLoFlag;
3344 bool IsPIC = isPositionIndependent();
3345 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3346 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3347 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3348 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3349}
3350
3351SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3352 SelectionDAG &DAG) const {
3353 if (Subtarget.isAIXABI())
3354 return LowerGlobalTLSAddressAIX(Op, DAG);
3355
3356 return LowerGlobalTLSAddressLinux(Op, DAG);
3357}
3358
3359SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3360 SelectionDAG &DAG) const {
3361 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3362
3363 if (DAG.getTarget().useEmulatedTLS())
3364 report_fatal_error("Emulated TLS is not yet supported on AIX");
3365
3366 SDLoc dl(GA);
3367 const GlobalValue *GV = GA->getGlobal();
3368 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3369 bool Is64Bit = Subtarget.isPPC64();
3370 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3371 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3372 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3373
3374 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3375 SDValue VariableOffsetTGA =
3376 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3377 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3378 SDValue TLSReg;
3379 if (Is64Bit) {
3380 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3381 // involves a load of the variable offset (from the TOC), followed by an
3382 // add of the loaded variable offset to R13 (the thread pointer).
3383 // This code sequence looks like:
3384 // ld reg1,var[TC](2)
3385 // add reg2, reg1, r13 // r13 contains the thread pointer
3386 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3387
3388 // With the -maix-small-local-exec-tls option, produce a faster access
3389 // sequence for local-exec TLS variables where the offset from the TLS
3390 // base is encoded as an immediate operand.
3391 //
3392 // We only utilize the faster local-exec access sequence when the TLS
3393 // variable has a size within the policy limit. We treat types that are
3394 // not sized or are empty as being over the policy size limit.
3395 if (HasAIXSmallLocalExecTLS && IsTLSLocalExecModel) {
3396 Type *GVType = GV->getValueType();
3397 if (GVType->isSized() && !GVType->isEmptyTy() &&
3398 GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
3400 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3401 }
3402 } else {
3403 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3404 // involves loading the variable offset from the TOC, generating a call to
3405 // .__get_tpointer to get the thread pointer (which will be in R3), and
3406 // adding the two together:
3407 // lwz reg1,var[TC](2)
3408 // bla .__get_tpointer
3409 // add reg2, reg1, r3
3410 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3411
3412 // We do not implement the 32-bit version of the faster access sequence
3413 // for local-exec that is controlled by -maix-small-local-exec-tls.
3414 if (HasAIXSmallLocalExecTLS)
3415 report_fatal_error("The small-local-exec TLS access sequence is "
3416 "currently only supported on AIX (64-bit mode).");
3417 }
3418 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3419 }
3420
3421 if (Model == TLSModel::LocalDynamic) {
3422 // For local-dynamic on AIX, we need to generate one TOC entry for each
3423 // variable offset, and a single module-handle TOC entry for the entire
3424 // file.
3425
3426 SDValue VariableOffsetTGA =
3427 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3428 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3429
3431 GlobalVariable *TLSGV =
3432 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3433 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3435 assert(TLSGV && "Not able to create GV for _$TLSML.");
3436 SDValue ModuleHandleTGA =
3437 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3438 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3439 SDValue ModuleHandle =
3440 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3441
3442 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3443 }
3444
3445 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3446 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3447 // need to generate two TOC entries, one for the variable offset, one for the
3448 // region handle. The global address for the TOC entry of the region handle is
3449 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3450 // entry of the variable offset is created with MO_TLSGD_FLAG.
3451 SDValue VariableOffsetTGA =
3452 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3453 SDValue RegionHandleTGA =
3454 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3455 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3456 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3457 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3458 RegionHandle);
3459}
3460
3461SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3462 SelectionDAG &DAG) const {
3463 // FIXME: TLS addresses currently use medium model code sequences,
3464 // which is the most useful form. Eventually support for small and
3465 // large models could be added if users need it, at the cost of
3466 // additional complexity.
3467 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3468 if (DAG.getTarget().useEmulatedTLS())
3469 return LowerToTLSEmulatedModel(GA, DAG);
3470
3471 SDLoc dl(GA);
3472 const GlobalValue *GV = GA->getGlobal();
3473 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3474 bool is64bit = Subtarget.isPPC64();
3475 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3476 PICLevel::Level picLevel = M->getPICLevel();
3477
3478 const TargetMachine &TM = getTargetMachine();
3479 TLSModel::Model Model = TM.getTLSModel(GV);
3480
3481 if (Model == TLSModel::LocalExec) {
3482 if (Subtarget.isUsingPCRelativeCalls()) {
3483 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3484 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3485 PPCII::MO_TPREL_PCREL_FLAG);
3486 SDValue MatAddr =
3487 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3488 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3489 }
3490
3491 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3492 PPCII::MO_TPREL_HA);
3493 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3494 PPCII::MO_TPREL_LO);
3495 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3496 : DAG.getRegister(PPC::R2, MVT::i32);
3497
3498 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3499 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3500 }
3501
3502 if (Model == TLSModel::InitialExec) {
3503 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3504 SDValue TGA = DAG.getTargetGlobalAddress(
3505 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3506 SDValue TGATLS = DAG.getTargetGlobalAddress(
3507 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3508 SDValue TPOffset;
3509 if (IsPCRel) {
3510 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3511 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3512 MachinePointerInfo());
3513 } else {
3514 SDValue GOTPtr;
3515 if (is64bit) {
3516 setUsesTOCBasePtr(DAG);
3517 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3518 GOTPtr =
3519 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3520 } else {
3521 if (!TM.isPositionIndependent())
3522 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3523 else if (picLevel == PICLevel::SmallPIC)
3524 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3525 else
3526 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3527 }
3528 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3529 }
3530 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3531 }
3532
3533 if (Model == TLSModel::GeneralDynamic) {
3534 if (Subtarget.isUsingPCRelativeCalls()) {
3535 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3536 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3537 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3538 }
3539
3540 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3541 SDValue GOTPtr;
3542 if (is64bit) {
3543 setUsesTOCBasePtr(DAG);
3544 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3545 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3546 GOTReg, TGA);
3547 } else {
3548 if (picLevel == PICLevel::SmallPIC)
3549 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3550 else
3551 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3552 }
3553 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3554 GOTPtr, TGA, TGA);
3555 }
3556
3557 if (Model == TLSModel::LocalDynamic) {
3558 if (Subtarget.isUsingPCRelativeCalls()) {
3559 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3560 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3561 SDValue MatPCRel =
3562 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3563 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3564 }
3565
3566 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3567 SDValue GOTPtr;
3568 if (is64bit) {
3569 setUsesTOCBasePtr(DAG);
3570 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3571 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3572 GOTReg, TGA);
3573 } else {
3574 if (picLevel == PICLevel::SmallPIC)
3575 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3576 else
3577 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3578 }
3579 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3580 PtrVT, GOTPtr, TGA, TGA);
3581 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3582 PtrVT, TLSAddr, TGA);
3583 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3584 }
3585
3586 llvm_unreachable("Unknown TLS model!");
3587}
3588
3589SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3590 SelectionDAG &DAG) const {
3591 EVT PtrVT = Op.getValueType();
3592 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3593 SDLoc DL(GSDN);
3594 const GlobalValue *GV = GSDN->getGlobal();
3595
3596 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3597 // The actual address of the GlobalValue is stored in the TOC.
3598 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3599 if (Subtarget.isUsingPCRelativeCalls()) {
3600 EVT Ty = getPointerTy(DAG.getDataLayout());
3601 if (isAccessedAsGotIndirect(Op)) {
3602 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3603 PPCII::MO_GOT_PCREL_FLAG);
3604 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3605 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3606 MachinePointerInfo());
3607 return Load;
3608 } else {
3609 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3610 PPCII::MO_PCREL_FLAG);
3611 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3612 }
3613 }
3614 setUsesTOCBasePtr(DAG);
3615 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3616 return getTOCEntry(DAG, DL, GA);
3617 }
3618
3619 unsigned MOHiFlag, MOLoFlag;
3620 bool IsPIC = isPositionIndependent();
3621 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3622
3623 if (IsPIC && Subtarget.isSVR4ABI()) {
3624 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3625 GSDN->getOffset(),
3626 PPCII::MO_PIC_FLAG);
3627 return getTOCEntry(DAG, DL, GA);
3628 }
3629
3630 SDValue GAHi =
3631 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3632 SDValue GALo =
3633 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3634
3635 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3636}
3637
3638SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3639 bool IsStrict = Op->isStrictFPOpcode();
3640 ISD::CondCode CC =
3641 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3642 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3643 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3644 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3645 EVT LHSVT = LHS.getValueType();
3646 SDLoc dl(Op);
3647
3648 // Soften the setcc with libcall if it is fp128.
3649 if (LHSVT == MVT::f128) {
3650 assert(!Subtarget.hasP9Vector() &&
3651 "SETCC for f128 is already legal under Power9!");
3652 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3653 Op->getOpcode() == ISD::STRICT_FSETCCS);
3654 if (RHS.getNode())
3655 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3656 DAG.getCondCode(CC));
3657 if (IsStrict)
3658 return DAG.getMergeValues({LHS, Chain}, dl);
3659 return LHS;
3660 }
3661
3662 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3663
3664 if (Op.getValueType() == MVT::v2i64) {
3665 // When the operands themselves are v2i64 values, we need to do something
3666 // special because VSX has no underlying comparison operations for these.
3667 if (LHS.getValueType() == MVT::v2i64) {
3668 // Equality can be handled by casting to the legal type for Altivec
3669 // comparisons, everything else needs to be expanded.
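 // Illustrative intuition: after comparing as v4i32, a 64-bit lane is equal
 // only if both of its 32-bit words compared equal; the {1,0,3,2} shuffle
 // below lines each word's result up with its partner so the two can be
 // combined into a single per-lane result.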
3670 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3671 return SDValue();
3672 SDValue SetCC32 = DAG.getSetCC(
3673 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3674 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3675 int ShuffV[] = {1, 0, 3, 2};
3676 SDValue Shuff =
3677 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3678 return DAG.getBitcast(MVT::v2i64,
3679 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3680 dl, MVT::v4i32, Shuff, SetCC32));
3681 }
3682
3683 // We handle most of these in the usual way.
3684 return Op;
3685 }
3686
3687 // If we're comparing for equality to zero, expose the fact that this is
3688 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3689 // fold the new nodes.
3690 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3691 return V;
3692
3693 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3694 // Leave comparisons against 0 and -1 alone for now, since they're usually
3695 // optimized. FIXME: revisit this when we can custom lower all setcc
3696 // optimizations.
3697 if (C->isAllOnes() || C->isZero())
3698 return SDValue();
3699 }
3700
3701 // If we have an integer seteq/setne, turn it into a compare against zero
3702 // by xor'ing the rhs with the lhs, which is faster than setting a
3703 // condition register, reading it back out, and masking the correct bit. The
3704 // normal approach here uses sub to do this instead of xor. Using xor exposes
3705 // the result to other bit-twiddling opportunities.
3706 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3707 EVT VT = Op.getValueType();
3708 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3709 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3710 }
3711 return SDValue();
3712}
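// Sketch of the transformation performed just above (%a and %b are
// hypothetical IR values, not from this file):
//
//   (seteq i32 %a, %b)  -->  (seteq i32 (xor %a, %b), 0)
//
// The compare-against-zero form is what lowerCmpEqZeroToCtlzSrl and later
// DAG combines know how to turn into a ctlz/srl pair instead of a
// condition-register read.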
3713
3714SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3715 SDNode *Node = Op.getNode();
3716 EVT VT = Node->getValueType(0);
3717 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3718 SDValue InChain = Node->getOperand(0);
3719 SDValue VAListPtr = Node->getOperand(1);
3720 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3721 SDLoc dl(Node);
3722
3723 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3724
3725 // gpr_index
3726 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3727 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3728 InChain = GprIndex.getValue(1);
3729
3730 if (VT == MVT::i64) {
3731 // Check if GprIndex is even
3732 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3733 DAG.getConstant(1, dl, MVT::i32));
3734 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3735 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3736 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3737 DAG.getConstant(1, dl, MVT::i32));
3738 // Align GprIndex to be even if it isn't
3739 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3740 GprIndex);
3741 }
3742
3743 // fpr index is 1 byte after gpr
3744 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3745 DAG.getConstant(1, dl, MVT::i32));
3746
3747 // fpr
3748 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3749 FprPtr, MachinePointerInfo(SV), MVT::i8);
3750 InChain = FprIndex.getValue(1);
3751
3752 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3753 DAG.getConstant(8, dl, MVT::i32));
3754
3755 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3756 DAG.getConstant(4, dl, MVT::i32));
3757
3758 // areas
3759 SDValue OverflowArea =
3760 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3761 InChain = OverflowArea.getValue(1);
3762
3763 SDValue RegSaveArea =
3764 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3765 InChain = RegSaveArea.getValue(1);
3766
3767 // select overflow_area if index >= 8
3768 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3769 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3770
3771 // adjustment constant gpr_index * 4/8
3772 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3773 VT.isInteger() ? GprIndex : FprIndex,
3774 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3775 MVT::i32));
3776
3777 // OurReg = RegSaveArea + RegConstant
3778 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3779 RegConstant);
3780
3781 // Floating types are 32 bytes into RegSaveArea
3782 if (VT.isFloatingPoint())
3783 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3784 DAG.getConstant(32, dl, MVT::i32));
3785
3786 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3787 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3788 VT.isInteger() ? GprIndex : FprIndex,
3789 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3790 MVT::i32));
3791
3792 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3793 VT.isInteger() ? VAListPtr : FprPtr,
3794 MachinePointerInfo(SV), MVT::i8);
3795
3796 // determine if we should load from reg_save_area or overflow_area
3797 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3798
3799 // increase overflow_area by 4/8 if gpr/fpr >= 8
3800 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3801 DAG.getConstant(VT.isInteger() ? 4 : 8,
3802 dl, MVT::i32));
3803
3804 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3805 OverflowAreaPlusN);
3806
3807 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3808 MachinePointerInfo(), MVT::i32);
3809
3810 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3811}
3812
3813SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3814 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3815
3816 // We have to copy the entire va_list struct:
3817 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3818 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3819 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3820 false, true, false, MachinePointerInfo(),
3821 MachinePointerInfo());
3822}
3823
3824SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3825 SelectionDAG &DAG) const {
3826 if (Subtarget.isAIXABI())
3827 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3828
3829 return Op.getOperand(0);
3830}
3831
3832SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3833 MachineFunction &MF = DAG.getMachineFunction();
3834 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3835
3836 assert((Op.getOpcode() == ISD::INLINEASM ||
3837 Op.getOpcode() == ISD::INLINEASM_BR) &&
3838 "Expecting Inline ASM node.");
3839
3840 // If an LR store is already known to be required then there is no point in
3841 // checking this ASM as well.
3842 if (MFI.isLRStoreRequired())
3843 return Op;
3844
3845 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3846 // type MVT::Glue. We want to ignore this last operand if that is the case.
3847 unsigned NumOps = Op.getNumOperands();
3848 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3849 --NumOps;
3850
3851 // Check all operands that may contain the LR.
3852 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3853 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3854 unsigned NumVals = Flags.getNumOperandRegisters();
3855 ++i; // Skip the ID value.
3856
3857 switch (Flags.getKind()) {
3858 default:
3859 llvm_unreachable("Bad flags!");
3860 case InlineAsm::Kind::RegUse:
3861 case InlineAsm::Kind::Imm:
3862 case InlineAsm::Kind::Mem:
3863 i += NumVals;
3864 break;
3865 case InlineAsm::Kind::Clobber:
3866 case InlineAsm::Kind::RegDef:
3867 case InlineAsm::Kind::RegDefEarlyClobber: {
3868 for (; NumVals; --NumVals, ++i) {
3869 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3870 if (Reg != PPC::LR && Reg != PPC::LR8)
3871 continue;
3872 MFI.setLRStoreRequired();
3873 return Op;
3874 }
3875 break;
3876 }
3877 }
3878 }
3879
3880 return Op;
3881}
3882
3883SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3884 SelectionDAG &DAG) const {
3885 if (Subtarget.isAIXABI())
3886 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3887
3888 SDValue Chain = Op.getOperand(0);
3889 SDValue Trmp = Op.getOperand(1); // trampoline
3890 SDValue FPtr = Op.getOperand(2); // nested function
3891 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3892 SDLoc dl(Op);
3893
3894 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3895 bool isPPC64 = (PtrVT == MVT::i64);
3896 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3897
3898 TargetLowering::ArgListTy Args;
3899 TargetLowering::ArgListEntry Entry;
3900
3901 Entry.Ty = IntPtrTy;
3902 Entry.Node = Trmp; Args.push_back(Entry);
3903
3904 // TrampSize == (isPPC64 ? 48 : 40);
3905 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3906 isPPC64 ? MVT::i64 : MVT::i32);
3907 Args.push_back(Entry);
3908
3909 Entry.Node = FPtr; Args.push_back(Entry);
3910 Entry.Node = Nest; Args.push_back(Entry);
3911
3912 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3913 TargetLowering::CallLoweringInfo CLI(DAG);
3914 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3915 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3916 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3917
3918 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3919 return CallResult.second;
3920}
3921
3922SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3923 MachineFunction &MF = DAG.getMachineFunction();
3924 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3925 EVT PtrVT = getPointerTy(MF.getDataLayout());
3926
3927 SDLoc dl(Op);
3928
3929 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3930 // vastart just stores the address of the VarArgsFrameIndex slot into the
3931 // memory location argument.
3932 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3933 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3934 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3935 MachinePointerInfo(SV));
3936 }
3937
3938 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3939 // We suppose the given va_list is already allocated.
3940 //
3941 // typedef struct {
3942 // char gpr; /* index into the array of 8 GPRs
3943 // * stored in the register save area
3944 // * gpr=0 corresponds to r3,
3945 // * gpr=1 to r4, etc.
3946 // */
3947 // char fpr; /* index into the array of 8 FPRs
3948 // * stored in the register save area
3949 // * fpr=0 corresponds to f1,
3950 // * fpr=1 to f2, etc.
3951 // */
3952 // char *overflow_arg_area;
3953 // /* location on stack that holds
3954 // * the next overflow argument
3955 // */
3956 // char *reg_save_area;
3957 // /* where r3:r10 and f1:f8 (if saved)
3958 // * are stored
3959 // */
3960 // } va_list[1];
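// Byte layout implied by the struct above and by the offsets used in
// LowerVAARG (a restatement of the code, not an additional ABI rule):
//
//   offset 0: gpr               (1 byte)
//   offset 1: fpr               (1 byte)
//   offset 2: 2 bytes of padding
//   offset 4: overflow_arg_area (4-byte pointer)
//   offset 8: reg_save_area     (4-byte pointer)
//
// for 12 bytes total, which is the size memcpy'd in LowerVACOPY.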
3961
3962 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3963 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3964 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3965 PtrVT);
3966 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3967 PtrVT);
3968
3969 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3970 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3971
3972 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3973 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3974
3975 uint64_t FPROffset = 1;
3976 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3977
3978 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3979
3980 // Store first byte : number of int regs
3981 SDValue firstStore =
3982 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3983 MachinePointerInfo(SV), MVT::i8);
3984 uint64_t nextOffset = FPROffset;
3985 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3986 ConstFPROffset);
3987
3988 // Store second byte : number of float regs
3989 SDValue secondStore =
3990 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3991 MachinePointerInfo(SV, nextOffset), MVT::i8);
3992 nextOffset += StackOffset;
3993 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3994
3995 // Store second word : arguments given on stack
3996 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3997 MachinePointerInfo(SV, nextOffset));
3998 nextOffset += FrameOffset;
3999 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4000
4001 // Store third word : arguments given in registers
4002 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4003 MachinePointerInfo(SV, nextOffset));
4004}
4005
4006/// FPR - The set of FP registers that should be allocated for arguments
4007/// on Darwin and AIX.
4008static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4009 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4010 PPC::F11, PPC::F12, PPC::F13};
4011
4012/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4013/// the stack.
4014static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4015 unsigned PtrByteSize) {
4016 unsigned ArgSize = ArgVT.getStoreSize();
4017 if (Flags.isByVal())
4018 ArgSize = Flags.getByValSize();
4019
4020 // Round up to multiples of the pointer size, except for array members,
4021 // which are always packed.
4022 if (!Flags.isInConsecutiveRegs())
4023 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4024
4025 return ArgSize;
4026}
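// Illustrative examples (not additional rules): with an 8-byte pointer, an
// i32 argument (store size 4) reserves an 8-byte slot and a 13-byte byval
// reserves 16 bytes, while a float that is part of a homogeneous array
// (isInConsecutiveRegs()) keeps its natural 4-byte size and is packed.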
4027
4028/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4029/// on the stack.
4030 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4031 ISD::ArgFlagsTy Flags,
4032 unsigned PtrByteSize) {
4033 Align Alignment(PtrByteSize);
4034
4035 // Altivec parameters are padded to a 16 byte boundary.
4036 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4037 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4038 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4039 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4040 Alignment = Align(16);
4041
4042 // ByVal parameters are aligned as requested.
4043 if (Flags.isByVal()) {
4044 auto BVAlign = Flags.getNonZeroByValAlign();
4045 if (BVAlign > PtrByteSize) {
4046 if (BVAlign.value() % PtrByteSize != 0)
4048 "ByVal alignment is not a multiple of the pointer size");
4049
4050 Alignment = BVAlign;
4051 }
4052 }
4053
4054 // Array members are always packed to their original alignment.
4055 if (Flags.isInConsecutiveRegs()) {
4056 // If the array member was split into multiple registers, the first
4057 // needs to be aligned to the size of the full type. (Except for
4058 // ppcf128, which is only aligned as its f64 components.)
4059 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4060 Alignment = Align(OrigVT.getStoreSize());
4061 else
4062 Alignment = Align(ArgVT.getStoreSize());
4063 }
4064
4065 return Alignment;
4066}
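// Illustrative examples (not additional rules): a v4i32 argument is aligned
// to 16 bytes; a byval requesting 32-byte alignment keeps it, since 32 is a
// multiple of the pointer size; a byval requesting 4-byte alignment on a
// 64-bit target falls back to the 8-byte pointer alignment.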
4067
4068/// CalculateStackSlotUsed - Return whether this argument will use its
4069/// stack slot (instead of being passed in registers). ArgOffset,
4070/// AvailableFPRs, and AvailableVRs must hold the current argument
4071/// position, and will be updated to account for this argument.
4072static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4073 unsigned PtrByteSize, unsigned LinkageSize,
4074 unsigned ParamAreaSize, unsigned &ArgOffset,
4075 unsigned &AvailableFPRs,
4076 unsigned &AvailableVRs) {
4077 bool UseMemory = false;
4078
4079 // Respect alignment of argument on the stack.
4080 Align Alignment =
4081 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4082 ArgOffset = alignTo(ArgOffset, Alignment);
4083 // If there's no space left in the argument save area, we must
4084 // use memory (this check also catches zero-sized arguments).
4085 if (ArgOffset >= LinkageSize + ParamAreaSize)
4086 UseMemory = true;
4087
4088 // Allocate argument on the stack.
4089 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4090 if (Flags.isInConsecutiveRegsLast())
4091 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4092 // If we overran the argument save area, we must use memory
4093 // (this check catches arguments passed partially in memory)
4094 if (ArgOffset > LinkageSize + ParamAreaSize)
4095 UseMemory = true;
4096
4097 // However, if the argument is actually passed in an FPR or a VR,
4098 // we don't use memory after all.
4099 if (!Flags.isByVal()) {
4100 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4101 if (AvailableFPRs > 0) {
4102 --AvailableFPRs;
4103 return false;
4104 }
4105 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4106 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4107 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4108 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4109 if (AvailableVRs > 0) {
4110 --AvailableVRs;
4111 return false;
4112 }
4113 }
4114
4115 return UseMemory;
4116}
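// Illustrative example (made-up counts): on 64-bit ELF the parameter save
// area covers 8 GPR doublewords, so the ninth 8-byte integer argument pushes
// ArgOffset past LinkageSize + ParamAreaSize and is reported as using memory,
// whereas an f64 that still finds a free FPR is not, even though it also
// consumes an 8-byte slot in the running ArgOffset.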
4117
4118/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4119/// ensure minimum alignment required for target.
4120 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4121 unsigned NumBytes) {
4122 return alignTo(NumBytes, Lowering->getStackAlign());
4123}
4124
4125SDValue PPCTargetLowering::LowerFormalArguments(
4126 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4127 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4128 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4129 if (Subtarget.isAIXABI())
4130 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4131 InVals);
4132 if (Subtarget.is64BitELFABI())
4133 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4134 InVals);
4135 assert(Subtarget.is32BitELFABI());
4136 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4137 InVals);
4138}
4139
4140SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4141 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4142 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4143 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4144
4145 // 32-bit SVR4 ABI Stack Frame Layout:
4146 // +-----------------------------------+
4147 // +--> | Back chain |
4148 // | +-----------------------------------+
4149 // | | Floating-point register save area |
4150 // | +-----------------------------------+
4151 // | | General register save area |
4152 // | +-----------------------------------+
4153 // | | CR save word |
4154 // | +-----------------------------------+
4155 // | | VRSAVE save word |
4156 // | +-----------------------------------+
4157 // | | Alignment padding |
4158 // | +-----------------------------------+
4159 // | | Vector register save area |
4160 // | +-----------------------------------+
4161 // | | Local variable space |
4162 // | +-----------------------------------+
4163 // | | Parameter list area |
4164 // | +-----------------------------------+
4165 // | | LR save word |
4166 // | +-----------------------------------+
4167 // SP--> +--- | Back chain |
4168 // +-----------------------------------+
4169 //
4170 // Specifications:
4171 // System V Application Binary Interface PowerPC Processor Supplement
4172 // AltiVec Technology Programming Interface Manual
4173
4174 MachineFunction &MF = DAG.getMachineFunction();
4175 MachineFrameInfo &MFI = MF.getFrameInfo();
4176 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4177
4178 EVT PtrVT = getPointerTy(MF.getDataLayout());
4179 // Potential tail calls could cause overwriting of argument stack slots.
4180 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4181 (CallConv == CallingConv::Fast));
4182 const Align PtrAlign(4);
4183
4184 // Assign locations to all of the incoming arguments.
4185 SmallVector<CCValAssign, 16> ArgLocs;
4186 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4187 *DAG.getContext());
4188
4189 // Reserve space for the linkage area on the stack.
4190 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4191 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4192 if (useSoftFloat())
4193 CCInfo.PreAnalyzeFormalArguments(Ins);
4194
4195 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4196 CCInfo.clearWasPPCF128();
4197
4198 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4199 CCValAssign &VA = ArgLocs[i];
4200
4201 // Arguments stored in registers.
4202 if (VA.isRegLoc()) {
4203 const TargetRegisterClass *RC;
4204 EVT ValVT = VA.getValVT();
4205
4206 switch (ValVT.getSimpleVT().SimpleTy) {
4207 default:
4208 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4209 case MVT::i1:
4210 case MVT::i32:
4211 RC = &PPC::GPRCRegClass;
4212 break;
4213 case MVT::f32:
4214 if (Subtarget.hasP8Vector())
4215 RC = &PPC::VSSRCRegClass;
4216 else if (Subtarget.hasSPE())
4217 RC = &PPC::GPRCRegClass;
4218 else
4219 RC = &PPC::F4RCRegClass;
4220 break;
4221 case MVT::f64:
4222 if (Subtarget.hasVSX())
4223 RC = &PPC::VSFRCRegClass;
4224 else if (Subtarget.hasSPE())
4225 // SPE passes doubles in GPR pairs.
4226 RC = &PPC::GPRCRegClass;
4227 else
4228 RC = &PPC::F8RCRegClass;
4229 break;
4230 case MVT::v16i8:
4231 case MVT::v8i16:
4232 case MVT::v4i32:
4233 RC = &PPC::VRRCRegClass;
4234 break;
4235 case MVT::v4f32:
4236 RC = &PPC::VRRCRegClass;
4237 break;
4238 case MVT::v2f64:
4239 case MVT::v2i64:
4240 RC = &PPC::VRRCRegClass;
4241 break;
4242 }
4243
4244 SDValue ArgValue;
4245 // Transform the arguments stored in physical registers into
4246 // virtual ones.
4247 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4248 assert(i + 1 < e && "No second half of double precision argument");
4249 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4250 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4251 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4252 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4253 if (!Subtarget.isLittleEndian())
4254 std::swap (ArgValueLo, ArgValueHi);
4255 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4256 ArgValueHi);
4257 } else {
4258 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4259 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4260 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4261 if (ValVT == MVT::i1)
4262 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4263 }
4264
4265 InVals.push_back(ArgValue);
4266 } else {
4267 // Argument stored in memory.
4268 assert(VA.isMemLoc());
4269
4270 // Get the extended size of the argument type in stack
4271 unsigned ArgSize = VA.getLocVT().getStoreSize();
4272 // Get the actual size of the argument type
4273 unsigned ObjSize = VA.getValVT().getStoreSize();
4274 unsigned ArgOffset = VA.getLocMemOffset();
4275 // Stack objects in PPC32 are right justified.
4276 ArgOffset += ArgSize - ObjSize;
4277 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4278
4279 // Create load nodes to retrieve arguments from the stack.
4280 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4281 InVals.push_back(
4282 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4283 }
4284 }
4285
4286 // Assign locations to all of the incoming aggregate by value arguments.
4287 // Aggregates passed by value are stored in the local variable space of the
4288 // caller's stack frame, right above the parameter list area.
4289 SmallVector<CCValAssign, 16> ByValArgLocs;
4290 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4291 ByValArgLocs, *DAG.getContext());
4292
4293 // Reserve stack space for the allocations in CCInfo.
4294 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4295
4296 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4297
4298 // Area that is at least reserved in the caller of this function.
4299 unsigned MinReservedArea = CCByValInfo.getStackSize();
4300 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4301
4302 // Set the size that is at least reserved in caller of this function. Tail
4303 // call optimized function's reserved stack space needs to be aligned so that
4304 // taking the difference between two stack areas will result in an aligned
4305 // stack.
4306 MinReservedArea =
4307 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4308 FuncInfo->setMinReservedArea(MinReservedArea);
4309
4311
4312 // If the function takes variable number of arguments, make a frame index for
4313 // the start of the first vararg value... for expansion of llvm.va_start.
4314 if (isVarArg) {
4315 static const MCPhysReg GPArgRegs[] = {
4316 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4317 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4318 };
4319 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4320
4321 static const MCPhysReg FPArgRegs[] = {
4322 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4323 PPC::F8
4324 };
4325 unsigned NumFPArgRegs = std::size(FPArgRegs);
4326
4327 if (useSoftFloat() || hasSPE())
4328 NumFPArgRegs = 0;
4329
4330 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4331 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4332
4333 // Make room for NumGPArgRegs and NumFPArgRegs.
4334 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4335 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4336
4337 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4338 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4339
4340 FuncInfo->setVarArgsFrameIndex(
4341 MFI.CreateStackObject(Depth, Align(8), false));
4342 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4343
4344 // The fixed integer arguments of a variadic function are stored to the
4345 // VarArgsFrameIndex on the stack so that they may be loaded by
4346 // dereferencing the result of va_next.
4347 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4348 // Get an existing live-in vreg, or add a new one.
4349 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4350 if (!VReg)
4351 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4352
4353 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4354 SDValue Store =
4355 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4356 MemOps.push_back(Store);
4357 // Increment the address by four for the next argument to store
4358 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4359 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4360 }
4361
4362 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4363 // is set.
4364 // The double arguments are stored to the VarArgsFrameIndex
4365 // on the stack.
4366 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4367 // Get an existing live-in vreg, or add a new one.
4368 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4369 if (!VReg)
4370 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4371
4372 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4373 SDValue Store =
4374 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4375 MemOps.push_back(Store);
4376 // Increment the address by eight for the next argument to store
4377 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4378 PtrVT);
4379 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4380 }
4381 }
4382
4383 if (!MemOps.empty())
4384 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4385
4386 return Chain;
4387}
4388
4389// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4390// value to MVT::i64 and then truncate to the correct register size.
4391SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4392 EVT ObjectVT, SelectionDAG &DAG,
4393 SDValue ArgVal,
4394 const SDLoc &dl) const {
4395 if (Flags.isSExt())
4396 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4397 DAG.getValueType(ObjectVT));
4398 else if (Flags.isZExt())
4399 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4400 DAG.getValueType(ObjectVT));
4401
4402 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4403}
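// Illustrative example: a sign-extended i32 argument arrives in a 64-bit GPR;
// AssertSext records that the upper 32 bits already replicate bit 31, so the
// TRUNCATE back to i32 is free and later combines can reuse the wider value
// without emitting a separate extension.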
4404
4405SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4406 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4407 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4408 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4409 // TODO: add description of PPC stack frame format, or at least some docs.
4410 //
4411 bool isELFv2ABI = Subtarget.isELFv2ABI();
4412 bool isLittleEndian = Subtarget.isLittleEndian();
4413 MachineFunction &MF = DAG.getMachineFunction();
4414 MachineFrameInfo &MFI = MF.getFrameInfo();
4415 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4416
4417 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4418 "fastcc not supported on varargs functions");
4419
4420 EVT PtrVT = getPointerTy(MF.getDataLayout());
4421 // Potential tail calls could cause overwriting of argument stack slots.
4422 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4423 (CallConv == CallingConv::Fast));
4424 unsigned PtrByteSize = 8;
4425 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4426
4427 static const MCPhysReg GPR[] = {
4428 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4429 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4430 };
4431 static const MCPhysReg VR[] = {
4432 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4433 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4434 };
4435
4436 const unsigned Num_GPR_Regs = std::size(GPR);
4437 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4438 const unsigned Num_VR_Regs = std::size(VR);
4439
4440 // Do a first pass over the arguments to determine whether the ABI
4441 // guarantees that our caller has allocated the parameter save area
4442 // on its stack frame. In the ELFv1 ABI, this is always the case;
4443 // in the ELFv2 ABI, it is true if this is a vararg function or if
4444 // any parameter is located in a stack slot.
4445
4446 bool HasParameterArea = !isELFv2ABI || isVarArg;
4447 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4448 unsigned NumBytes = LinkageSize;
4449 unsigned AvailableFPRs = Num_FPR_Regs;
4450 unsigned AvailableVRs = Num_VR_Regs;
4451 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4452 if (Ins[i].Flags.isNest())
4453 continue;
4454
4455 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4456 PtrByteSize, LinkageSize, ParamAreaSize,
4457 NumBytes, AvailableFPRs, AvailableVRs))
4458 HasParameterArea = true;
4459 }
4460
4461 // Add DAG nodes to load the arguments or copy them out of registers. On
4462 // entry to a function on PPC, the arguments start after the linkage area,
4463 // although the first ones are often in registers.
4464
4465 unsigned ArgOffset = LinkageSize;
4466 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4467 SmallVector<SDValue, 8> MemOps;
4468 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4469 unsigned CurArgIdx = 0;
4470 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4471 SDValue ArgVal;
4472 bool needsLoad = false;
4473 EVT ObjectVT = Ins[ArgNo].VT;
4474 EVT OrigVT = Ins[ArgNo].ArgVT;
4475 unsigned ObjSize = ObjectVT.getStoreSize();
4476 unsigned ArgSize = ObjSize;
4477 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4478 if (Ins[ArgNo].isOrigArg()) {
4479 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4480 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4481 }
4482 // We re-align the argument offset for each argument, except when using the
4483 // fast calling convention, when we need to make sure we do that only when
4484 // we'll actually use a stack slot.
4485 unsigned CurArgOffset;
4486 Align Alignment;
4487 auto ComputeArgOffset = [&]() {
4488 /* Respect alignment of argument on the stack. */
4489 Alignment =
4490 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4491 ArgOffset = alignTo(ArgOffset, Alignment);
4492 CurArgOffset = ArgOffset;
4493 };
4494
4495 if (CallConv != CallingConv::Fast) {
4496 ComputeArgOffset();
4497
4498 /* Compute GPR index associated with argument offset. */
4499 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4500 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4501 }
4502
4503 // FIXME the codegen can be much improved in some cases.
4504 // We do not have to keep everything in memory.
4505 if (Flags.isByVal()) {
4506 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4507
4508 if (CallConv == CallingConv::Fast)
4509 ComputeArgOffset();
4510
4511 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4512 ObjSize = Flags.getByValSize();
4513 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4514 // Empty aggregate parameters do not take up registers. Examples:
4515 // struct { } a;
4516 // union { } b;
4517 // int c[0];
4518 // etc. However, we have to provide a place-holder in InVals, so
4519 // pretend we have an 8-byte item at the current address for that
4520 // purpose.
4521 if (!ObjSize) {
4522 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4523 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4524 InVals.push_back(FIN);
4525 continue;
4526 }
4527
4528 // Create a stack object covering all stack doublewords occupied
4529 // by the argument. If the argument is (fully or partially) on
4530 // the stack, or if the argument is fully in registers but the
4531 // caller has allocated the parameter save anyway, we can refer
4532 // directly to the caller's stack frame. Otherwise, create a
4533 // local copy in our own frame.
4534 int FI;
4535 if (HasParameterArea ||
4536 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4537 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4538 else
4539 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4540 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4541
4542 // Handle aggregates smaller than 8 bytes.
4543 if (ObjSize < PtrByteSize) {
4544 // The value of the object is its address, which differs from the
4545 // address of the enclosing doubleword on big-endian systems.
4546 SDValue Arg = FIN;
4547 if (!isLittleEndian) {
4548 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4549 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4550 }
4551 InVals.push_back(Arg);
4552
4553 if (GPR_idx != Num_GPR_Regs) {
4554 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4555 FuncInfo->addLiveInAttr(VReg, Flags);
4556 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4557 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4558 SDValue Store =
4559 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4560 MachinePointerInfo(&*FuncArg), ObjType);
4561 MemOps.push_back(Store);
4562 }
4563 // Whether we copied from a register or not, advance the offset
4564 // into the parameter save area by a full doubleword.
4565 ArgOffset += PtrByteSize;
4566 continue;
4567 }
4568
4569 // The value of the object is its address, which is the address of
4570 // its first stack doubleword.
4571 InVals.push_back(FIN);
4572
4573 // Store whatever pieces of the object are in registers to memory.
4574 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4575 if (GPR_idx == Num_GPR_Regs)
4576 break;
4577
4578 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4579 FuncInfo->addLiveInAttr(VReg, Flags);
4580 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4581 SDValue Addr = FIN;
4582 if (j) {
4583 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4584 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4585 }
4586 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4587 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4588 SDValue Store =
4589 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4590 MachinePointerInfo(&*FuncArg, j), ObjType);
4591 MemOps.push_back(Store);
4592 ++GPR_idx;
4593 }
4594 ArgOffset += ArgSize;
4595 continue;
4596 }
4597
4598 switch (ObjectVT.getSimpleVT().SimpleTy) {
4599 default: llvm_unreachable("Unhandled argument type!");
4600 case MVT::i1:
4601 case MVT::i32:
4602 case MVT::i64:
4603 if (Flags.isNest()) {
4604 // The 'nest' parameter, if any, is passed in R11.
4605 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4606 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4607
4608 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4609 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4610
4611 break;
4612 }
4613
4614 // These can be scalar arguments or elements of an integer array type
4615 // passed directly. Clang may use those instead of "byval" aggregate
4616 // types to avoid forcing arguments to memory unnecessarily.
4617 if (GPR_idx != Num_GPR_Regs) {
4618 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4619 FuncInfo->addLiveInAttr(VReg, Flags);
4620 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4621
4622 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4623 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4624 // value to MVT::i64 and then truncate to the correct register size.
4625 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4626 } else {
4627 if (CallConv == CallingConv::Fast)
4628 ComputeArgOffset();
4629
4630 needsLoad = true;
4631 ArgSize = PtrByteSize;
4632 }
4633 if (CallConv != CallingConv::Fast || needsLoad)
4634 ArgOffset += 8;
4635 break;
4636
4637 case MVT::f32:
4638 case MVT::f64:
4639 // These can be scalar arguments or elements of a float array type
4640 // passed directly. The latter are used to implement ELFv2 homogenous
4641 // float aggregates.
4642 if (FPR_idx != Num_FPR_Regs) {
4643 unsigned VReg;
4644
4645 if (ObjectVT == MVT::f32)
4646 VReg = MF.addLiveIn(FPR[FPR_idx],
4647 Subtarget.hasP8Vector()
4648 ? &PPC::VSSRCRegClass
4649 : &PPC::F4RCRegClass);
4650 else
4651 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4652 ? &PPC::VSFRCRegClass
4653 : &PPC::F8RCRegClass);
4654
4655 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4656 ++FPR_idx;
4657 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4658 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4659 // once we support fp <-> gpr moves.
4660
4661 // This can only ever happen in the presence of f32 array types,
4662 // since otherwise we never run out of FPRs before running out
4663 // of GPRs.
4664 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4665 FuncInfo->addLiveInAttr(VReg, Flags);
4666 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4667
4668 if (ObjectVT == MVT::f32) {
4669 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4670 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4671 DAG.getConstant(32, dl, MVT::i32));
4672 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4673 }
4674
4675 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4676 } else {
4677 if (CallConv == CallingConv::Fast)
4678 ComputeArgOffset();
4679
4680 needsLoad = true;
4681 }
4682
4683 // When passing an array of floats, the array occupies consecutive
4684 // space in the argument area; only round up to the next doubleword
4685 // at the end of the array. Otherwise, each float takes 8 bytes.
4686 if (CallConv != CallingConv::Fast || needsLoad) {
4687 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4688 ArgOffset += ArgSize;
4689 if (Flags.isInConsecutiveRegsLast())
4690 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4691 }
4692 break;
4693 case MVT::v4f32:
4694 case MVT::v4i32:
4695 case MVT::v8i16:
4696 case MVT::v16i8:
4697 case MVT::v2f64:
4698 case MVT::v2i64:
4699 case MVT::v1i128:
4700 case MVT::f128:
4701 // These can be scalar arguments or elements of a vector array type
4702 // passed directly. The latter are used to implement ELFv2 homogenous
4703 // vector aggregates.
4704 if (VR_idx != Num_VR_Regs) {
4705 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4706 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4707 ++VR_idx;
4708 } else {
4709 if (CallConv == CallingConv::Fast)
4710 ComputeArgOffset();
4711 needsLoad = true;
4712 }
4713 if (CallConv != CallingConv::Fast || needsLoad)
4714 ArgOffset += 16;
4715 break;
4716 }
4717
4718 // We need to load the argument to a virtual register if we determined
4719 // above that we ran out of physical registers of the appropriate type.
4720 if (needsLoad) {
4721 if (ObjSize < ArgSize && !isLittleEndian)
4722 CurArgOffset += ArgSize - ObjSize;
4723 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4724 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4725 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4726 }
4727
4728 InVals.push_back(ArgVal);
4729 }
4730
4731 // Area that is at least reserved in the caller of this function.
4732 unsigned MinReservedArea;
4733 if (HasParameterArea)
4734 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4735 else
4736 MinReservedArea = LinkageSize;
4737
4738 // Set the size that is at least reserved in caller of this function. Tail
4739 // call optimized functions' reserved stack space needs to be aligned so that
4740 // taking the difference between two stack areas will result in an aligned
4741 // stack.
4742 MinReservedArea =
4743 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4744 FuncInfo->setMinReservedArea(MinReservedArea);
4745
4746 // If the function takes variable number of arguments, make a frame index for
4747 // the start of the first vararg value... for expansion of llvm.va_start.
4748 // The ELFv2 ABI spec states:
4749 // C programs that are intended to be *portable* across different compilers
4750 // and architectures must use the header file <stdarg.h> to deal with variable
4751 // argument lists.
4752 if (isVarArg && MFI.hasVAStart()) {
4753 int Depth = ArgOffset;
4754
4755 FuncInfo->setVarArgsFrameIndex(
4756 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4757 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4758
4759 // If this function is vararg, store any remaining integer argument regs
4760 // to their spots on the stack so that they may be loaded by dereferencing
4761 // the result of va_next.
4762 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4763 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4764 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4765 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4766 SDValue Store =
4767 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4768 MemOps.push_back(Store);
4769 // Increment the address by four for the next argument to store
4770 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4771 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4772 }
4773 }
4774
4775 if (!MemOps.empty())
4776 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4777
4778 return Chain;
4779}
4780
4781/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4782/// adjusted to accommodate the arguments for the tailcall.
4783static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4784 unsigned ParamSize) {
4785
4786 if (!isTailCall) return 0;
4787
4789 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4790 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4791 // Remember only if the new adjustment is bigger.
4792 if (SPDiff < FI->getTailCallSPDelta())
4793 FI->setTailCallSPDelta(SPDiff);
4794
4795 return SPDiff;
4796}
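// Illustrative numbers: if the caller's minimum reserved area is 112 bytes
// and the tail-called function needs 144 bytes of parameter space, SPDiff is
// -32 and the stack must be grown by 32 bytes before the tail call; a
// non-negative SPDiff means the caller's existing reservation already
// suffices.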
4797
4798static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4799
4800static bool callsShareTOCBase(const Function *Caller,
4801 const GlobalValue *CalleeGV,
4802 const TargetMachine &TM) {
4803 // It does not make sense to call callsShareTOCBase() with a caller that
4804 // is PC Relative since PC Relative callers do not have a TOC.
4805#ifndef NDEBUG
4806 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4807 assert(!STICaller->isUsingPCRelativeCalls() &&
4808 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4809#endif
4810
4811 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4812 // don't have enough information to determine if the caller and callee share
4813 // the same TOC base, so we have to pessimistically assume they don't for
4814 // correctness.
4815 if (!CalleeGV)
4816 return false;
4817
4818 // If the callee is preemptable, then the static linker will use a plt-stub
4819 // which saves the toc to the stack, and needs a nop after the call
4820 // instruction to convert to a toc-restore.
4821 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4822 return false;
4823
4824 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4825 // We may need a TOC restore in the situation where the caller requires a
4826 // valid TOC but the callee is PC Relative and does not.
4827 const Function *F = dyn_cast<Function>(CalleeGV);
4828 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4829
4830 // If we have an Alias we can try to get the function from there.
4831 if (Alias) {
4832 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4833 F = dyn_cast<Function>(GlobalObj);
4834 }
4835
4836 // If we still have no valid function pointer we do not have enough
4837 // information to determine if the callee uses PC Relative calls so we must
4838 // assume that it does.
4839 if (!F)
4840 return false;
4841
4842 // If the callee uses PC Relative we cannot guarantee that the callee won't
4843 // clobber the TOC of the caller and so we must assume that the two
4844 // functions do not share a TOC base.
4845 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4846 if (STICallee->isUsingPCRelativeCalls())
4847 return false;
4848
4849 // If the GV is not a strong definition then we need to assume it can be
4850 // replaced by another function at link time. The function that replaces
4851 // it may not share the same TOC as the caller since the callee may be
4852 // replaced by a PC Relative version of the same function.
4853 if (!CalleeGV->isStrongDefinitionForLinker())
4854 return false;
4855
4856 // The medium and large code models are expected to provide a sufficiently
4857 // large TOC to provide all data addressing needs of a module with a
4858 // single TOC.
4859 if (CodeModel::Medium == TM.getCodeModel() ||
4860 CodeModel::Large == TM.getCodeModel())
4861 return true;
4862
4863 // Any explicitly-specified sections and section prefixes must also match.
4864 // Also, if we're using -ffunction-sections, then each function is always in
4865 // a different section (the same is true for COMDAT functions).
4866 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4867 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4868 return false;
4869 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4870 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4871 return false;
4872 }
4873
4874 return true;
4875}
4876
4877static bool
4878 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4879 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4880 assert(Subtarget.is64BitELFABI());
4881
4882 const unsigned PtrByteSize = 8;
4883 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4884
4885 static const MCPhysReg GPR[] = {
4886 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4887 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4888 };
4889 static const MCPhysReg VR[] = {
4890 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4891 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4892 };
4893
4894 const unsigned NumGPRs = std::size(GPR);
4895 const unsigned NumFPRs = 13;
4896 const unsigned NumVRs = std::size(VR);
4897 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4898
4899 unsigned NumBytes = LinkageSize;
4900 unsigned AvailableFPRs = NumFPRs;
4901 unsigned AvailableVRs = NumVRs;
4902
4903 for (const ISD::OutputArg& Param : Outs) {
4904 if (Param.Flags.isNest()) continue;
4905
4906 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4907 LinkageSize, ParamAreaSize, NumBytes,
4908 AvailableFPRs, AvailableVRs))
4909 return true;
4910 }
4911 return false;
4912}
4913
4914static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4915 if (CB.arg_size() != CallerFn->arg_size())
4916 return false;
4917
4918 auto CalleeArgIter = CB.arg_begin();
4919 auto CalleeArgEnd = CB.arg_end();
4920 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4921
4922 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4923 const Value* CalleeArg = *CalleeArgIter;
4924 const Value* CallerArg = &(*CallerArgIter);
4925 if (CalleeArg == CallerArg)
4926 continue;
4927
4928 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4929 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4930 // }
4931 // 1st argument of callee is undef and has the same type as caller.
4932 if (CalleeArg->getType() == CallerArg->getType() &&
4933 isa<UndefValue>(CalleeArg))
4934 continue;
4935
4936 return false;
4937 }
4938
4939 return true;
4940}
4941
4942 // Returns true if TCO is possible between the caller's and callee's
4943 // calling conventions.
4944static bool
4945 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4946 CallingConv::ID CalleeCC) {
4947 // Tail calls are possible with fastcc and ccc.
4948 auto isTailCallableCC = [] (CallingConv::ID CC){
4949 return CC == CallingConv::C || CC == CallingConv::Fast;
4950 };
4951 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4952 return false;
4953
4954 // We can safely tail call both fastcc and ccc callees from a c calling
4955 // convention caller. If the caller is fastcc, we may have less stack space
4956 // than a non-fastcc caller with the same signature so disable tail-calls in
4957 // that case.
4958 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4959}
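// The check above reduces to this matrix (no new cases, just a summary):
//   caller ccc,    callee ccc    -> eligible
//   caller ccc,    callee fastcc -> eligible
//   caller fastcc, callee fastcc -> eligible
//   caller fastcc, callee ccc    -> rejected (caller may have less stack)
//   any other calling convention -> rejected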
4960
4961bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4962 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4963 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
4964 const SmallVectorImpl<ISD::OutputArg> &Outs,
4965 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4966 bool isCalleeExternalSymbol) const {
4967 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4968
4969 if (DisableSCO && !TailCallOpt) return false;
4970
4971 // Variadic argument functions are not supported.
4972 if (isVarArg) return false;
4973
4974 // Check that the calling conventions are compatible for tco.
4975 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4976 return false;
4977
4978 // A caller that contains any byval parameter is not supported.
4979 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4980 return false;
4981
4982 // A callee that contains any byval parameter is not supported either.
4983 // Note: This is a quick workaround, because in some cases, e.g.
4984 // caller's stack size > callee's stack size, we are still able to apply
4985 // sibling call optimization. For example, gcc is able to do SCO for caller1
4986 // in the following example, but not for caller2.
4987 // struct test {
4988 // long int a;
4989 // char ary[56];
4990 // } gTest;
4991 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4992 // b->a = v.a;
4993 // return 0;
4994 // }
4995 // void caller1(struct test a, struct test c, struct test *b) {
4996 // callee(gTest, b); }
4997 // void caller2(struct test *b) { callee(gTest, b); }
4998 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4999 return false;
5000
5001 // If callee and caller use different calling conventions, we cannot pass
5002 // parameters on stack since offsets for the parameter area may be different.
5003 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5004 return false;
5005
5006 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5007 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5008 // callee potentially have different TOC bases then we cannot tail call since
5009 // we need to restore the TOC pointer after the call.
5010 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5011 // We cannot guarantee this for indirect calls or calls to external functions.
5012 // When PC-Relative addressing is used, the concept of the TOC is no longer
5013 // applicable so this check is not required.
5014 // Check first for indirect calls.
5015 if (!Subtarget.isUsingPCRelativeCalls() &&
5016 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5017 return false;
5018
5019 // Check if we share the TOC base.
5020 if (!Subtarget.isUsingPCRelativeCalls() &&
5021 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5022 return false;
5023
5024 // TCO allows altering callee ABI, so we don't have to check further.
5025 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5026 return true;
5027
5028 if (DisableSCO) return false;
5029
5030 // If the callee uses the same argument list that the caller is using, then
5031 // we can apply SCO in this case. If not, we need to check whether the callee
5032 // needs the stack for passing arguments.
5033 // PC Relative tail calls may not have a CallBase.
5034 // If there is no CallBase we cannot verify if we have the same argument
5035 // list so assume that we don't have the same argument list.
5036 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5037 needStackSlotPassParameters(Subtarget, Outs))
5038 return false;
5039 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5040 return false;
5041
5042 return true;
5043}
5044
5045/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5046/// for tail call optimization. Targets which want to do tail call
5047/// optimization should implement this function.
5048bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5049 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5050 CallingConv::ID CallerCC, bool isVarArg,
5051 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5052 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5053 return false;
5054
5055 // Variable argument functions are not supported.
5056 if (isVarArg)
5057 return false;
5058
5059 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5060 // Functions containing by val parameters are not supported.
5061 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5062 return false;
5063
5064 // Non-PIC/GOT tail calls are supported.
5065 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5066 return true;
5067
5068 // At the moment we can only do local tail calls (in same module, hidden
5069 // or protected) if we are generating PIC.
5070 if (CalleeGV)
5071 return CalleeGV->hasHiddenVisibility() ||
5072 CalleeGV->hasProtectedVisibility();
5073 }
5074
5075 return false;
5076}
5077
5078/// isCallCompatibleAddress - Return the immediate to use if the specified
5079/// 32-bit value is representable in the immediate field of a BxA instruction.
5080 static SDNode *isCallCompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5081 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5082 if (!C) return nullptr;
5083
5084 int Addr = C->getZExtValue();
5085 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5086 SignExtend32<26>(Addr) != Addr)
5087 return nullptr; // Top 6 bits have to be sext of immediate.
5088
5089 return DAG
5090 .getConstant(
5091 (int)C->getZExtValue() >> 2, SDLoc(Op),
5092 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5093 .getNode();
5094}
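// Illustrative example: an absolute call target of 0x100 has its low two bits
// clear and sign-extends from 26 bits, so it is accepted and encoded as the
// immediate 0x40 (0x100 >> 2); a misaligned address such as 0x102, or one
// outside the signed 26-bit range, returns nullptr and the call stays
// indirect.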
5095
5096namespace {
5097
5098struct TailCallArgumentInfo {
5099 SDValue Arg;
5100 SDValue FrameIdxOp;
5101 int FrameIdx = 0;
5102
5103 TailCallArgumentInfo() = default;
5104};
5105
5106} // end anonymous namespace
5107
5108/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5109 static void StoreTailCallArgumentsToStackSlot(
5110 SelectionDAG &DAG, SDValue Chain,
5111 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5112 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5113 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5114 SDValue Arg = TailCallArgs[i].Arg;
5115 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5116 int FI = TailCallArgs[i].FrameIdx;
5117 // Store relative to framepointer.
5118 MemOpChains.push_back(DAG.getStore(
5119 Chain, dl, Arg, FIN,
5120 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5121 }
5122}
5123
5124/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5125/// the appropriate stack slot for the tail call optimized function call.
5126 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5127 SDValue OldRetAddr, SDValue OldFP,
5128 int SPDiff, const SDLoc &dl) {
5129 if (SPDiff) {
5130 // Calculate the new stack slot for the return address.
5131 MachineFunction &MF = DAG.getMachineFunction();
5132 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5133 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5134 bool isPPC64 = Subtarget.isPPC64();
5135 int SlotSize = isPPC64 ? 8 : 4;
5136 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5137 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5138 NewRetAddrLoc, true);
5139 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5140 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5141 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5142 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5143 }
5144 return Chain;
5145}
5146
5147/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5148/// the position of the argument.
5149static void
5150 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5151 SDValue Arg, int SPDiff, unsigned ArgOffset,
5152 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5153 int Offset = ArgOffset + SPDiff;
5154 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5155 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5156 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5157 SDValue FIN = DAG.getFrameIndex(FI, VT);
5158 TailCallArgumentInfo Info;
5159 Info.Arg = Arg;
5160 Info.FrameIdxOp = FIN;
5161 Info.FrameIdx = FI;
5162 TailCallArguments.push_back(Info);
5163}
5164
5165 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5166 /// address stack slots. Returns the chain as result and the loaded values in
5167 /// LROpOut/FPOpOut. Used when tail calling.
5168SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5169 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5170 SDValue &FPOpOut, const SDLoc &dl) const {
5171 if (SPDiff) {
5172 // Load the LR and FP stack slot for later adjusting.
5173 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5174 LROpOut = getReturnAddrFrameIndex(DAG);
5175 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5176 Chain = SDValue(LROpOut.getNode(), 1);
5177 }
5178 return Chain;
5179}
5180
5181/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5182/// by "Src" to address "Dst" of size "Size". Alignment information is
5183/// specified by the specific parameter attribute. The copy will be passed as
5184/// a byval function parameter.
5185/// Sometimes what we are copying is the end of a larger object, the part that
5186/// does not fit in registers.
5187 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5188 SDValue Chain, ISD::ArgFlagsTy Flags,
5189 SelectionDAG &DAG, const SDLoc &dl) {
5190 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5191 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5192 Flags.getNonZeroByValAlign(), false, false, false,
5194}
5195
5196/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5197/// tail calls.
5198 static void LowerMemOpCallTo(
5199 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5200 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5201 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5202 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5203 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5204 if (!isTailCall) {
5205 if (isVector) {
5206 SDValue StackPtr;
5207 if (isPPC64)
5208 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5209 else
5210 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5211 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5212 DAG.getConstant(ArgOffset, dl, PtrVT));
5213 }
5214 MemOpChains.push_back(
5215 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5216 // Calculate and remember argument location.
5217 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5218 TailCallArguments);
5219}
5220
5221static void
5222 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5223 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5224 SDValue FPOp,
5225 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5226 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5227 // might overwrite each other in case of tail call optimization.
5228 SmallVector<SDValue, 8> MemOpChains2;
5229 // Do not flag preceding copytoreg stuff together with the following stuff.
5230 InGlue = SDValue();
5231 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5232 MemOpChains2, dl);
5233 if (!MemOpChains2.empty())
5234 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5235
5236 // Store the return address to the appropriate stack slot.
5237 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5238
5239 // Emit callseq_end just before tailcall node.
5240 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5241 InGlue = Chain.getValue(1);
5242}
5243
5244// Is this global address that of a function that can be called by name? (as
5245// opposed to something that must hold a descriptor for an indirect call).
5246static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5247 if (GV) {
5248 if (GV->isThreadLocal())
5249 return false;
5250
5251 return GV->getValueType()->isFunctionTy();
5252 }
5253
5254 return false;
5255}
5256
5257SDValue PPCTargetLowering::LowerCallResult(
5258 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5259 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5260 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5261 SmallVector<CCValAssign, 16> RVLocs;
5262 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5263 *DAG.getContext());
5264
5265 CCRetInfo.AnalyzeCallResult(
5266 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5267 ? RetCC_PPC_Cold
5268 : RetCC_PPC);
5269
5270 // Copy all of the result registers out of their specified physreg.
5271 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5272 CCValAssign &VA = RVLocs[i];
5273 assert(VA.isRegLoc() && "Can only return in registers!");
5274
5275 SDValue Val;
5276
5277 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5278 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5279 InGlue);
5280 Chain = Lo.getValue(1);
5281 InGlue = Lo.getValue(2);
5282 VA = RVLocs[++i]; // skip ahead to next loc
5283 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5284 InGlue);
5285 Chain = Hi.getValue(1);
5286 InGlue = Hi.getValue(2);
5287 if (!Subtarget.isLittleEndian())
5288 std::swap (Lo, Hi);
5289 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5290 } else {
5291 Val = DAG.getCopyFromReg(Chain, dl,
5292 VA.getLocReg(), VA.getLocVT(), InGlue);
5293 Chain = Val.getValue(1);
5294 InGlue = Val.getValue(2);
5295 }
5296
5297 switch (VA.getLocInfo()) {
5298 default: llvm_unreachable("Unknown loc info!");
5299 case CCValAssign::Full: break;
5300 case CCValAssign::AExt:
5301 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5302 break;
5303 case CCValAssign::ZExt:
5304 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5305 DAG.getValueType(VA.getValVT()));
5306 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5307 break;
5308 case CCValAssign::SExt:
5309 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5310 DAG.getValueType(VA.getValVT()));
5311 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5312 break;
5313 }
5314
5315 InVals.push_back(Val);
5316 }
5317
5318 return Chain;
5319}
5320
5321static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5322 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5323 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5324 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5325
5326 // PatchPoint calls are not indirect.
5327 if (isPatchPoint)
5328 return false;
5329
5330 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5331 return false;
5332
5333 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5334 // because the immediate function pointer points to a descriptor instead of
5335 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5336 // pointer immediate points to the global entry point, while the BLA would
5337 // need to jump to the local entry point (see rL211174).
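// For illustration only (not emitted by this file): an ELFv2 callee typically
// begins with a global entry point that derives r2 from r12 before execution
// falls through to the local entry point, roughly:
//   func:                          # global entry point, expects r12 = &func
//     addis r2, r12, .TOC.-func@ha
//     addi  r2, r2,  .TOC.-func@l
//     .localentry func, .-func     # same-TOC direct calls enter here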
5338 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5339 isBLACompatibleAddress(Callee, DAG))
5340 return false;
5341
5342 return true;
5343}
5344
5345// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5346static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5347 return Subtarget.isAIXABI() ||
5348 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5349}
5350
5351 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5352 const Function &Caller, const SDValue &Callee,
5353 const PPCSubtarget &Subtarget,
5354 const TargetMachine &TM,
5355 bool IsStrictFPCall = false) {
5356 if (CFlags.IsTailCall)
5357 return PPCISD::TC_RETURN;
5358
5359 unsigned RetOpc = 0;
5360 // This is a call through a function pointer.
5361 if (CFlags.IsIndirect) {
5362 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5363 // indirect calls. The save of the caller's TOC pointer to the stack will be
5364 // inserted into the DAG as part of call lowering. The restore of the TOC
5365 // pointer is modeled by using a pseudo instruction for the call opcode that
5366 // represents the 2 instruction sequence of an indirect branch and link,
5367 // immediately followed by a load of the TOC pointer from the stack save
5368 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5369 // as it is not saved or used.
5369 // as it is not saved or used.
5370 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5371 : PPCISD::BCTRL;
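// For illustration, the BCTRL_LOAD_TOC pseudo is expected to end up as a
// two-instruction sequence roughly of the form below; the offset comes from
// getFrameLowering()->getTOCSaveOffset() (e.g. 24 on ELFv2, 40 on ELFv1/AIX):
//   bctrl
//   ld r2, <TOC save offset>(r1)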
5372 } else if (Subtarget.isUsingPCRelativeCalls()) {
5373 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5374 RetOpc = PPCISD::CALL_NOTOC;
5375 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5376 // The ABIs that maintain a TOC pointer across calls need to have a nop
5377 // immediately following the call instruction if the caller and callee may
5378 // have different TOC bases. At link time if the linker determines the calls
5379 // may not share a TOC base, the call is redirected to a trampoline inserted
5380 // by the linker. The trampoline will (among other things) save the caller's
5381 // TOC pointer at an ABI designated offset in the linkage area and the
5382 // linker will rewrite the nop to be a load of the TOC pointer from the
5383 // linkage area into gpr2.
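// For illustration, such a call is typically emitted as
//   bl callee
//   nop
// and, if the linker routes the call through a trampoline, the nop is
// rewritten into a TOC restore, roughly "ld r2, 24(r1)" on ELFv2; the exact
// slot is the ABI's TOC save offset in the linkage area.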
5384 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5385 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5386 RetOpc =
5387 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5388 } else
5389 RetOpc = PPCISD::CALL;
5390 if (IsStrictFPCall) {
5391 switch (RetOpc) {
5392 default:
5393 llvm_unreachable("Unknown call opcode");
5394 case PPCISD::BCTRL_LOAD_TOC:
5395 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5396 break;
5397 case PPCISD::BCTRL:
5398 RetOpc = PPCISD::BCTRL_RM;
5399 break;
5400 case PPCISD::CALL_NOTOC:
5401 RetOpc = PPCISD::CALL_NOTOC_RM;
5402 break;
5403 case PPCISD::CALL:
5404 RetOpc = PPCISD::CALL_RM;
5405 break;
5406 case PPCISD::CALL_NOP:
5407 RetOpc = PPCISD::CALL_NOP_RM;
5408 break;
5409 }
5410 }
5411 return RetOpc;
5412}
5413
5414static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5415 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5416 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5417 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5418 return SDValue(Dest, 0);
5419
5420 // Returns true if the callee is local, and false otherwise.
5421 auto isLocalCallee = [&]() {
5422 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5423 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5424
5425 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5426 !isa_and_nonnull<GlobalIFunc>(GV);
5427 };
5428
5429 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5430 // a static relocation model causes some versions of GNU LD (2.17.50, at
5431 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5432 // built with secure-PLT.
5433 bool UsePlt =
5434 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5435 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5436
5437 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5438 const TargetMachine &TM = Subtarget.getTargetMachine();
5439 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5440 MCSymbolXCOFF *S =
5441 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5442
5443 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5444 return DAG.getMCSymbol(S, PtrVT);
5445 };
5446
5447 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5448 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5449 if (isFunctionGlobalAddress(GV)) {
5450 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5451
5452 if (Subtarget.isAIXABI()) {
5453 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5454 return getAIXFuncEntryPointSymbolSDNode(GV);
5455 }
5456 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5457 UsePlt ? PPCII::MO_PLT : 0);
5458 }
5459
5460 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5461 const char *SymName = S->getSymbol();
5462 if (Subtarget.isAIXABI()) {
5463 // If there exists a user-declared function whose name is the same as the
5464 // ExternalSymbol's, then we pick up the user-declared version.
5465 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5466 if (const Function *F =
5467 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5468 return getAIXFuncEntryPointSymbolSDNode(F);
5469
5470 // On AIX, direct function calls reference the symbol for the function's
5471 // entry point, which is named by prepending a "." before the function's
5472 // C-linkage name. A Qualname is returned here because an external
5473 // function entry point is a csect with XTY_ER property.
5474 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5475 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5476 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5477 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5478 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5479 return Sec->getQualNameSymbol();
5480 };
5481
5482 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5483 }
5484 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5485 UsePlt ? PPCII::MO_PLT : 0);
5486 }
5487
5488 // No transformation needed.
5489 assert(Callee.getNode() && "What no callee?");
5490 return Callee;
5491}
5492
5493 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5494 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5495 "Expected a CALLSEQ_STARTSDNode.");
5496
5497 // The last operand is the chain, except when the node has glue. If the node
5498 // has glue, then the last operand is the glue, and the chain is the second
5499 // last operand.
5500 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5501 if (LastValue.getValueType() != MVT::Glue)
5502 return LastValue;
5503
5504 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5505}
5506
5507 // Creates the node that moves a function's address into the count register
5508// to prepare for an indirect call instruction.
5509static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5510 SDValue &Glue, SDValue &Chain,
5511 const SDLoc &dl) {
5512 SDValue MTCTROps[] = {Chain, Callee, Glue};
5513 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5514 Chain = DAG.getNode(PPCISD::MTCTR, dl, ArrayRef(ReturnTypes, 2),
5515 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5516 // The glue is the second value produced.
5517 Glue = Chain.getValue(1);
5518}
5519
5520 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5521 SDValue &Glue, SDValue &Chain,
5522 SDValue CallSeqStart,
5523 const CallBase *CB, const SDLoc &dl,
5524 bool hasNest,
5525 const PPCSubtarget &Subtarget) {
5526 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5527 // entry point, but to the function descriptor (the function entry point
5528 // address is part of the function descriptor though).
5529 // The function descriptor is a three doubleword structure with the
5530 // following fields: function entry point, TOC base address and
5531 // environment pointer.
5532 // Thus for a call through a function pointer, the following actions need
5533 // to be performed:
5534 // 1. Save the TOC of the caller in the TOC save area of its stack
5535 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5536 // 2. Load the address of the function entry point from the function
5537 // descriptor.
5538 // 3. Load the TOC of the callee from the function descriptor into r2.
5539 // 4. Load the environment pointer from the function descriptor into
5540 // r11.
5541 // 5. Branch to the function entry point address.
5542 // 6. On return of the callee, the TOC of the caller needs to be
5543 // restored (this is done in FinishCall()).
5544 //
5545 // The loads are scheduled at the beginning of the call sequence, and the
5546 // register copies are flagged together to ensure that no other
5547 // operations can be scheduled in between. E.g. without flagging the
5548 // copies together, a TOC access in the caller could be scheduled between
5549 // the assignment of the callee TOC and the branch to the callee, which leads
5550 // to incorrect code.
5551
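// A rough picture of the descriptor this code walks (offsets are taken from
// the subtarget getters below, not hard-coded here):
//   struct FunctionDescriptor {
//     void *EntryPoint; // offset 0: loaded and moved into CTR
//     void *TOCBase;    // descriptorTOCAnchorOffset(): copied into the TOC register
//     void *EnvPointer; // descriptorEnvironmentPointerOffset(): copied into the env register
//   };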
5552 // Start by loading the function address from the descriptor.
5553 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5554 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5555 ? (MachineMemOperand::MODereferenceable |
5556 MachineMemOperand::MOInvariant)
5557 : MachineMemOperand::MONone;
5558
5559 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5560
5561 // Registers used in building the DAG.
5562 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5563 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5564
5565 // Offsets of descriptor members.
5566 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5567 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5568
5569 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5570 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5571
5572 // One load for the function's entry point address.
5573 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5574 Alignment, MMOFlags);
5575
5576 // One for loading the TOC anchor for the module that contains the called
5577 // function.
5578 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5579 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5580 SDValue TOCPtr =
5581 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5582 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5583
5584 // One for loading the environment pointer.
5585 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5586 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5587 SDValue LoadEnvPtr =
5588 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5589 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5590
5591
5592 // Then copy the newly loaded TOC anchor to the TOC pointer.
5593 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5594 Chain = TOCVal.getValue(0);
5595 Glue = TOCVal.getValue(1);
5596
5597 // If the function call has an explicit 'nest' parameter, it takes the
5598 // place of the environment pointer.
5599 assert((!hasNest || !Subtarget.isAIXABI()) &&
5600 "Nest parameter is not supported on AIX.");
5601 if (!hasNest) {
5602 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5603 Chain = EnvVal.getValue(0);
5604 Glue = EnvVal.getValue(1);
5605 }
5606
5607 // The rest of the indirect call sequence is the same as the non-descriptor
5608 // DAG.
5609 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5610}
5611
5612static void
5613 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5614 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5615 SelectionDAG &DAG,
5616 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5617 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5618 const PPCSubtarget &Subtarget) {
5619 const bool IsPPC64 = Subtarget.isPPC64();
5620 // MVT for a general purpose register.
5621 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5622
5623 // First operand is always the chain.
5624 Ops.push_back(Chain);
5625
5626 // If it's a direct call pass the callee as the second operand.
5627 if (!CFlags.IsIndirect)
5628 Ops.push_back(Callee);
5629 else {
5630 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5631
5632 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5633 // on the stack (this would have been done in `LowerCall_64SVR4` or
5634 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5635 // represents both the indirect branch and a load that restores the TOC
5636 // pointer from the linkage area. The operand for the TOC restore is an add
5637 // of the TOC save offset to the stack pointer. This must be the second
5638 // operand: after the chain input but before any other variadic arguments.
5639 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5640 // saved or used.
5641 if (isTOCSaveRestoreRequired(Subtarget)) {
5642 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5643
5644 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5645 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5646 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5647 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5648 Ops.push_back(AddTOC);
5649 }
5650
5651 // Add the register used for the environment pointer.
5652 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5653 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5654 RegVT));
5655
5656
5657 // Add CTR register as callee so a bctr can be emitted later.
5658 if (CFlags.IsTailCall)
5659 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5660 }
5661
5662 // If this is a tail call add stack pointer delta.
5663 if (CFlags.IsTailCall)
5664 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5665
5666 // Add argument registers to the end of the list so that they are known live
5667 // into the call.
5668 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5669 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5670 RegsToPass[i].second.getValueType()));
5671
5672 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5673 // no way to mark dependencies as implicit here.
5674 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5675 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5676 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5677 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5678
5679 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5680 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5681 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5682
5683 // Add a register mask operand representing the call-preserved registers.
5684 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5685 const uint32_t *Mask =
5686 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5687 assert(Mask && "Missing call preserved mask for calling convention");
5688 Ops.push_back(DAG.getRegisterMask(Mask));
5689
5690 // If the glue is valid, it is the last operand.
5691 if (Glue.getNode())
5692 Ops.push_back(Glue);
5693}
5694
5695SDValue PPCTargetLowering::FinishCall(
5696 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5697 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5698 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5699 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5700 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5701
5702 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5703 Subtarget.isAIXABI())
5704 setUsesTOCBasePtr(DAG);
5705
5706 unsigned CallOpc =
5707 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5708 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5709
5710 if (!CFlags.IsIndirect)
5711 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5712 else if (Subtarget.usesFunctionDescriptors())
5713 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5714 dl, CFlags.HasNest, Subtarget);
5715 else
5716 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5717
5718 // Build the operand list for the call instruction.
5719 SmallVector<SDValue, 8> Ops;
5720 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5721 SPDiff, Subtarget);
5722
5723 // Emit tail call.
5724 if (CFlags.IsTailCall) {
5725 // Indirect tail calls when using PC Relative calls do not have the same
5726 // constraints.
5727 assert(((Callee.getOpcode() == ISD::Register &&
5728 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5729 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5730 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5731 isa<ConstantSDNode>(Callee) ||
5732 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5733 "Expecting a global address, external symbol, absolute value, "
5734 "register or an indirect tail call when PC Relative calls are "
5735 "used.");
5736 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5737 assert(CallOpc == PPCISD::TC_RETURN &&
5738 "Unexpected call opcode for a tail call.");
5739 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5740 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5741 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5742 return Ret;
5743 }
5744
5745 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5746 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5747 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5748 Glue = Chain.getValue(1);
5749
5750 // When performing tail call optimization the callee pops its arguments off
5751 // the stack. Account for this here so these bytes can be pushed back on in
5752 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5753 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5754 getTargetMachine().Options.GuaranteedTailCallOpt)
5755 ? NumBytes
5756 : 0;
5757
5758 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5759 Glue = Chain.getValue(1);
5760
5761 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5762 DAG, InVals);
5763}
5764
5765 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5766 CallingConv::ID CalleeCC = CB->getCallingConv();
5767 const Function *CallerFunc = CB->getCaller();
5768 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5769 const Function *CalleeFunc = CB->getCalledFunction();
5770 if (!CalleeFunc)
5771 return false;
5772 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5773
5774 SmallVector<ISD::OutputArg, 2> Outs;
5775 SmallVector<ISD::InputArg, 2> Ins;
5776
5777 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5778 CalleeFunc->getAttributes(), Outs, *this,
5779 CalleeFunc->getParent()->getDataLayout());
5780
5781 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5782 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5783 false /*isCalleeExternalSymbol*/);
5784}
5785
5786bool PPCTargetLowering::isEligibleForTCO(
5787 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5788 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5789 const SmallVectorImpl<ISD::OutputArg> &Outs,
5790 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5791 bool isCalleeExternalSymbol) const {
5792 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5793 return false;
5794
5795 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5796 return IsEligibleForTailCallOptimization_64SVR4(
5797 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5798 isCalleeExternalSymbol);
5799 else
5800 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5801 isVarArg, Ins);
5802}
5803
5804SDValue
5805PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5806 SmallVectorImpl<SDValue> &InVals) const {
5807 SelectionDAG &DAG = CLI.DAG;
5808 SDLoc &dl = CLI.DL;
5809 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5810 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5811 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5812 SDValue Chain = CLI.Chain;
5813 SDValue Callee = CLI.Callee;
5814 bool &isTailCall = CLI.IsTailCall;
5815 CallingConv::ID CallConv = CLI.CallConv;
5816 bool isVarArg = CLI.IsVarArg;
5817 bool isPatchPoint = CLI.IsPatchPoint;
5818 const CallBase *CB = CLI.CB;
5819
5820 if (isTailCall) {
5821 MachineFunction &MF = DAG.getMachineFunction();
5822 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5823 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5824 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5825 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5826
5827 isTailCall =
5828 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5829 &(MF.getFunction()), IsCalleeExternalSymbol);
5830 if (isTailCall) {
5831 ++NumTailCalls;
5832 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5833 ++NumSiblingCalls;
5834
5835 // PC Relative calls no longer guarantee that the callee is a Global
5836 // Address Node. The callee could be an indirect tail call in which
5837 // case the SDValue for the callee could be a load (to load the address
5838 // of a function pointer) or it may be a register copy (to move the
5839 // address of the callee from a function parameter into a virtual
5840 // register). It may also be an ExternalSymbolSDNode (e.g. for memcpy).
5841 assert((Subtarget.isUsingPCRelativeCalls() ||
5842 isa<GlobalAddressSDNode>(Callee)) &&
5843 "Callee should be an llvm::Function object.");
5844
5845 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5846 << "\nTCO callee: ");
5847 LLVM_DEBUG(Callee.dump());
5848 }
5849 }
5850
5851 if (!isTailCall && CB && CB->isMustTailCall())
5852 report_fatal_error("failed to perform tail call elimination on a call "
5853 "site marked musttail");
5854
5855 // When long calls (i.e. indirect calls) are always used, calls are always
5856 // made via function pointer. If we have a function name, first translate it
5857 // into a pointer.
5858 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5859 !isTailCall)
5860 Callee = LowerGlobalAddress(Callee, DAG);
5861
5862 CallFlags CFlags(
5863 CallConv, isTailCall, isVarArg, isPatchPoint,
5864 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5865 // hasNest
5866 Subtarget.is64BitELFABI() &&
5867 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5868 CLI.NoMerge);
5869
5870 if (Subtarget.isAIXABI())
5871 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5872 InVals, CB);
5873
5874 assert(Subtarget.isSVR4ABI());
5875 if (Subtarget.isPPC64())
5876 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5877 InVals, CB);
5878 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5879 InVals, CB);
5880}
5881
5882SDValue PPCTargetLowering::LowerCall_32SVR4(
5883 SDValue Chain, SDValue Callee, CallFlags CFlags,
5884 const SmallVectorImpl<ISD::OutputArg> &Outs,
5885 const SmallVectorImpl<SDValue> &OutVals,
5886 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5887 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5888 const CallBase *CB) const {
5889 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5890 // of the 32-bit SVR4 ABI stack frame layout.
5891
5892 const CallingConv::ID CallConv = CFlags.CallConv;
5893 const bool IsVarArg = CFlags.IsVarArg;
5894 const bool IsTailCall = CFlags.IsTailCall;
5895
5896 assert((CallConv == CallingConv::C ||
5897 CallConv == CallingConv::Cold ||
5898 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5899
5900 const Align PtrAlign(4);
5901
5902 MachineFunction &MF = DAG.getMachineFunction();
5903
5904 // Mark this function as potentially containing a function that contains a
5905 // tail call. As a consequence, the frame pointer will be used for dynamic
5906 // allocations and for restoring the caller's stack pointer in this function's
5907 // epilogue. This is done because the tail-called function might overwrite the
5908 // value in this function's (MF) stack pointer stack slot 0(SP).
5909 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5910 CallConv == CallingConv::Fast)
5911 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5912
5913 // Count how many bytes are to be pushed on the stack, including the linkage
5914 // area, parameter list area and the part of the local variable space which
5915 // contains copies of aggregates which are passed by value.
5916
5917 // Assign locations to all of the outgoing arguments.
5918 SmallVector<CCValAssign, 16> ArgLocs;
5919 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5920
5921 // Reserve space for the linkage area on the stack.
5922 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5923 PtrAlign);
5924 if (useSoftFloat())
5925 CCInfo.PreAnalyzeCallOperands(Outs);
5926
5927 if (IsVarArg) {
5928 // Handle fixed and variable vector arguments differently.
5929 // Fixed vector arguments go into registers as long as registers are
5930 // available. Variable vector arguments always go into memory.
5931 unsigned NumArgs = Outs.size();
5932
5933 for (unsigned i = 0; i != NumArgs; ++i) {
5934 MVT ArgVT = Outs[i].VT;
5935 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5936 bool Result;
5937
5938 if (Outs[i].IsFixed) {
5939 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5940 CCInfo);
5941 } else {
5942 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5943 ArgFlags, CCInfo);
5944 }
5945
5946 if (Result) {
5947#ifndef NDEBUG
5948 errs() << "Call operand #" << i << " has unhandled type "
5949 << ArgVT << "\n";
5950#endif
5951 llvm_unreachable(nullptr);
5952 }
5953 }
5954 } else {
5955 // All arguments are treated the same.
5956 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5957 }
5958 CCInfo.clearWasPPCF128();
5959
5960 // Assign locations to all of the outgoing aggregate by value arguments.
5961 SmallVector<CCValAssign, 16> ByValArgLocs;
5962 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5963
5964 // Reserve stack space for the allocations in CCInfo.
5965 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
5966
5967 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5968
5969 // Size of the linkage area, parameter list area and the part of the local
5970 // space variable where copies of aggregates which are passed by value are
5971 // stored.
5972 unsigned NumBytes = CCByValInfo.getStackSize();
5973
5974 // Calculate by how many bytes the stack has to be adjusted in case of tail
5975 // call optimization.
5976 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5977
5978 // Adjust the stack pointer for the new arguments...
5979 // These operations are automatically eliminated by the prolog/epilog pass
5980 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5981 SDValue CallSeqStart = Chain;
5982
5983 // Load the return address and frame pointer so it can be moved somewhere else
5984 // later.
5985 SDValue LROp, FPOp;
5986 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5987
5988 // Set up a copy of the stack pointer for use loading and storing any
5989 // arguments that may not fit in the registers available for argument
5990 // passing.
5991 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5992
5994 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5995 SmallVector<SDValue, 8> MemOpChains;
5996
5997 bool seenFloatArg = false;
5998 // Walk the register/memloc assignments, inserting copies/loads.
5999 // i - Tracks the index into the list of registers allocated for the call
6000 // RealArgIdx - Tracks the index into the list of actual function arguments
6001 // j - Tracks the index into the list of byval arguments
6002 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6003 i != e;
6004 ++i, ++RealArgIdx) {
6005 CCValAssign &VA = ArgLocs[i];
6006 SDValue Arg = OutVals[RealArgIdx];
6007 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6008
6009 if (Flags.isByVal()) {
6010 // Argument is an aggregate which is passed by value, thus we need to
6011 // create a copy of it in the local variable space of the current stack
6012 // frame (which is the stack frame of the caller) and pass the address of
6013 // this copy to the callee.
6014 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6015 CCValAssign &ByValVA = ByValArgLocs[j++];
6016 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6017
6018 // Memory reserved in the local variable space of the callers stack frame.
6019 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6020
6021 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6022 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6023 StackPtr, PtrOff);
6024
6025 // Create a copy of the argument in the local area of the current
6026 // stack frame.
6027 SDValue MemcpyCall =
6028 CreateCopyOfByValArgument(Arg, PtrOff,
6029 CallSeqStart.getNode()->getOperand(0),
6030 Flags, DAG, dl);
6031
6032 // This must go outside the CALLSEQ_START..END.
6033 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6034 SDLoc(MemcpyCall));
6035 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6036 NewCallSeqStart.getNode());
6037 Chain = CallSeqStart = NewCallSeqStart;
6038
6039 // Pass the address of the aggregate copy on the stack either in a
6040 // physical register or in the parameter list area of the current stack
6041 // frame to the callee.
6042 Arg = PtrOff;
6043 }
6044
6045 // When useCRBits() is true, there can be i1 arguments.
6046 // It is because getRegisterType(MVT::i1) => MVT::i1,
6047 // and for other integer types getRegisterType() => MVT::i32.
6048 // Extend i1 and ensure callee will get i32.
6049 if (Arg.getValueType() == MVT::i1)
6050 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6051 dl, MVT::i32, Arg);
6052
6053 if (VA.isRegLoc()) {
6054 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6055 // Put argument in a physical register.
6056 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6057 bool IsLE = Subtarget.isLittleEndian();
6058 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6059 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6060 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6061 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6062 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6063 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6064 SVal.getValue(0)));
6065 } else
6066 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6067 } else {
6068 // Put argument in the parameter list area of the current stack frame.
6069 assert(VA.isMemLoc());
6070 unsigned LocMemOffset = VA.getLocMemOffset();
6071
6072 if (!IsTailCall) {
6073 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6074 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6075 StackPtr, PtrOff);
6076
6077 MemOpChains.push_back(
6078 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6079 } else {
6080 // Calculate and remember argument location.
6081 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6082 TailCallArguments);
6083 }
6084 }
6085 }
6086
6087 if (!MemOpChains.empty())
6088 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6089
6090 // Build a sequence of copy-to-reg nodes chained together with token chain
6091 // and flag operands which copy the outgoing args into the appropriate regs.
6092 SDValue InGlue;
6093 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6094 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6095 RegsToPass[i].second, InGlue);
6096 InGlue = Chain.getValue(1);
6097 }
6098
6099 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6100 // registers.
6101 if (IsVarArg) {
6102 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6103 SDValue Ops[] = { Chain, InGlue };
6104
6105 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6106 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6107
6108 InGlue = Chain.getValue(1);
6109 }
6110
6111 if (IsTailCall)
6112 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6113 TailCallArguments);
6114
6115 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6116 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6117}
6118
6119// Copy an argument into memory, being careful to do this outside the
6120// call sequence for the call to which the argument belongs.
6121SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6122 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6123 SelectionDAG &DAG, const SDLoc &dl) const {
6124 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6125 CallSeqStart.getNode()->getOperand(0),
6126 Flags, DAG, dl);
6127 // The MEMCPY must go outside the CALLSEQ_START..END.
6128 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6129 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6130 SDLoc(MemcpyCall));
6131 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6132 NewCallSeqStart.getNode());
6133 return NewCallSeqStart;
6134}
6135
6136SDValue PPCTargetLowering::LowerCall_64SVR4(
6137 SDValue Chain, SDValue Callee, CallFlags CFlags,
6138 const SmallVectorImpl<ISD::OutputArg> &Outs,
6139 const SmallVectorImpl<SDValue> &OutVals,
6140 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6141 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6142 const CallBase *CB) const {
6143 bool isELFv2ABI = Subtarget.isELFv2ABI();
6144 bool isLittleEndian = Subtarget.isLittleEndian();
6145 unsigned NumOps = Outs.size();
6146 bool IsSibCall = false;
6147 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6148
6149 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6150 unsigned PtrByteSize = 8;
6151
6152 MachineFunction &MF = DAG.getMachineFunction();
6153
6154 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6155 IsSibCall = true;
6156
6157 // Mark this function as potentially containing a function that contains a
6158 // tail call. As a consequence, the frame pointer will be used for dynamic
6159 // allocations and for restoring the caller's stack pointer in this function's
6160 // epilogue. This is done because the tail-called function might overwrite the
6161 // value in this function's (MF) stack pointer stack slot 0(SP).
6162 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6163 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6164
6165 assert(!(IsFastCall && CFlags.IsVarArg) &&
6166 "fastcc not supported on varargs functions");
6167
6168 // Count how many bytes are to be pushed on the stack, including the linkage
6169 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6170 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6171 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
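// For reference (standard 64-bit ELF layouts, shown only as an illustration):
//   ELFv1, 48 bytes: back chain 0, CR save 8, LR save 16, reserved 24 and 32,
//                    TOC save 40.
//   ELFv2, 32 bytes: back chain 0, CR save 8, LR save 16, TOC save 24.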
6172 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6173 unsigned NumBytes = LinkageSize;
6174 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6175
6176 static const MCPhysReg GPR[] = {
6177 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6178 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6179 };
6180 static const MCPhysReg VR[] = {
6181 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6182 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6183 };
6184
6185 const unsigned NumGPRs = std::size(GPR);
6186 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6187 const unsigned NumVRs = std::size(VR);
6188
6189 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6190 // can be passed to the callee in registers.
6191 // For the fast calling convention, there is another check below.
6192 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6193 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6194 if (!HasParameterArea) {
6195 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6196 unsigned AvailableFPRs = NumFPRs;
6197 unsigned AvailableVRs = NumVRs;
6198 unsigned NumBytesTmp = NumBytes;
6199 for (unsigned i = 0; i != NumOps; ++i) {
6200 if (Outs[i].Flags.isNest()) continue;
6201 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6202 PtrByteSize, LinkageSize, ParamAreaSize,
6203 NumBytesTmp, AvailableFPRs, AvailableVRs))
6204 HasParameterArea = true;
6205 }
6206 }
6207
6208 // When using the fast calling convention, we don't provide backing for
6209 // arguments that will be in registers.
6210 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6211
6212 // Avoid allocating parameter area for fastcc functions if all the arguments
6213 // can be passed in the registers.
6214 if (IsFastCall)
6215 HasParameterArea = false;
6216
6217 // Add up all the space actually used.
6218 for (unsigned i = 0; i != NumOps; ++i) {
6219 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6220 EVT ArgVT = Outs[i].VT;
6221 EVT OrigVT = Outs[i].ArgVT;
6222
6223 if (Flags.isNest())
6224 continue;
6225
6226 if (IsFastCall) {
6227 if (Flags.isByVal()) {
6228 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6229 if (NumGPRsUsed > NumGPRs)
6230 HasParameterArea = true;
6231 } else {
6232 switch (ArgVT.getSimpleVT().SimpleTy) {
6233 default: llvm_unreachable("Unexpected ValueType for argument!");
6234 case MVT::i1:
6235 case MVT::i32:
6236 case MVT::i64:
6237 if (++NumGPRsUsed <= NumGPRs)
6238 continue;
6239 break;
6240 case MVT::v4i32:
6241 case MVT::v8i16:
6242 case MVT::v16i8:
6243 case MVT::v2f64:
6244 case MVT::v2i64:
6245 case MVT::v1i128:
6246 case MVT::f128:
6247 if (++NumVRsUsed <= NumVRs)
6248 continue;
6249 break;
6250 case MVT::v4f32:
6251 if (++NumVRsUsed <= NumVRs)
6252 continue;
6253 break;
6254 case MVT::f32:
6255 case MVT::f64:
6256 if (++NumFPRsUsed <= NumFPRs)
6257 continue;
6258 break;
6259 }
6260 HasParameterArea = true;
6261 }
6262 }
6263
6264 /* Respect alignment of argument on the stack. */
6265 auto Alignment =
6266 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6267 NumBytes = alignTo(NumBytes, Alignment);
6268
6269 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6270 if (Flags.isInConsecutiveRegsLast())
6271 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6272 }
6273
6274 unsigned NumBytesActuallyUsed = NumBytes;
6275
6276 // In the old ELFv1 ABI,
6277 // the prolog code of the callee may store up to 8 GPR argument registers to
6278 // the stack, allowing va_start to index over them in memory if it is varargs.
6279 // Because we cannot tell if this is needed on the caller side, we have to
6280 // conservatively assume that it is needed. As such, make sure we have at
6281 // least enough stack space for the caller to store the 8 GPRs.
6282 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6283 // really requires memory operands, e.g. a vararg function.
6284 if (HasParameterArea)
6285 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6286 else
6287 NumBytes = LinkageSize;
6288
6289 // Tail call needs the stack to be aligned.
6290 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6291 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6292
6293 int SPDiff = 0;
6294
6295 // Calculate by how many bytes the stack has to be adjusted in case of tail
6296 // call optimization.
6297 if (!IsSibCall)
6298 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6299
6300 // To protect arguments on the stack from being clobbered in a tail call,
6301 // force all the loads to happen before doing any other lowering.
6302 if (CFlags.IsTailCall)
6303 Chain = DAG.getStackArgumentTokenFactor(Chain);
6304
6305 // Adjust the stack pointer for the new arguments...
6306 // These operations are automatically eliminated by the prolog/epilog pass
6307 if (!IsSibCall)
6308 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6309 SDValue CallSeqStart = Chain;
6310
6311 // Load the return address and frame pointer so they can be moved somewhere else
6312 // later.
6313 SDValue LROp, FPOp;
6314 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6315
6316 // Set up a copy of the stack pointer for use loading and storing any
6317 // arguments that may not fit in the registers available for argument
6318 // passing.
6319 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6320
6321 // Figure out which arguments are going to go in registers, and which in
6322 // memory. Also, if this is a vararg function, floating point operations
6323 // must be stored to our stack, and loaded into integer regs as well, if
6324 // any integer regs are available for argument passing.
6325 unsigned ArgOffset = LinkageSize;
6326
6328 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6329
6330 SmallVector<SDValue, 8> MemOpChains;
6331 for (unsigned i = 0; i != NumOps; ++i) {
6332 SDValue Arg = OutVals[i];
6333 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6334 EVT ArgVT = Outs[i].VT;
6335 EVT OrigVT = Outs[i].ArgVT;
6336
6337 // PtrOff will be used to store the current argument to the stack if a
6338 // register cannot be found for it.
6339 SDValue PtrOff;
6340
6341 // We re-align the argument offset for each argument, except when using the
6342 // fast calling convention, when we need to make sure we do that only when
6343 // we'll actually use a stack slot.
6344 auto ComputePtrOff = [&]() {
6345 /* Respect alignment of argument on the stack. */
6346 auto Alignment =
6347 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6348 ArgOffset = alignTo(ArgOffset, Alignment);
6349
6350 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6351
6352 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6353 };
6354
6355 if (!IsFastCall) {
6356 ComputePtrOff();
6357
6358 /* Compute GPR index associated with argument offset. */
6359 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6360 GPR_idx = std::min(GPR_idx, NumGPRs);
6361 }
6362
6363 // Promote integers to 64-bit values.
6364 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6365 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6366 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6367 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6368 }
6369
6370 // FIXME memcpy is used way more than necessary. Correctness first.
6371 // Note: "by value" is code for passing a structure by value, not
6372 // basic types.
6373 if (Flags.isByVal()) {
6374 // Note: Size includes alignment padding, so
6375 // struct x { short a; char b; }
6376 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6377 // These are the proper values we need for right-justifying the
6378 // aggregate in a parameter register.
6379 unsigned Size = Flags.getByValSize();
6380
6381 // An empty aggregate parameter takes up no storage and no
6382 // registers.
6383 if (Size == 0)
6384 continue;
6385
6386 if (IsFastCall)
6387 ComputePtrOff();
6388
6389 // All aggregates smaller than 8 bytes must be passed right-justified.
6390 if (Size==1 || Size==2 || Size==4) {
6391 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6392 if (GPR_idx != NumGPRs) {
6393 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6394 MachinePointerInfo(), VT);
6395 MemOpChains.push_back(Load.getValue(1));
6396 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6397
6398 ArgOffset += PtrByteSize;
6399 continue;
6400 }
6401 }
6402
6403 if (GPR_idx == NumGPRs && Size < 8) {
6404 SDValue AddPtr = PtrOff;
6405 if (!isLittleEndian) {
6406 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6407 PtrOff.getValueType());
6408 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6409 }
6410 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6411 CallSeqStart,
6412 Flags, DAG, dl);
6413 ArgOffset += PtrByteSize;
6414 continue;
6415 }
6416 // Copy the object to the parameter save area if it cannot be entirely
6417 // passed by registers.
6418 // FIXME: we only need to copy the parts which need to be passed in
6419 // parameter save area. For the parts passed by registers, we don't need
6420 // to copy them to the stack although we need to allocate space for them
6421 // in parameter save area.
6422 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6423 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6424 CallSeqStart,
6425 Flags, DAG, dl);
6426
6427 // When a register is available, pass a small aggregate right-justified.
6428 if (Size < 8 && GPR_idx != NumGPRs) {
6429 // The easiest way to get this right-justified in a register
6430 // is to copy the structure into the rightmost portion of a
6431 // local variable slot, then load the whole slot into the
6432 // register.
6433 // FIXME: The memcpy seems to produce pretty awful code for
6434 // small aggregates, particularly for packed ones.
6435 // FIXME: It would be preferable to use the slot in the
6436 // parameter save area instead of a new local variable.
6437 SDValue AddPtr = PtrOff;
6438 if (!isLittleEndian) {
6439 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6440 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6441 }
6442 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6443 CallSeqStart,
6444 Flags, DAG, dl);
6445
6446 // Load the slot into the register.
6447 SDValue Load =
6448 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6449 MemOpChains.push_back(Load.getValue(1));
6450 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6451
6452 // Done with this argument.
6453 ArgOffset += PtrByteSize;
6454 continue;
6455 }
6456
6457 // For aggregates larger than PtrByteSize, copy the pieces of the
6458 // object that fit into registers from the parameter save area.
6459 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6460 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6461 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6462 if (GPR_idx != NumGPRs) {
6463 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6464 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6465 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6466 MachinePointerInfo(), ObjType);
6467
6468 MemOpChains.push_back(Load.getValue(1));
6469 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6470 ArgOffset += PtrByteSize;
6471 } else {
6472 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6473 break;
6474 }
6475 }
6476 continue;
6477 }
6478
6479 switch (Arg.getSimpleValueType().SimpleTy) {
6480 default: llvm_unreachable("Unexpected ValueType for argument!");
6481 case MVT::i1:
6482 case MVT::i32:
6483 case MVT::i64:
6484 if (Flags.isNest()) {
6485 // The 'nest' parameter, if any, is passed in R11.
6486 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6487 break;
6488 }
6489
6490 // These can be scalar arguments or elements of an integer array type
6491 // passed directly. Clang may use those instead of "byval" aggregate
6492 // types to avoid forcing arguments to memory unnecessarily.
6493 if (GPR_idx != NumGPRs) {
6494 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6495 } else {
6496 if (IsFastCall)
6497 ComputePtrOff();
6498
6499 assert(HasParameterArea &&
6500 "Parameter area must exist to pass an argument in memory.");
6501 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6502 true, CFlags.IsTailCall, false, MemOpChains,
6503 TailCallArguments, dl);
6504 if (IsFastCall)
6505 ArgOffset += PtrByteSize;
6506 }
6507 if (!IsFastCall)
6508 ArgOffset += PtrByteSize;
6509 break;
6510 case MVT::f32:
6511 case MVT::f64: {
6512 // These can be scalar arguments or elements of a float array type
6513 // passed directly. The latter are used to implement ELFv2 homogenous
6514 // float aggregates.
6515
6516 // Named arguments go into FPRs first, and once they overflow, the
6517 // remaining arguments go into GPRs and then the parameter save area.
6518 // Unnamed arguments for vararg functions always go to GPRs and
6519 // then the parameter save area. For now, put all arguments to vararg
6520 // routines always in both locations (FPR *and* GPR or stack slot).
6521 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6522 bool NeededLoad = false;
6523
6524 // First load the argument into the next available FPR.
6525 if (FPR_idx != NumFPRs)
6526 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6527
6528 // Next, load the argument into GPR or stack slot if needed.
6529 if (!NeedGPROrStack)
6530 ;
6531 else if (GPR_idx != NumGPRs && !IsFastCall) {
6532 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6533 // once we support fp <-> gpr moves.
6534
6535 // In the non-vararg case, this can only ever happen in the
6536 // presence of f32 array types, since otherwise we never run
6537 // out of FPRs before running out of GPRs.
6538 SDValue ArgVal;
6539
6540 // Double values are always passed in a single GPR.
6541 if (Arg.getValueType() != MVT::f32) {
6542 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6543
6544 // Non-array float values are extended and passed in a GPR.
6545 } else if (!Flags.isInConsecutiveRegs()) {
6546 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6547 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6548
6549 // If we have an array of floats, we collect every odd element
6550 // together with its predecessor into one GPR.
6551 } else if (ArgOffset % PtrByteSize != 0) {
6552 SDValue Lo, Hi;
6553 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6554 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6555 if (!isLittleEndian)
6556 std::swap(Lo, Hi);
6557 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6558
6559 // The final element, if even, goes into the first half of a GPR.
6560 } else if (Flags.isInConsecutiveRegsLast()) {
6561 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6562 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6563 if (!isLittleEndian)
6564 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6565 DAG.getConstant(32, dl, MVT::i32));
6566
6567 // Non-final even elements are skipped; they will be handled
6567 // together with the subsequent argument on the next go-around.
6569 } else
6570 ArgVal = SDValue();
6571
6572 if (ArgVal.getNode())
6573 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6574 } else {
6575 if (IsFastCall)
6576 ComputePtrOff();
6577
6578 // Single-precision floating-point values are mapped to the
6579 // second (rightmost) word of the stack doubleword.
6580 if (Arg.getValueType() == MVT::f32 &&
6581 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6582 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6583 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6584 }
6585
6586 assert(HasParameterArea &&
6587 "Parameter area must exist to pass an argument in memory.");
6588 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6589 true, CFlags.IsTailCall, false, MemOpChains,
6590 TailCallArguments, dl);
6591
6592 NeededLoad = true;
6593 }
6594 // When passing an array of floats, the array occupies consecutive
6595 // space in the argument area; only round up to the next doubleword
6596 // at the end of the array. Otherwise, each float takes 8 bytes.
6597 if (!IsFastCall || NeededLoad) {
6598 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6599 Flags.isInConsecutiveRegs()) ? 4 : 8;
6600 if (Flags.isInConsecutiveRegsLast())
6601 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6602 }
6603 break;
6604 }
6605 case MVT::v4f32:
6606 case MVT::v4i32:
6607 case MVT::v8i16:
6608 case MVT::v16i8:
6609 case MVT::v2f64:
6610 case MVT::v2i64:
6611 case MVT::v1i128:
6612 case MVT::f128:
6613 // These can be scalar arguments or elements of a vector array type
6614 // passed directly. The latter are used to implement ELFv2 homogenous
6615 // vector aggregates.
6616
6617 // For a varargs call, named arguments go into VRs or on the stack as
6618 // usual; unnamed arguments always go to the stack or the corresponding
6619 // GPRs when within range. For now, we always put the value in both
6620 // locations (or even all three).
6621 if (CFlags.IsVarArg) {
6622 assert(HasParameterArea &&
6623 "Parameter area must exist if we have a varargs call.");
6624 // We could elide this store in the case where the object fits
6625 // entirely in R registers. Maybe later.
6626 SDValue Store =
6627 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6628 MemOpChains.push_back(Store);
6629 if (VR_idx != NumVRs) {
6630 SDValue Load =
6631 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6632 MemOpChains.push_back(Load.getValue(1));
6633 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6634 }
6635 ArgOffset += 16;
6636 for (unsigned i=0; i<16; i+=PtrByteSize) {
6637 if (GPR_idx == NumGPRs)
6638 break;
6639 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6640 DAG.getConstant(i, dl, PtrVT));
6641 SDValue Load =
6642 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6643 MemOpChains.push_back(Load.getValue(1));
6644 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6645 }
6646 break;
6647 }
6648
6649 // Non-varargs Altivec params go into VRs or on the stack.
6650 if (VR_idx != NumVRs) {
6651 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6652 } else {
6653 if (IsFastCall)
6654 ComputePtrOff();
6655
6656 assert(HasParameterArea &&
6657 "Parameter area must exist to pass an argument in memory.");
6658 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6659 true, CFlags.IsTailCall, true, MemOpChains,
6660 TailCallArguments, dl);
6661 if (IsFastCall)
6662 ArgOffset += 16;
6663 }
6664
6665 if (!IsFastCall)
6666 ArgOffset += 16;
6667 break;
6668 }
6669 }
6670
6671 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6672 "mismatch in size of parameter area");
6673 (void)NumBytesActuallyUsed;
6674
6675 if (!MemOpChains.empty())
6676 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6677
6678 // Check if this is an indirect call (MTCTR/BCTRL).
6679 // See prepareDescriptorIndirectCall and buildCallOperands for more
6680 // information about calls through function pointers in the 64-bit SVR4 ABI.
6681 if (CFlags.IsIndirect) {
6682 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6683 // caller in the TOC save area.
6684 if (isTOCSaveRestoreRequired(Subtarget)) {
6685 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6686 // Load r2 into a virtual register and store it to the TOC save area.
6687 setUsesTOCBasePtr(DAG);
6688 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6689 // TOC save area offset.
6690 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6691 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6692 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6693 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6694 MachinePointerInfo::getStack(
6695 DAG.getMachineFunction(), TOCSaveOffset));
6696 }
6697 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6698 // This does not mean the MTCTR instruction must use R12; it's easier
6699 // to model this as an extra parameter, so do that.
6700 if (isELFv2ABI && !CFlags.IsPatchPoint)
6701 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6702 }
6703
6704 // Build a sequence of copy-to-reg nodes chained together with token chain
6705 // and flag operands which copy the outgoing args into the appropriate regs.
6706 SDValue InGlue;
6707 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6708 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6709 RegsToPass[i].second, InGlue);
6710 InGlue = Chain.getValue(1);
6711 }
6712
6713 if (CFlags.IsTailCall && !IsSibCall)
6714 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6715 TailCallArguments);
6716
6717 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6718 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6719}
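
// Illustrative sketch, not part of this lowering code: the ArgOffset
// bookkeeping above for f32 members of a homogeneous float array, restated as
// a tiny standalone helper. The helper name and parameters are invented here
// and assume the 64-bit ELF ABI (PtrByteSize == 8).
static unsigned advanceF32ArgOffset(unsigned ArgOffset, bool InConsecutiveRegs,
                                    bool IsLastArrayElement) {
  // Array members pack two per doubleword; standalone floats take a full slot.
  ArgOffset += InConsecutiveRegs ? 4 : 8;
  // Only round up to the next doubleword once the array ends.
  if (IsLastArrayElement)
    ArgOffset = (ArgOffset + 7) / 8 * 8;
  return ArgOffset;
}
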
6720
6721// Returns true when the shadow of a general purpose argument register
6722// in the parameter save area is aligned to at least 'RequiredAlign'.
6723static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6724 assert(RequiredAlign.value() <= 16 &&
6725 "Required alignment greater than stack alignment.");
6726 switch (Reg) {
6727 default:
6728 report_fatal_error("called on invalid register.");
6729 case PPC::R5:
6730 case PPC::R9:
6731 case PPC::X3:
6732 case PPC::X5:
6733 case PPC::X7:
6734 case PPC::X9:
6735 // These registers are 16-byte aligned, which is the strictest alignment
6736 // we can support.
6737 return true;
6738 case PPC::R3:
6739 case PPC::R7:
6740 case PPC::X4:
6741 case PPC::X6:
6742 case PPC::X8:
6743 case PPC::X10:
6744 // The shadow of these registers in the PSA is 8 byte aligned.
6745 return RequiredAlign <= 8;
6746 case PPC::R4:
6747 case PPC::R6:
6748 case PPC::R8:
6749 case PPC::R10:
6750 return RequiredAlign <= 4;
6751 }
6752}
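
// Illustrative sketch, not part of this file: why the register shadows above
// have the stated alignments. Each argument GPR shadows PtrByteSize bytes of
// the parameter save area, which starts right after the linkage area (24 bytes
// on PPC32 and 48 bytes on PPC64, per the LSA comment in LowerCall_AIX below).
// The helper name and parameters are invented for illustration.
#include <cstdint>
static uint64_t gprShadowAlignment(unsigned GPRIndex /* 0 => R3/X3 */,
                                   bool IsPPC64) {
  const uint64_t LinkageSize = IsPPC64 ? 48 : 24;
  const uint64_t PtrByteSize = IsPPC64 ? 8 : 4;
  const uint64_t Offset = LinkageSize + GPRIndex * PtrByteSize;
  const uint64_t Align = Offset & (0 - Offset); // largest power of two dividing Offset
  return Align > 16 ? 16 : Align;               // 16 is the strictest supported alignment
}
// e.g. gprShadowAlignment(0, /*IsPPC64=*/true) == 16 for X3 (offset 48),
//      gprShadowAlignment(1, /*IsPPC64=*/true) == 8  for X4 (offset 56),
//      gprShadowAlignment(2, /*IsPPC64=*/false) == 16 for R5 (offset 32).
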
6753
6754static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6755 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6756 CCState &S) {
6757 AIXCCState &State = static_cast<AIXCCState &>(S);
6758 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6759 State.getMachineFunction().getSubtarget());
6760 const bool IsPPC64 = Subtarget.isPPC64();
6761 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6762 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6763
6764 if (ValVT == MVT::f128)
6765 report_fatal_error("f128 is unimplemented on AIX.");
6766
6767 if (ArgFlags.isNest())
6768 report_fatal_error("Nest arguments are unimplemented.");
6769
6770 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6771 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6772 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6773 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6774 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6775 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6776
6777 static const MCPhysReg VR[] = {// Vector registers.
6778 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6779 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6780 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6781
6782 if (ArgFlags.isByVal()) {
6783 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6784 report_fatal_error("Pass-by-value arguments with alignment greater than "
6785 "register width are not supported.");
6786
6787 const unsigned ByValSize = ArgFlags.getByValSize();
6788
6789 // An empty aggregate parameter takes up no storage and no registers,
6790 // but needs a MemLoc for a stack slot for the formal arguments side.
6791 if (ByValSize == 0) {
6792 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6793 State.getStackSize(), RegVT, LocInfo));
6794 return false;
6795 }
6796
6797 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6798 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6799 for (const unsigned E = Offset + StackSize; Offset < E;
6800 Offset += PtrAlign.value()) {
6801 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6802 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6803 else {
6804 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6805 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6806 LocInfo));
6807 break;
6808 }
6809 }
6810 return false;
6811 }
6812
6813 // Arguments always reserve parameter save area.
6814 switch (ValVT.SimpleTy) {
6815 default:
6816 report_fatal_error("Unhandled value type for argument.");
6817 case MVT::i64:
6818 // i64 arguments should have been split to i32 for PPC32.
6819 assert(IsPPC64 && "PPC32 should have split i64 values.");
6820 [[fallthrough]];
6821 case MVT::i1:
6822 case MVT::i32: {
6823 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6824 // AIX integer arguments are always passed in register width.
6825 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6826 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6827 : CCValAssign::LocInfo::ZExt;
6828 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6829 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6830 else
6831 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6832
6833 return false;
6834 }
6835 case MVT::f32:
6836 case MVT::f64: {
6837 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6838 const unsigned StoreSize = LocVT.getStoreSize();
6839 // Floats are always 4-byte aligned in the PSA on AIX.
6840 // This includes f64 in 64-bit mode for ABI compatibility.
6841 const unsigned Offset =
6842 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6843 unsigned FReg = State.AllocateReg(FPR);
6844 if (FReg)
6845 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6846
6847 // Reserve and initialize GPRs or initialize the PSA as required.
6848 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6849 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6850 assert(FReg && "An FPR should be available when a GPR is reserved.");
6851 if (State.isVarArg()) {
6852 // Successfully reserved GPRs are only initialized for vararg calls.
6853 // Custom handling is required for:
6854 // f64 in PPC32 needs to be split into 2 GPRs.
6855 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6856 State.addLoc(
6857 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6858 }
6859 } else {
6860 // If there are insufficient GPRs, the PSA needs to be initialized.
6861 // For compatibility with the AIX XL compiler, initialization occurs even
6862 // if an FPR was already initialized. The full memory for the
6863 // argument will be initialized even if a prior word is saved in GPR.
6864 // A custom memLoc is used when the argument also passes in FPR so
6865 // that the callee handling can skip over it easily.
6866 State.addLoc(
6867 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6868 LocInfo)
6869 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6870 break;
6871 }
6872 }
6873
6874 return false;
6875 }
6876 case MVT::v4f32:
6877 case MVT::v4i32:
6878 case MVT::v8i16:
6879 case MVT::v16i8:
6880 case MVT::v2i64:
6881 case MVT::v2f64:
6882 case MVT::v1i128: {
6883 const unsigned VecSize = 16;
6884 const Align VecAlign(VecSize);
6885
6886 if (!State.isVarArg()) {
6887 // If there are vector registers remaining we don't consume any stack
6888 // space.
6889 if (unsigned VReg = State.AllocateReg(VR)) {
6890 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6891 return false;
6892 }
6893 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6894 // might be allocated in the portion of the PSA that is shadowed by the
6895 // GPRs.
6896 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6897 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6898 return false;
6899 }
6900
6901 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6902 ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6903
6904 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6905 // Burn any underaligned registers and their shadowed stack space until
6906 // we reach the required alignment.
6907 while (NextRegIndex != GPRs.size() &&
6908 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6909 // Shadow allocate register and its stack shadow.
6910 unsigned Reg = State.AllocateReg(GPRs);
6911 State.AllocateStack(PtrSize, PtrAlign);
6912 assert(Reg && "Allocating register unexpectedly failed.");
6913 (void)Reg;
6914 NextRegIndex = State.getFirstUnallocated(GPRs);
6915 }
6916
6917 // Vectors that are passed as fixed arguments are handled differently.
6918 // They are passed in VRs if any are available (unlike arguments passed
6919 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
6920 // functions).
6921 if (State.isFixed(ValNo)) {
6922 if (unsigned VReg = State.AllocateReg(VR)) {
6923 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6924 // Shadow allocate GPRs and stack space even though we pass in a VR.
6925 for (unsigned I = 0; I != VecSize; I += PtrSize)
6926 State.AllocateReg(GPRs);
6927 State.AllocateStack(VecSize, VecAlign);
6928 return false;
6929 }
6930 // No vector registers remain so pass on the stack.
6931 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6932 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6933 return false;
6934 }
6935
6936 // If all GPRs are consumed then we pass the argument fully on the stack.
6937 if (NextRegIndex == GPRs.size()) {
6938 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6939 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6940 return false;
6941 }
6942
6943 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6944 // half of the argument, and then need to pass the remaining half on the
6945 // stack.
6946 if (GPRs[NextRegIndex] == PPC::R9) {
6947 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6948 State.addLoc(
6949 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6950
6951 const unsigned FirstReg = State.AllocateReg(PPC::R9);
6952 const unsigned SecondReg = State.AllocateReg(PPC::R10);
6953 assert(FirstReg && SecondReg &&
6954 "Allocating R9 or R10 unexpectedly failed.");
6955 State.addLoc(
6956 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6957 State.addLoc(
6958 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6959 return false;
6960 }
6961
6962 // We have enough GPRs to fully pass the vector argument, and we have
6963 // already consumed any underaligned registers. Start with the custom
6964 // MemLoc and then the custom RegLocs.
6965 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6966 State.addLoc(
6967 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6968 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6969 const unsigned Reg = State.AllocateReg(GPRs);
6970 assert(Reg && "Failed to allocate register for vararg vector argument");
6971 State.addLoc(
6972 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6973 }
6974 return false;
6975 }
6976 }
6977 return true;
6978}
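
// Illustrative sketch, not part of this calling-convention code: the
// "burn underaligned registers" step above, specialised to PPC64, where the
// shadow of every even-indexed argument GPR (X3, X5, X7, X9) is 16-byte
// aligned. The helper name is invented for illustration; the real code also
// shadow-allocates the matching stack space for each burned register.
static unsigned skipUnderalignedGPRs(unsigned NextRegIndex, unsigned NumGPRs) {
  while (NextRegIndex != NumGPRs && (NextRegIndex % 2) != 0)
    ++NextRegIndex; // burn one underaligned GPR (and, above, its stack shadow)
  return NextRegIndex;
}
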
6979
6980// So far, this function is only used by LowerFormalArguments_AIX()
6981 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6982 bool IsPPC64,
6983 bool HasP8Vector,
6984 bool HasVSX) {
6985 assert((IsPPC64 || SVT != MVT::i64) &&
6986 "i64 should have been split for 32-bit codegen.");
6987
6988 switch (SVT) {
6989 default:
6990 report_fatal_error("Unexpected value type for formal argument");
6991 case MVT::i1:
6992 case MVT::i32:
6993 case MVT::i64:
6994 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6995 case MVT::f32:
6996 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6997 case MVT::f64:
6998 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6999 case MVT::v4f32:
7000 case MVT::v4i32:
7001 case MVT::v8i16:
7002 case MVT::v16i8:
7003 case MVT::v2i64:
7004 case MVT::v2f64:
7005 case MVT::v1i128:
7006 return &PPC::VRRCRegClass;
7007 }
7008}
7009
7010 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7011 SelectionDAG &DAG, SDValue ArgValue,
7012 MVT LocVT, const SDLoc &dl) {
7013 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7014 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7015
7016 if (Flags.isSExt())
7017 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7018 DAG.getValueType(ValVT));
7019 else if (Flags.isZExt())
7020 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7021 DAG.getValueType(ValVT));
7022
7023 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7024}
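
// Illustrative sketch, not part of this file: in plain C terms, an i32 that
// arrived sign-extended in a 64-bit GPR is narrowed by the helper above as
// AssertSext + TRUNCATE, so the only visible effect is the truncation. The
// function below is invented for illustration.
#include <cstdint>
static int32_t narrowSExtFromGPR(int64_t GPRValue) {
  // AssertSext merely records that the high 32 bits already replicate bit 31.
  return (int32_t)GPRValue; // ISD::TRUNCATE
}
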
7025
7026static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7027 const unsigned LASize = FL->getLinkageSize();
7028
7029 if (PPC::GPRCRegClass.contains(Reg)) {
7030 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7031 "Reg must be a valid argument register!");
7032 return LASize + 4 * (Reg - PPC::R3);
7033 }
7034
7035 if (PPC::G8RCRegClass.contains(Reg)) {
7036 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7037 "Reg must be a valid argument register!");
7038 return LASize + 8 * (Reg - PPC::X3);
7039 }
7040
7041 llvm_unreachable("Only general purpose registers expected.");
7042}
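
// Illustrative sketch, not part of this file: a couple of worked instances of
// the mapping above, assuming the 32-bit linkage area is 24 bytes and the
// 64-bit one is 48 bytes (see the LSA comment in LowerCall_AIX below). The
// helper name is invented for illustration.
static unsigned argGPRShadowOffsetAIX(unsigned ArgRegIndex /* 0 => R3/X3 */,
                                      bool IsPPC64) {
  const unsigned LinkageSize = IsPPC64 ? 48 : 24;
  return LinkageSize + (IsPPC64 ? 8 : 4) * ArgRegIndex;
}
// argGPRShadowOffsetAIX(2, /*IsPPC64=*/false) == 32  (R5)
// argGPRShadowOffsetAIX(4, /*IsPPC64=*/true)  == 80  (X7)
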
7043
7044// AIX ABI Stack Frame Layout:
7045//
7046// Low Memory +--------------------------------------------+
7047// SP +---> | Back chain | ---+
7048// | +--------------------------------------------+ |
7049// | | Saved Condition Register | |
7050// | +--------------------------------------------+ |
7051// | | Saved Linkage Register | |
7052// | +--------------------------------------------+ | Linkage Area
7053// | | Reserved for compilers | |
7054// | +--------------------------------------------+ |
7055// | | Reserved for binders | |
7056// | +--------------------------------------------+ |
7057// | | Saved TOC pointer | ---+
7058// | +--------------------------------------------+
7059// | | Parameter save area |
7060// | +--------------------------------------------+
7061// | | Alloca space |
7062// | +--------------------------------------------+
7063// | | Local variable space |
7064// | +--------------------------------------------+
7065// | | Float/int conversion temporary |
7066// | +--------------------------------------------+
7067// | | Save area for AltiVec registers |
7068// | +--------------------------------------------+
7069// | | AltiVec alignment padding |
7070// | +--------------------------------------------+
7071// | | Save area for VRSAVE register |
7072// | +--------------------------------------------+
7073// | | Save area for General Purpose registers |
7074// | +--------------------------------------------+
7075// | | Save area for Floating Point registers |
7076// | +--------------------------------------------+
7077// +---- | Back chain |
7078// High Memory +--------------------------------------------+
7079//
7080// Specifications:
7081// AIX 7.2 Assembler Language Reference
7082// Subroutine linkage convention
7083
7084SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7085 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7086 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7087 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7088
7089 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7090 CallConv == CallingConv::Fast) &&
7091 "Unexpected calling convention!");
7092
7093 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7094 report_fatal_error("Tail call support is unimplemented on AIX.");
7095
7096 if (useSoftFloat())
7097 report_fatal_error("Soft float support is unimplemented on AIX.");
7098
7099 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7100
7101 const bool IsPPC64 = Subtarget.isPPC64();
7102 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7103
7104 // Assign locations to all of the incoming arguments.
7105 SmallVector<CCValAssign, 16> ArgLocs;
7106 MachineFunction &MF = DAG.getMachineFunction();
7107 MachineFrameInfo &MFI = MF.getFrameInfo();
7108 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7109 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7110
7111 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7112 // Reserve space for the linkage area on the stack.
7113 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7114 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7115 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7116
7117 SmallVector<SDValue, 8> MemOps;
7118
7119 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7120 CCValAssign &VA = ArgLocs[I++];
7121 MVT LocVT = VA.getLocVT();
7122 MVT ValVT = VA.getValVT();
7123 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7124 // For compatibility with the AIX XL compiler, the float args in the
7125 // parameter save area are initialized even if the argument is available
7126 // in register. The caller is required to initialize both the register
7127 // and memory; however, the callee can choose to expect it in either.
7128 // The memloc is dismissed here because the argument is retrieved from
7129 // the register.
7130 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7131 continue;
7132
7133 auto HandleMemLoc = [&]() {
7134 const unsigned LocSize = LocVT.getStoreSize();
7135 const unsigned ValSize = ValVT.getStoreSize();
7136 assert((ValSize <= LocSize) &&
7137 "Object size is larger than size of MemLoc");
7138 int CurArgOffset = VA.getLocMemOffset();
7139 // Objects are right-justified because AIX is big-endian.
7140 if (LocSize > ValSize)
7141 CurArgOffset += LocSize - ValSize;
7142 // Potential tail calls could cause overwriting of argument stack slots.
7143 const bool IsImmutable =
7144 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7145 (CallConv == CallingConv::Fast));
7146 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7147 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7148 SDValue ArgValue =
7149 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7150 InVals.push_back(ArgValue);
7151 };
7152
7153 // Vector arguments to VaArg functions are passed both on the stack, and
7154 // in any available GPRs. Load the value from the stack and add the GPRs
7155 // as live ins.
7156 if (VA.isMemLoc() && VA.needsCustom()) {
7157 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7158 assert(isVarArg && "Only use custom memloc for vararg.");
7159 // Remember the ValNo of the custom MemLoc so we can compare it to the
7160 // ValNo of the matching custom RegLocs.
7161 const unsigned OriginalValNo = VA.getValNo();
7162 (void)OriginalValNo;
7163
7164 auto HandleCustomVecRegLoc = [&]() {
7165 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7166 "Missing custom RegLoc.");
7167 VA = ArgLocs[I++];
7168 assert(VA.getValVT().isVector() &&
7169 "Unexpected Val type for custom RegLoc.");
7170 assert(VA.getValNo() == OriginalValNo &&
7171 "ValNo mismatch between custom MemLoc and RegLoc.");
7172 MVT::SimpleValueType SVT = VA.getValVT().SimpleTy;
7173 MF.addLiveIn(VA.getLocReg(),
7174 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7175 Subtarget.hasVSX()));
7176 };
7177
7178 HandleMemLoc();
7179 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7180 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7181 // R10.
7182 HandleCustomVecRegLoc();
7183 HandleCustomVecRegLoc();
7184
7185 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7186 // we passed the vector in R5, R6, R7 and R8.
7187 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7188 assert(!IsPPC64 &&
7189 "Only 2 custom RegLocs expected for 64-bit codegen.");
7190 HandleCustomVecRegLoc();
7191 HandleCustomVecRegLoc();
7192 }
7193
7194 continue;
7195 }
7196
7197 if (VA.isRegLoc()) {
7198 if (VA.getValVT().isScalarInteger())
7199 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7200 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7201 switch (VA.getValVT().SimpleTy) {
7202 default:
7203 report_fatal_error("Unhandled value type for argument.");
7204 case MVT::f32:
7205 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7206 break;
7207 case MVT::f64:
7208 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7209 break;
7210 }
7211 } else if (VA.getValVT().isVector()) {
7212 switch (VA.getValVT().SimpleTy) {
7213 default:
7214 report_fatal_error("Unhandled value type for argument.");
7215 case MVT::v16i8:
7216 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7217 break;
7218 case MVT::v8i16:
7219 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7220 break;
7221 case MVT::v4i32:
7222 case MVT::v2i64:
7223 case MVT::v1i128:
7224 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7225 break;
7226 case MVT::v4f32:
7227 case MVT::v2f64:
7228 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7229 break;
7230 }
7231 }
7232 }
7233
7234 if (Flags.isByVal() && VA.isMemLoc()) {
7235 const unsigned Size =
7236 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7237 PtrByteSize);
7238 const int FI = MF.getFrameInfo().CreateFixedObject(
7239 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7240 /* IsAliased */ true);
7241 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7242 InVals.push_back(FIN);
7243
7244 continue;
7245 }
7246
7247 if (Flags.isByVal()) {
7248 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7249
7250 const MCPhysReg ArgReg = VA.getLocReg();
7251 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7252
7253 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7254 report_fatal_error("Over aligned byvals not supported yet.");
7255
7256 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7257 const int FI = MF.getFrameInfo().CreateFixedObject(
7258 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7259 /* IsAliased */ true);
7260 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7261 InVals.push_back(FIN);
7262
7263 // Add live ins for all the RegLocs for the same ByVal.
7264 const TargetRegisterClass *RegClass =
7265 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7266
7267 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7268 unsigned Offset) {
7269 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7270 // Since the caller's side has left-justified the aggregate in the
7271 // register, we can simply store the entire register into the stack
7272 // slot.
7273 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7274 // The store to the fixedstack object is needed because accessing a
7275 // field of the ByVal will use a gep and load. Ideally we will optimize
7276 // to extracting the value from the register directly, and elide the
7277 // stores when the argument's address is not taken, but that will need to
7278 // be future work.
7279 SDValue Store = DAG.getStore(
7280 CopyFrom.getValue(1), dl, CopyFrom,
7281 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7282 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7283
7284 MemOps.push_back(Store);
7285 };
7286
7287 unsigned Offset = 0;
7288 HandleRegLoc(VA.getLocReg(), Offset);
7289 Offset += PtrByteSize;
7290 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7291 Offset += PtrByteSize) {
7292 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7293 "RegLocs should be for ByVal argument.");
7294
7295 const CCValAssign RL = ArgLocs[I++];
7296 HandleRegLoc(RL.getLocReg(), Offset);
7298 }
7299
7300 if (Offset != StackSize) {
7301 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7302 "Expected MemLoc for remaining bytes.");
7303 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7304 // Consume the MemLoc. The InVal has already been emitted, so nothing
7305 // more needs to be done.
7306 ++I;
7307 }
7308
7309 continue;
7310 }
7311
7312 if (VA.isRegLoc() && !VA.needsCustom()) {
7313 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7314 Register VReg =
7315 MF.addLiveIn(VA.getLocReg(),
7316 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7317 Subtarget.hasVSX()));
7318 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7319 if (ValVT.isScalarInteger() &&
7320 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7321 ArgValue =
7322 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7323 }
7324 InVals.push_back(ArgValue);
7325 continue;
7326 }
7327 if (VA.isMemLoc()) {
7328 HandleMemLoc();
7329 continue;
7330 }
7331 }
7332
7333 // On AIX a minimum of 8 words is saved to the parameter save area.
7334 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7335 // Area that is at least reserved in the caller of this function.
7336 unsigned CallerReservedArea = std::max<unsigned>(
7337 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7338
7339 // Set the size that is at least reserved in caller of this function. Tail
7340 // call optimized function's reserved stack space needs to be aligned so
7341 // that taking the difference between two stack areas will result in an
7342 // aligned stack.
7343 CallerReservedArea =
7344 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7345 FuncInfo->setMinReservedArea(CallerReservedArea);
7346
7347 if (isVarArg) {
7348 FuncInfo->setVarArgsFrameIndex(
7349 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7350 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7351
7352 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7353 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7354
7355 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7356 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7357 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7358
7359 // The fixed integer arguments of a variadic function are stored to the
7360 // VarArgsFrameIndex on the stack so that they may be loaded by
7361 // dereferencing the result of va_next.
7362 for (unsigned GPRIndex =
7363 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7364 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7365
7366 const Register VReg =
7367 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7368 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7369
7370 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7371 SDValue Store =
7372 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7373 MemOps.push_back(Store);
7374 // Increment the address for the next argument to store.
7375 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7376 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7377 }
7378 }
7379
7380 if (!MemOps.empty())
7381 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7382
7383 return Chain;
7384}
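
// Illustrative sketch, not part of this file: which argument GPRs the vararg
// handling above spills to the parameter save area. GPRs whose shadows were
// already consumed by named arguments are skipped, so spilling starts at the
// index below. The helper name is invented for illustration.
static unsigned firstVarArgGPRToSpill(unsigned StackBytesUsed,
                                      unsigned LinkageSize,
                                      unsigned PtrByteSize) {
  // Mirrors (CCInfo.getStackSize() - LinkageSize) / PtrByteSize above.
  return (StackBytesUsed - LinkageSize) / PtrByteSize;
}
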
7385
7386SDValue PPCTargetLowering::LowerCall_AIX(
7387 SDValue Chain, SDValue Callee, CallFlags CFlags,
7388 const SmallVectorImpl<ISD::OutputArg> &Outs,
7389 const SmallVectorImpl<SDValue> &OutVals,
7390 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7391 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7392 const CallBase *CB) const {
7393 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7394 // AIX ABI stack frame layout.
7395
7396 assert((CFlags.CallConv == CallingConv::C ||
7397 CFlags.CallConv == CallingConv::Cold ||
7398 CFlags.CallConv == CallingConv::Fast) &&
7399 "Unexpected calling convention!");
7400
7401 if (CFlags.IsPatchPoint)
7402 report_fatal_error("This call type is unimplemented on AIX.");
7403
7404 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7405
7406 MachineFunction &MF = DAG.getMachineFunction();
7407 SmallVector<CCValAssign, 16> ArgLocs;
7408 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7409 *DAG.getContext());
7410
7411 // Reserve space for the linkage save area (LSA) on the stack.
7412 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7413 // [SP][CR][LR][2 x reserved][TOC].
7414 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7415 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7416 const bool IsPPC64 = Subtarget.isPPC64();
7417 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7418 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7419 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7420 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7421
7422 // The prolog code of the callee may store up to 8 GPR argument registers to
7423 // the stack, allowing va_start to index over them in memory if the callee
7424 // is variadic.
7425 // Because we cannot tell if this is needed on the caller side, we have to
7426 // conservatively assume that it is needed. As such, make sure we have at
7427 // least enough stack space for the caller to store the 8 GPRs.
7428 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7429 const unsigned NumBytes = std::max<unsigned>(
7430 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7431
7432 // Adjust the stack pointer for the new arguments...
7433 // These operations are automatically eliminated by the prolog/epilog pass.
7434 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7435 SDValue CallSeqStart = Chain;
7436
7437 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7438 SmallVector<SDValue, 8> MemOpChains;
7439
7440 // Set up a copy of the stack pointer for loading and storing any
7441 // arguments that may not fit in the registers available for argument
7442 // passing.
7443 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7444 : DAG.getRegister(PPC::R1, MVT::i32);
7445
7446 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7447 const unsigned ValNo = ArgLocs[I].getValNo();
7448 SDValue Arg = OutVals[ValNo];
7449 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7450
7451 if (Flags.isByVal()) {
7452 const unsigned ByValSize = Flags.getByValSize();
7453
7454 // Nothing to do for zero-sized ByVals on the caller side.
7455 if (!ByValSize) {
7456 ++I;
7457 continue;
7458 }
7459
7460 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7461 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7462 (LoadOffset != 0)
7463 ? DAG.getObjectPtrOffset(
7464 dl, Arg, TypeSize::getFixed(LoadOffset))
7465 : Arg,
7466 MachinePointerInfo(), VT);
7467 };
7468
7469 unsigned LoadOffset = 0;
7470
7471 // Initialize registers, which are fully occupied by the by-val argument.
7472 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7473 SDValue Load = GetLoad(PtrVT, LoadOffset);
7474 MemOpChains.push_back(Load.getValue(1));
7475 LoadOffset += PtrByteSize;
7476 const CCValAssign &ByValVA = ArgLocs[I++];
7477 assert(ByValVA.getValNo() == ValNo &&
7478 "Unexpected location for pass-by-value argument.");
7479 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7480 }
7481
7482 if (LoadOffset == ByValSize)
7483 continue;
7484
7485 // There must be one more loc to handle the remainder.
7486 assert(ArgLocs[I].getValNo() == ValNo &&
7487 "Expected additional location for by-value argument.");
7488
7489 if (ArgLocs[I].isMemLoc()) {
7490 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7491 const CCValAssign &ByValVA = ArgLocs[I++];
7492 ISD::ArgFlagsTy MemcpyFlags = Flags;
7493 // Only memcpy the bytes that don't pass in register.
7494 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7495 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7496 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7497 dl, Arg, TypeSize::getFixed(LoadOffset))
7498 : Arg,
7499 DAG.getObjectPtrOffset(
7500 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7501 CallSeqStart, MemcpyFlags, DAG, dl);
7502 continue;
7503 }
7504
7505 // Initialize the final register residue.
7506 // Any residue that occupies the final by-val arg register must be
7507 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7508 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7509 // 2 and 1 byte loads.
7510 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7511 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7512 "Unexpected register residue for by-value argument.");
7513 SDValue ResidueVal;
7514 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7515 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7516 const MVT VT =
7517 N == 1 ? MVT::i8
7518 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7519 SDValue Load = GetLoad(VT, LoadOffset);
7520 MemOpChains.push_back(Load.getValue(1));
7521 LoadOffset += N;
7522 Bytes += N;
7523
7524 // By-val arguments are passed left-justified in the register.
7525 // Every load here needs to be shifted, otherwise a full register load
7526 // should have been used.
7527 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7528 "Unexpected load emitted during handling of pass-by-value "
7529 "argument.");
7530 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7531 EVT ShiftAmountTy =
7532 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7533 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7534 SDValue ShiftedLoad =
7535 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7536 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7537 ShiftedLoad)
7538 : ShiftedLoad;
7539 }
7540
7541 const CCValAssign &ByValVA = ArgLocs[I++];
7542 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7543 continue;
7544 }
7545
7546 CCValAssign &VA = ArgLocs[I++];
7547 const MVT LocVT = VA.getLocVT();
7548 const MVT ValVT = VA.getValVT();
7549
7550 switch (VA.getLocInfo()) {
7551 default:
7552 report_fatal_error("Unexpected argument extension type.");
7553 case CCValAssign::Full:
7554 break;
7555 case CCValAssign::ZExt:
7556 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7557 break;
7558 case CCValAssign::SExt:
7559 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7560 break;
7561 }
7562
7563 if (VA.isRegLoc() && !VA.needsCustom()) {
7564 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7565 continue;
7566 }
7567
7568 // Vector arguments passed to VarArg functions need custom handling when
7569 // they are passed (at least partially) in GPRs.
7570 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7571 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7572 // Store value to its stack slot.
7573 SDValue PtrOff =
7574 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7575 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7576 SDValue Store =
7577 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7578 MemOpChains.push_back(Store);
7579 const unsigned OriginalValNo = VA.getValNo();
7580 // Then load the GPRs from the stack
7581 unsigned LoadOffset = 0;
7582 auto HandleCustomVecRegLoc = [&]() {
7583 assert(I != E && "Unexpected end of CCvalAssigns.");
7584 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7585 "Expected custom RegLoc.");
7586 CCValAssign RegVA = ArgLocs[I++];
7587 assert(RegVA.getValNo() == OriginalValNo &&
7588 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7589 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7590 DAG.getConstant(LoadOffset, dl, PtrVT));
7591 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7592 MemOpChains.push_back(Load.getValue(1));
7593 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7594 LoadOffset += PtrByteSize;
7595 };
7596
7597 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7598 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7599 // R10.
7600 HandleCustomVecRegLoc();
7601 HandleCustomVecRegLoc();
7602
7603 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7604 ArgLocs[I].getValNo() == OriginalValNo) {
7605 assert(!IsPPC64 &&
7606 "Only 2 custom RegLocs expected for 64-bit codegen.");
7607 HandleCustomVecRegLoc();
7608 HandleCustomVecRegLoc();
7609 }
7610
7611 continue;
7612 }
7613
7614 if (VA.isMemLoc()) {
7615 SDValue PtrOff =
7616 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7617 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7618 MemOpChains.push_back(
7619 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7620
7621 continue;
7622 }
7623
7624 if (!ValVT.isFloatingPoint())
7625 report_fatal_error(
7626 "Unexpected register handling for calling convention.");
7627
7628 // Custom handling is used for GPR initializations for vararg float
7629 // arguments.
7630 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7631 LocVT.isInteger() &&
7632 "Custom register handling only expected for VarArg.");
7633
7634 SDValue ArgAsInt =
7635 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7636
7637 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7638 // f32 in 32-bit GPR
7639 // f64 in 64-bit GPR
7640 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7641 else if (Arg.getValueType().getFixedSizeInBits() <
7642 LocVT.getFixedSizeInBits())
7643 // f32 in 64-bit GPR.
7644 RegsToPass.push_back(std::make_pair(
7645 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7646 else {
7647 // f64 in two 32-bit GPRs
7648 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7649 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7650 "Unexpected custom register for argument!");
7651 CCValAssign &GPR1 = VA;
7652 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7653 DAG.getConstant(32, dl, MVT::i8));
7654 RegsToPass.push_back(std::make_pair(
7655 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7656
7657 if (I != E) {
7658 // If only 1 GPR was available, there will only be one custom GPR and
7659 // the argument will also pass in memory.
7660 CCValAssign &PeekArg = ArgLocs[I];
7661 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7662 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7663 CCValAssign &GPR2 = ArgLocs[I++];
7664 RegsToPass.push_back(std::make_pair(
7665 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7666 }
7667 }
7668 }
7669 }
7670
7671 if (!MemOpChains.empty())
7672 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7673
7674 // For indirect calls, we need to save the TOC base to the stack for
7675 // restoration after the call.
7676 if (CFlags.IsIndirect) {
7677 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7678 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7679 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7680 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7681 const unsigned TOCSaveOffset =
7682 Subtarget.getFrameLowering()->getTOCSaveOffset();
7683
7684 setUsesTOCBasePtr(DAG);
7685 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7686 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7687 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7688 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7689 Chain = DAG.getStore(
7690 Val.getValue(1), dl, Val, AddPtr,
7691 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7692 }
7693
7694 // Build a sequence of copy-to-reg nodes chained together with token chain
7695 // and flag operands which copy the outgoing args into the appropriate regs.
7696 SDValue InGlue;
7697 for (auto Reg : RegsToPass) {
7698 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7699 InGlue = Chain.getValue(1);
7700 }
7701
7702 const int SPDiff = 0;
7703 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7704 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7705}
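
// Illustrative sketch, not part of this file: the by-val "residue" loads built
// in LowerCall_AIX above. The residue (ByValSize % PtrByteSize bytes) is
// covered by descending power-of-two loads, each shifted so the bytes end up
// left-justified in the register. The struct and helper below are invented for
// illustration.
#include <vector>
struct ResidueLoad {
  unsigned LoadBytes;
  unsigned ShiftBits;
};
static std::vector<ResidueLoad> planResidueLoads(unsigned ResidueBytes,
                                                 unsigned PtrByteSize) {
  std::vector<ResidueLoad> Plan;
  for (unsigned Done = 0; Done != ResidueBytes;) {
    unsigned N = 1;
    while (N * 2 <= ResidueBytes - Done) // largest power of two that still fits
      N *= 2;
    Done += N;
    Plan.push_back({N, PtrByteSize * 8 - Done * 8}); // left-justify in the GPR
  }
  return Plan;
}
// For a 7-byte by-val on PPC64, planResidueLoads(7, 8) yields
// {4 bytes, shl 32}, {2 bytes, shl 16}, {1 byte, shl 8}, matching the comment
// above about 4-, 2- and 1-byte loads.
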
7706
7707bool
7708PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7709 MachineFunction &MF, bool isVarArg,
7710 const SmallVectorImpl<ISD::OutputArg> &Outs,
7711 LLVMContext &Context) const {
7712 SmallVector<CCValAssign, 16> RVLocs;
7713 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7714 return CCInfo.CheckReturn(
7715 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7716 ? RetCC_PPC_Cold
7717 : RetCC_PPC);
7718}
7719
7720SDValue
7721PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7722 bool isVarArg,
7723 const SmallVectorImpl<ISD::OutputArg> &Outs,
7724 const SmallVectorImpl<SDValue> &OutVals,
7725 const SDLoc &dl, SelectionDAG &DAG) const {
7726 SmallVector<CCValAssign, 16> RVLocs;
7727 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7728 *DAG.getContext());
7729 CCInfo.AnalyzeReturn(Outs,
7730 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7731 ? RetCC_PPC_Cold
7732 : RetCC_PPC);
7733
7734 SDValue Glue;
7735 SmallVector<SDValue, 4> RetOps(1, Chain);
7736
7737 // Copy the result values into the output registers.
7738 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7739 CCValAssign &VA = RVLocs[i];
7740 assert(VA.isRegLoc() && "Can only return in registers!");
7741
7742 SDValue Arg = OutVals[RealResIdx];
7743
7744 switch (VA.getLocInfo()) {
7745 default: llvm_unreachable("Unknown loc info!");
7746 case CCValAssign::Full: break;
7747 case CCValAssign::AExt:
7748 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7749 break;
7750 case CCValAssign::ZExt:
7751 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7752 break;
7753 case CCValAssign::SExt:
7754 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7755 break;
7756 }
7757 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7758 bool isLittleEndian = Subtarget.isLittleEndian();
7759 // Legalize ret f64 -> ret 2 x i32.
7760 SDValue SVal =
7761 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7762 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7763 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7764 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7765 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7766 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7767 Glue = Chain.getValue(1);
7768 VA = RVLocs[++i]; // skip ahead to next loc
7769 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7770 } else
7771 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7772 Glue = Chain.getValue(1);
7773 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7774 }
7775
7776 RetOps[0] = Chain; // Update chain.
7777
7778 // Add the glue if we have it.
7779 if (Glue.getNode())
7780 RetOps.push_back(Glue);
7781
7782 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7783}
7784
7785SDValue
7786PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7787 SelectionDAG &DAG) const {
7788 SDLoc dl(Op);
7789
7790 // Get the correct type for integers.
7791 EVT IntVT = Op.getValueType();
7792
7793 // Get the inputs.
7794 SDValue Chain = Op.getOperand(0);
7795 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7796 // Build a DYNAREAOFFSET node.
7797 SDValue Ops[2] = {Chain, FPSIdx};
7798 SDVTList VTs = DAG.getVTList(IntVT);
7799 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7800}
7801
7802SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7803 SelectionDAG &DAG) const {
7804 // When we pop the dynamic allocation we need to restore the SP link.
7805 SDLoc dl(Op);
7806
7807 // Get the correct type for pointers.
7808 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7809
7810 // Construct the stack pointer operand.
7811 bool isPPC64 = Subtarget.isPPC64();
7812 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7813 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7814
7815 // Get the operands for the STACKRESTORE.
7816 SDValue Chain = Op.getOperand(0);
7817 SDValue SaveSP = Op.getOperand(1);
7818
7819 // Load the old link SP.
7820 SDValue LoadLinkSP =
7821 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7822
7823 // Restore the stack pointer.
7824 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7825
7826 // Store the old link SP.
7827 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7828}
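
// Illustrative sketch, not part of this file: the STACKRESTORE sequence above
// in plain C terms. The old back-chain link is read through the current stack
// pointer, the stack pointer is restored, and the link is stored at the new
// top of stack. The function below is invented for illustration.
static void stackRestoreSketch(char **StackPointer, char *SaveSP) {
  char *OldLink = *(char **)*StackPointer; // load the old SP link
  *StackPointer = SaveSP;                  // restore the stack pointer
  *(char **)*StackPointer = OldLink;       // store the old link back
}
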
7829
7830SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7831 MachineFunction &MF = DAG.getMachineFunction();
7832 bool isPPC64 = Subtarget.isPPC64();
7833 EVT PtrVT = getPointerTy(MF.getDataLayout());
7834
7835 // Get the current return address save index. The users of this index will be
7836 // primarily DYNALLOC instructions.
7837 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7838 int RASI = FI->getReturnAddrSaveIndex();
7839
7840 // If the return address save index hasn't been defined yet.
7841 if (!RASI) {
7842 // Find out the fixed offset of the return address save area.
7843 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7844 // Allocate the frame index for the return address save area.
7845 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7846 // Save the result.
7847 FI->setReturnAddrSaveIndex(RASI);
7848 }
7849 return DAG.getFrameIndex(RASI, PtrVT);
7850}
7851
7852SDValue
7853PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7854 MachineFunction &MF = DAG.getMachineFunction();
7855 bool isPPC64 = Subtarget.isPPC64();
7856 EVT PtrVT = getPointerTy(MF.getDataLayout());
7857
7858 // Get current frame pointer save index. The users of this index will be
7859 // primarily DYNALLOC instructions.
7860 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7861 int FPSI = FI->getFramePointerSaveIndex();
7862
7863 // If the frame pointer save index hasn't been defined yet.
7864 if (!FPSI) {
7865 // Find out the fixed offset of the frame pointer save area.
7866 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7867 // Allocate the frame index for frame pointer save area.
7868 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7869 // Save the result.
7870 FI->setFramePointerSaveIndex(FPSI);
7871 }
7872 return DAG.getFrameIndex(FPSI, PtrVT);
7873}
7874
7875SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7876 SelectionDAG &DAG) const {
7877 MachineFunction &MF = DAG.getMachineFunction();
7878 // Get the inputs.
7879 SDValue Chain = Op.getOperand(0);
7880 SDValue Size = Op.getOperand(1);
7881 SDLoc dl(Op);
7882
7883 // Get the correct type for pointers.
7884 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7885 // Negate the size.
7886 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7887 DAG.getConstant(0, dl, PtrVT), Size);
7888 // Construct a node for the frame pointer save index.
7889 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7890 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7891 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7892 if (hasInlineStackProbe(MF))
7893 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7894 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7895}
7896
7897SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7898 SelectionDAG &DAG) const {
7899 MachineFunction &MF = DAG.getMachineFunction();
7900
7901 bool isPPC64 = Subtarget.isPPC64();
7902 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7903
7904 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7905 return DAG.getFrameIndex(FI, PtrVT);
7906}
7907
7908SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7909 SelectionDAG &DAG) const {
7910 SDLoc DL(Op);
7911 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7912 DAG.getVTList(MVT::i32, MVT::Other),
7913 Op.getOperand(0), Op.getOperand(1));
7914}
7915
7916SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7917 SelectionDAG &DAG) const {
7918 SDLoc DL(Op);
7919 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7920 Op.getOperand(0), Op.getOperand(1));
7921}
7922
7923SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7924 if (Op.getValueType().isVector())
7925 return LowerVectorLoad(Op, DAG);
7926
7927 assert(Op.getValueType() == MVT::i1 &&
7928 "Custom lowering only for i1 loads");
7929
7930 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7931
7932 SDLoc dl(Op);
7933 LoadSDNode *LD = cast<LoadSDNode>(Op);
7934
7935 SDValue Chain = LD->getChain();
7936 SDValue BasePtr = LD->getBasePtr();
7937 MachineMemOperand *MMO = LD->getMemOperand();
7938
7939 SDValue NewLD =
7940 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7941 BasePtr, MVT::i8, MMO);
7942 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7943
7944 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7945 return DAG.getMergeValues(Ops, dl);
7946}
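
// Illustrative sketch, not part of this file: the i1 load above in plain C
// terms, assuming the i1 occupies one byte in memory. The function below is
// invented for illustration.
static bool loadI1Sketch(const unsigned char *Ptr) {
  unsigned Wide = *Ptr;   // EXTLOAD of the byte into a full register
  return (Wide & 1) != 0; // TRUNCATE to i1 keeps the low bit
}
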
7947
7948SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7949 if (Op.getOperand(1).getValueType().isVector())
7950 return LowerVectorStore(Op, DAG);
7951
7952 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7953 "Custom lowering only for i1 stores");
7954
7955 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7956
7957 SDLoc dl(Op);
7958 StoreSDNode *ST = cast<StoreSDNode>(Op);
7959
7960 SDValue Chain = ST->getChain();
7961 SDValue BasePtr = ST->getBasePtr();
7962 SDValue Value = ST->getValue();
7963 MachineMemOperand *MMO = ST->getMemOperand();
7964
7965 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7966 Value);
7967 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7968}
7969
7970// FIXME: Remove this once the ANDI glue bug is fixed:
7971SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7972 assert(Op.getValueType() == MVT::i1 &&
7973 "Custom lowering only for i1 results");
7974
7975 SDLoc DL(Op);
7976 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7977}
7978
7979SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7980 SelectionDAG &DAG) const {
7981
7982 // Implements a vector truncate that fits in a vector register as a shuffle.
7983 // We want to legalize vector truncates down to where the source fits in
7984 // a vector register (and target is therefore smaller than vector register
7985 // size). At that point legalization will try to custom lower the sub-legal
7986 // result and get here - where we can contain the truncate as a single target
7987 // operation.
7988
7989 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7990 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7991 //
7992 // We will implement it for big-endian ordering as this (where u denotes
7993 // an undefined lane):
7994 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7995 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7996 //
7997 // The same operation in little-endian ordering will be:
7998 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7999 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8000
8001 EVT TrgVT = Op.getValueType();
8002 assert(TrgVT.isVector() && "Vector type expected.");
8003 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8004 EVT EltVT = TrgVT.getVectorElementType();
8005 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8006 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8007 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8008 return SDValue();
8009
8010 SDValue N1 = Op.getOperand(0);
8011 EVT SrcVT = N1.getValueType();
8012 unsigned SrcSize = SrcVT.getSizeInBits();
8013 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8014 !llvm::has_single_bit<uint32_t>(
8015 SrcVT.getVectorElementType().getSizeInBits()))
8016 return SDValue();
8017 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8018 return SDValue();
8019
8020 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8021 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8022
8023 SDLoc DL(Op);
8024 SDValue Op1, Op2;
8025 if (SrcSize == 256) {
8026 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8027 EVT SplitVT =
8028 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8029 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8030 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8031 DAG.getConstant(0, DL, VecIdxTy));
8032 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8033 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8034 }
8035 else {
8036 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8037 Op2 = DAG.getUNDEF(WideVT);
8038 }
8039
8040 // First list the elements we want to keep.
8041 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8042 SmallVector<int, 16> ShuffV;
8043 if (Subtarget.isLittleEndian())
8044 for (unsigned i = 0; i < TrgNumElts; ++i)
8045 ShuffV.push_back(i * SizeMult);
8046 else
8047 for (unsigned i = 1; i <= TrgNumElts; ++i)
8048 ShuffV.push_back(i * SizeMult - 1);
8049
8050 // Populate the remaining elements with undefs.
8051 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8052 // ShuffV.push_back(i + WideNumElts);
8053 ShuffV.push_back(WideNumElts + 1);
8054
8055 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8056 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8057 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8058}
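
// Illustrative sketch, not part of this file: the shuffle mask built above for
// a v4i32 -> v4i16 truncate (TrgNumElts == 4, SizeMult == 2, WideNumElts == 8)
// is <0, 2, 4, 6, u, u, u, u> on little-endian and <1, 3, 5, 7, u, u, u, u> on
// big-endian. The helper below is invented for illustration and uses -1 for
// the don't-care lanes.
#include <vector>
static std::vector<int> truncShuffleMask(unsigned TrgNumElts,
                                         unsigned WideNumElts,
                                         unsigned SizeMult,
                                         bool IsLittleEndian) {
  std::vector<int> Mask;
  if (IsLittleEndian)
    for (unsigned i = 0; i < TrgNumElts; ++i)
      Mask.push_back(i * SizeMult);     // low subword comes first
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      Mask.push_back(i * SizeMult - 1); // low subword comes last
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    Mask.push_back(-1);                 // don't-care lanes
  return Mask;
}
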
8059
8060/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8061/// possible.
8062SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8063 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8064 EVT ResVT = Op.getValueType();
8065 EVT CmpVT = Op.getOperand(0).getValueType();
8066 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8067 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8068 SDLoc dl(Op);
8069
8070 // Without power9-vector, we don't have a native instruction for f128 comparison.
8071 // Following transformation to libcall is needed for setcc:
8072 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8073 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8074 SDValue Z = DAG.getSetCC(
8075 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8076 LHS, RHS, CC);
8077 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8078 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8079 }
8080
8081 // Not FP, or using SPE? Not a fsel.
8082 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8083 Subtarget.hasSPE())
8084 return Op;
8085
8086 SDNodeFlags Flags = Op.getNode()->getFlags();
8087
8088 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8089 // presence of infinities.
8090 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8091 switch (CC) {
8092 default:
8093 break;
8094 case ISD::SETOGT:
8095 case ISD::SETGT:
8096 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8097 case ISD::SETOLT:
8098 case ISD::SETLT:
8099 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8100 }
8101 }
8102
8103 // We might be able to do better than this under some circumstances, but in
8104 // general, fsel-based lowering of select is a finite-math-only optimization.
8105 // For more information, see section F.3 of the 2.06 ISA specification.
8106 // With ISA 3.0
8107 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8108 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8109 ResVT == MVT::f128)
8110 return Op;
8111
8112 // If the RHS of the comparison is a 0.0, we don't need to do the
8113 // subtraction at all.
8114 SDValue Sel1;
8115 if (isFloatingPointZero(RHS))
8116 switch (CC) {
8117 default: break; // SETUO etc aren't handled by fsel.
8118 case ISD::SETNE:
8119 std::swap(TV, FV);
8120 [[fallthrough]];
8121 case ISD::SETEQ:
8122 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8123 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8124 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8125 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8126 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8127 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8128 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8129 case ISD::SETULT:
8130 case ISD::SETLT:
8131 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8132 [[fallthrough]];
8133 case ISD::SETOGE:
8134 case ISD::SETGE:
8135 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8136 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8137 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8138 case ISD::SETUGT:
8139 case ISD::SETGT:
8140 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8141 [[fallthrough]];
8142 case ISD::SETOLE:
8143 case ISD::SETLE:
8144 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8145 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8146 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8147 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8148 }
8149
8150 SDValue Cmp;
8151 switch (CC) {
8152 default: break; // SETUO etc aren't handled by fsel.
8153 case ISD::SETNE:
8154 std::swap(TV, FV);
8155 [[fallthrough]];
8156 case ISD::SETEQ:
8157 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8158 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8159 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8160 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8161 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8162 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8163 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8164 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8165 case ISD::SETULT:
8166 case ISD::SETLT:
8167 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8168 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8169 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8170 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8171 case ISD::SETOGE:
8172 case ISD::SETGE:
8173 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8174 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8175 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8176 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8177 case ISD::SETUGT:
8178 case ISD::SETGT:
8179 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8180 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8181 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8182 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8183 case ISD::SETOLE:
8184 case ISD::SETLE:
8185 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8186 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8187 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8188 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8189 }
8190 return Op;
8191}
8192
8193static unsigned getPPCStrictOpcode(unsigned Opc) {
8194 switch (Opc) {
8195 default:
8196 llvm_unreachable("No strict version of this opcode!");
8197 case PPCISD::FCTIDZ:
8198 return PPCISD::STRICT_FCTIDZ;
8199 case PPCISD::FCTIWZ:
8200 return PPCISD::STRICT_FCTIWZ;
8201 case PPCISD::FCTIDUZ:
8202 return PPCISD::STRICT_FCTIDUZ;
8203 case PPCISD::FCTIWUZ:
8204 return PPCISD::STRICT_FCTIWUZ;
8205 case PPCISD::FCFID:
8206 return PPCISD::STRICT_FCFID;
8207 case PPCISD::FCFIDU:
8208 return PPCISD::STRICT_FCFIDU;
8209 case PPCISD::FCFIDS:
8210 return PPCISD::STRICT_FCFIDS;
8211 case PPCISD::FCFIDUS:
8212 return PPCISD::STRICT_FCFIDUS;
8213 }
8214}
8215
8216static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8217 const PPCSubtarget &Subtarget) {
8218 SDLoc dl(Op);
8219 bool IsStrict = Op->isStrictFPOpcode();
8220 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8221 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8222
8223 // TODO: Any other flags to propagate?
8224 SDNodeFlags Flags;
8225 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8226
8227 // For strict nodes, source is the second operand.
8228 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8229 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8230 MVT DestTy = Op.getSimpleValueType();
8231 assert(Src.getValueType().isFloatingPoint() &&
8232 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8233 DestTy == MVT::i64) &&
8234 "Invalid FP_TO_INT types");
8235 if (Src.getValueType() == MVT::f32) {
8236 if (IsStrict) {
8237 Src =
8238 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8239 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8240 Chain = Src.getValue(1);
8241 } else
8242 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8243 }
8244 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8245 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8246 unsigned Opc = ISD::DELETED_NODE;
8247 switch (DestTy.SimpleTy) {
8248 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8249 case MVT::i32:
8250 Opc = IsSigned ? PPCISD::FCTIWZ
8251 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8252 break;
8253 case MVT::i64:
8254 assert((IsSigned || Subtarget.hasFPCVT()) &&
8255 "i64 FP_TO_UINT is supported only with FPCVT");
8256 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8257 }
8258 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8259 SDValue Conv;
8260 if (IsStrict) {
8261 Opc = getPPCStrictOpcode(Opc);
8262 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8263 Flags);
8264 } else {
8265 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8266 }
8267 return Conv;
8268}
8269
8270void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8271 SelectionDAG &DAG,
8272 const SDLoc &dl) const {
8273 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8274 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8275 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8276 bool IsStrict = Op->isStrictFPOpcode();
8277
8278 // Convert the FP value to an int value through memory.
8279 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8280 (IsSigned || Subtarget.hasFPCVT());
8281 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8282 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8283 MachinePointerInfo MPI =
8284 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8285
8286 // Emit a store to the stack slot.
8287 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8288 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8289 if (i32Stack) {
8290 MachineFunction &MF = DAG.getMachineFunction();
8291 Alignment = Align(4);
8292 MachineMemOperand *MMO =
8293 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8294 SDValue Ops[] = { Chain, Tmp, FIPtr };
8295 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8296 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8297 } else
8298 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8299
8300 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8301 // add in a bias on big endian.
8302 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8303 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8304 DAG.getConstant(4, dl, FIPtr.getValueType()));
8305 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8306 }
8307
8308 RLI.Chain = Chain;
8309 RLI.Ptr = FIPtr;
8310 RLI.MPI = MPI;
8311 RLI.Alignment = Alignment;
8312}
8313
8314/// Custom lowers floating point to integer conversions to use
8315/// the direct move instructions available in ISA 2.07 to avoid the
8316/// need for load/store combinations.
8317SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8318 SelectionDAG &DAG,
8319 const SDLoc &dl) const {
8320 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8321 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8322 if (Op->isStrictFPOpcode())
8323 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8324 else
8325 return Mov;
8326}
8327
8328SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8329 const SDLoc &dl) const {
8330 bool IsStrict = Op->isStrictFPOpcode();
8331 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8332 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8333 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8334 EVT SrcVT = Src.getValueType();
8335 EVT DstVT = Op.getValueType();
8336
8337 // FP to INT conversions are legal for f128.
8338 if (SrcVT == MVT::f128)
8339 return Subtarget.hasP9Vector() ? Op : SDValue();
8340
8341 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8342 // PPC (the libcall is not available).
8343 if (SrcVT == MVT::ppcf128) {
8344 if (DstVT == MVT::i32) {
8345 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8346 // set other fast-math flags to FP operations in both strict and
8347 // non-strict cases. (FP_TO_SINT, FSUB)
8348 SDNodeFlags Flags;
8349 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8350
8351 if (IsSigned) {
8352 SDValue Lo, Hi;
8353 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8354
8355 // Add the two halves of the long double in round-to-zero mode, and use
8356 // a smaller FP_TO_SINT.
8357 if (IsStrict) {
8358 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8359 DAG.getVTList(MVT::f64, MVT::Other),
8360 {Op.getOperand(0), Lo, Hi}, Flags);
8361 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8362 DAG.getVTList(MVT::i32, MVT::Other),
8363 {Res.getValue(1), Res}, Flags);
8364 } else {
8365 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8366 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8367 }
8368 } else {
8369 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8370 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8371 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8372 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8373 if (IsStrict) {
8374 // Sel = Src < 0x80000000
8375 // FltOfs = select Sel, 0.0, 0x80000000
8376 // IntOfs = select Sel, 0, 0x80000000
8377 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8378 SDValue Chain = Op.getOperand(0);
8379 EVT SetCCVT =
8380 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8381 EVT DstSetCCVT =
8382 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8383 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8384 Chain, true);
8385 Chain = Sel.getValue(1);
8386
8387 SDValue FltOfs = DAG.getSelect(
8388 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8389 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8390
8391 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8392 DAG.getVTList(SrcVT, MVT::Other),
8393 {Chain, Src, FltOfs}, Flags);
8394 Chain = Val.getValue(1);
8395 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8396 DAG.getVTList(DstVT, MVT::Other),
8397 {Chain, Val}, Flags);
8398 Chain = SInt.getValue(1);
8399 SDValue IntOfs = DAG.getSelect(
8400 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8401 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8402 return DAG.getMergeValues({Result, Chain}, dl);
8403 } else {
8404 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8405 // FIXME: generated code sucks.
8406 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8407 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8408 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8409 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8410 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8411 }
8412 }
8413 }
8414
8415 return SDValue();
8416 }
8417
8418 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8419 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8420
8421 ReuseLoadInfo RLI;
8422 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8423
8424 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8425 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8426}
8427
8428// We're trying to insert a regular store, S, and then a load, L. If the
8429// incoming value, O, is a load, we might just be able to have our load use the
8430// address used by O. However, we don't know if anything else will store to
8431// that address before we can load from it. To prevent this situation, we need
8432// to insert our load, L, into the chain as a peer of O. To do this, we give L
8433// the same chain operand as O, we create a token factor from the chain results
8434// of O and L, and we replace all uses of O's chain result with that token
8435// factor (see spliceIntoChain below for this last part).
8436bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8437 ReuseLoadInfo &RLI,
8438 SelectionDAG &DAG,
8439 ISD::LoadExtType ET) const {
8440 // Conservatively skip reusing for constrained FP nodes.
8441 if (Op->isStrictFPOpcode())
8442 return false;
8443
8444 SDLoc dl(Op);
8445 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8446 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8447 if (ET == ISD::NON_EXTLOAD &&
8448 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8449 isOperationLegalOrCustom(Op.getOpcode(),
8450 Op.getOperand(0).getValueType())) {
8451
8452 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8453 return true;
8454 }
8455
8456 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8457 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8458 LD->isNonTemporal())
8459 return false;
8460 if (LD->getMemoryVT() != MemVT)
8461 return false;
8462
8463 // If the result of the load is an illegal type, then we can't build a
8464 // valid chain for reuse since the legalised loads and token factor node that
8465 // ties the legalised loads together uses a different output chain than the
8466 // illegal load.
8467 if (!isTypeLegal(LD->getValueType(0)))
8468 return false;
8469
8470 RLI.Ptr = LD->getBasePtr();
8471 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8472 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8473 "Non-pre-inc AM on PPC?");
8474 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8475 LD->getOffset());
8476 }
8477
8478 RLI.Chain = LD->getChain();
8479 RLI.MPI = LD->getPointerInfo();
8480 RLI.IsDereferenceable = LD->isDereferenceable();
8481 RLI.IsInvariant = LD->isInvariant();
8482 RLI.Alignment = LD->getAlign();
8483 RLI.AAInfo = LD->getAAInfo();
8484 RLI.Ranges = LD->getRanges();
8485
8486 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8487 return true;
8488}
8489
8490// Given the head of the old chain, ResChain, insert a token factor containing
8491// it and NewResChain, and make users of ResChain now be users of that token
8492// factor.
8493// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8494void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8495 SDValue NewResChain,
8496 SelectionDAG &DAG) const {
8497 if (!ResChain)
8498 return;
8499
8500 SDLoc dl(NewResChain);
8501
8502 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8503 NewResChain, DAG.getUNDEF(MVT::Other));
8504 assert(TF.getNode() != NewResChain.getNode() &&
8505 "A new TF really is required here");
8506
8507 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8508 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8509}
8510
8511/// Analyze the profitability of a direct move:
8512/// prefer a float load to an int load plus a direct move
8513/// when there is no integer use of the int load.
8514bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8515 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8516 if (Origin->getOpcode() != ISD::LOAD)
8517 return true;
8518
8519 // If there is no LXSIBZX/LXSIHZX, like Power8,
8520 // prefer direct move if the memory size is 1 or 2 bytes.
8521 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8522 if (!Subtarget.hasP9Vector() &&
8523 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8524 return true;
8525
8526 for (SDNode::use_iterator UI = Origin->use_begin(),
8527 UE = Origin->use_end();
8528 UI != UE; ++UI) {
8529
8530 // Only look at the users of the loaded value.
8531 if (UI.getUse().get().getResNo() != 0)
8532 continue;
8533
8534 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8535 UI->getOpcode() != ISD::UINT_TO_FP &&
8536 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8537 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8538 return true;
8539 }
8540
8541 return false;
8542}
8543
8544static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8545 const PPCSubtarget &Subtarget,
8546 SDValue Chain = SDValue()) {
8547 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8548 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8549 SDLoc dl(Op);
8550
8551 // TODO: Any other flags to propagate?
8552 SDNodeFlags Flags;
8553 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8554
8555 // If we have FCFIDS, then use it when converting to single-precision.
8556 // Otherwise, convert to double-precision and then round.
8557 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8558 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8559 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8560 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8561 if (Op->isStrictFPOpcode()) {
8562 if (!Chain)
8563 Chain = Op.getOperand(0);
8564 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8565 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8566 } else
8567 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8568}
8569
8570/// Custom lowers integer to floating point conversions to use
8571/// the direct move instructions available in ISA 2.07 to avoid the
8572/// need for load/store combinations.
8573SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8574 SelectionDAG &DAG,
8575 const SDLoc &dl) const {
8576 assert((Op.getValueType() == MVT::f32 ||
8577 Op.getValueType() == MVT::f64) &&
8578 "Invalid floating point type as target of conversion");
8579 assert(Subtarget.hasFPCVT() &&
8580 "Int to FP conversions with direct moves require FPCVT");
8581 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8582 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8583 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8584 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8585 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8586 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8587 return convertIntToFP(Op, Mov, DAG, Subtarget);
8588}
8589
8590static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8591
8592 EVT VecVT = Vec.getValueType();
8593 assert(VecVT.isVector() && "Expected a vector type.");
8594 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8595
8596 EVT EltVT = VecVT.getVectorElementType();
8597 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8598 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8599
8600 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8601 SmallVector<SDValue, 16> Ops(NumConcat);
8602 Ops[0] = Vec;
8603 SDValue UndefVec = DAG.getUNDEF(VecVT);
8604 for (unsigned i = 1; i < NumConcat; ++i)
8605 Ops[i] = UndefVec;
8606
8607 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8608}
8609
8610SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8611 const SDLoc &dl) const {
8612 bool IsStrict = Op->isStrictFPOpcode();
8613 unsigned Opc = Op.getOpcode();
8614 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8615 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8616 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8617 "Unexpected conversion type");
8618 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8619 "Supports conversions to v2f64/v4f32 only.");
8620
8621 // TODO: Any other flags to propagate?
8622 SDNodeFlags Flags;
8623 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8624
8625 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8626 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8627
8628 SDValue Wide = widenVec(DAG, Src, dl);
8629 EVT WideVT = Wide.getValueType();
8630 unsigned WideNumElts = WideVT.getVectorNumElements();
8631 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8632
8633 SmallVector<int, 16> ShuffV;
8634 for (unsigned i = 0; i < WideNumElts; ++i)
8635 ShuffV.push_back(i + WideNumElts);
8636
8637 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8638 int SaveElts = FourEltRes ? 4 : 2;
8639 if (Subtarget.isLittleEndian())
8640 for (int i = 0; i < SaveElts; i++)
8641 ShuffV[i * Stride] = i;
8642 else
8643 for (int i = 1; i <= SaveElts; i++)
8644 ShuffV[i * Stride - 1] = i - 1;
8645
8646 SDValue ShuffleSrc2 =
8647 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8648 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8649
8650 SDValue Extend;
8651 if (SignedConv) {
8652 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8653 EVT ExtVT = Src.getValueType();
8654 if (Subtarget.hasP9Altivec())
8655 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8656 IntermediateVT.getVectorNumElements());
8657
8658 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8659 DAG.getValueType(ExtVT));
8660 } else
8661 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8662
8663 if (IsStrict)
8664 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8665 {Op.getOperand(0), Extend}, Flags);
8666
8667 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8668}
8669
8670SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8671 SelectionDAG &DAG) const {
8672 SDLoc dl(Op);
8673 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8674 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8675 bool IsStrict = Op->isStrictFPOpcode();
8676 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8677 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8678
8679 // TODO: Any other flags to propagate?
8680 SDNodeFlags Flags;
8681 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8682
8683 EVT InVT = Src.getValueType();
8684 EVT OutVT = Op.getValueType();
8685 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8686 isOperationCustom(Op.getOpcode(), InVT))
8687 return LowerINT_TO_FPVector(Op, DAG, dl);
8688
8689 // Conversions to f128 are legal.
8690 if (Op.getValueType() == MVT::f128)
8691 return Subtarget.hasP9Vector() ? Op : SDValue();
8692
8693 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8694 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8695 return SDValue();
8696
8697 if (Src.getValueType() == MVT::i1) {
8698 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8699 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8700 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8701 if (IsStrict)
8702 return DAG.getMergeValues({Sel, Chain}, dl);
8703 else
8704 return Sel;
8705 }
8706
8707 // If we have direct moves, we can do all the conversion, skip the store/load
8708 // however, without FPCVT we can't do most conversions.
8709 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8710 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8711 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8712
8713 assert((IsSigned || Subtarget.hasFPCVT()) &&
8714 "UINT_TO_FP is supported only with FPCVT");
8715
8716 if (Src.getValueType() == MVT::i64) {
8717 SDValue SINT = Src;
8718 // When converting to single-precision, we actually need to convert
8719 // to double-precision first and then round to single-precision.
8720 // To avoid double-rounding effects during that operation, we have
8721 // to prepare the input operand. Bits that might be truncated when
8722 // converting to double-precision are replaced by a bit that won't
8723 // be lost at this stage, but is below the single-precision rounding
8724 // position.
8725 //
8726 // However, if -enable-unsafe-fp-math is in effect, accept double
8727 // rounding to avoid the extra overhead.
8728 if (Op.getValueType() == MVT::f32 &&
8729 !Subtarget.hasFPCVT() &&
8730 !DAG.getTarget().Options.UnsafeFPMath) {
8731
8732 // Twiddle input to make sure the low 11 bits are zero. (If this
8733 // is the case, we are guaranteed the value will fit into the 53 bit
8734 // mantissa of an IEEE double-precision value without rounding.)
8735 // If any of those low 11 bits were not zero originally, make sure
8736 // bit 12 (value 2048) is set instead, so that the final rounding
8737 // to single-precision gets the correct result.
8738 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8739 SINT, DAG.getConstant(2047, dl, MVT::i64));
8740 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8741 Round, DAG.getConstant(2047, dl, MVT::i64));
8742 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8743 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8744 Round, DAG.getConstant(-2048, dl, MVT::i64));
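// Worked example of the twiddling above: for SINT = 0x1003 the low 11 bits are
// nonzero, so (0x003 + 2047) = 0x802 sets bit 11; OR-ing with SINT and clearing
// the low 11 bits yields Round = 0x1800. For SINT = 0x1000 the low 11 bits are
// already zero and the value is left unchanged (Round = 0x1000).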
8745
8746 // However, we cannot use that value unconditionally: if the magnitude
8747 // of the input value is small, the bit-twiddling we did above might
8748 // end up visibly changing the output. Fortunately, in that case, we
8749 // don't need to twiddle bits since the original input will convert
8750 // exactly to double-precision floating-point already. Therefore,
8751 // construct a conditional to use the original value if the top 11
8752 // bits are all sign-bit copies, and use the rounded value computed
8753 // above otherwise.
8754 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8755 SINT, DAG.getConstant(53, dl, MVT::i32));
8756 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8757 Cond, DAG.getConstant(1, dl, MVT::i64));
8758 Cond = DAG.getSetCC(
8759 dl,
8760 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8761 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8762
8763 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8764 }
8765
8766 ReuseLoadInfo RLI;
8767 SDValue Bits;
8768
8769 MachineFunction &MF = DAG.getMachineFunction();
8770 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8771 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8772 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8773 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8774 } else if (Subtarget.hasLFIWAX() &&
8775 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8776 MachineMemOperand *MMO =
8777 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8778 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8779 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8780 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8781 DAG.getVTList(MVT::f64, MVT::Other),
8782 Ops, MVT::i32, MMO);
8783 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8784 } else if (Subtarget.hasFPCVT() &&
8785 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8786 MachineMemOperand *MMO =
8787 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8788 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8789 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8790 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8791 DAG.getVTList(MVT::f64, MVT::Other),
8792 Ops, MVT::i32, MMO);
8793 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8794 } else if (((Subtarget.hasLFIWAX() &&
8795 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8796 (Subtarget.hasFPCVT() &&
8797 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8798 SINT.getOperand(0).getValueType() == MVT::i32) {
8799 MachineFrameInfo &MFI = MF.getFrameInfo();
8800 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8801
8802 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8803 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8804
8805 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8806 MachinePointerInfo::getFixedStack(
8807 DAG.getMachineFunction(), FrameIdx));
8808 Chain = Store;
8809
8810 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8811 "Expected an i32 store");
8812
8813 RLI.Ptr = FIdx;
8814 RLI.Chain = Chain;
8815 RLI.MPI =
8816 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8817 RLI.Alignment = Align(4);
8818
8819 MachineMemOperand *MMO =
8820 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8821 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8822 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8823 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8824 PPCISD::LFIWZX : PPCISD::LFIWAX,
8825 dl, DAG.getVTList(MVT::f64, MVT::Other),
8826 Ops, MVT::i32, MMO);
8827 Chain = Bits.getValue(1);
8828 } else
8829 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8830
8831 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8832 if (IsStrict)
8833 Chain = FP.getValue(1);
8834
8835 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8836 if (IsStrict)
8837 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8838 DAG.getVTList(MVT::f32, MVT::Other),
8839 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8840 else
8841 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8842 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8843 }
8844 return FP;
8845 }
8846
8847 assert(Src.getValueType() == MVT::i32 &&
8848 "Unhandled INT_TO_FP type in custom expander!");
8849 // Since we only generate this in 64-bit mode, we can take advantage of
8850 // 64-bit registers. In particular, sign extend the input value into the
8851 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8852 // then lfd it and fcfid it.
8853 MachineFunction &MF = DAG.getMachineFunction();
8854 MachineFrameInfo &MFI = MF.getFrameInfo();
8855 EVT PtrVT = getPointerTy(MF.getDataLayout());
8856
8857 SDValue Ld;
8858 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8859 ReuseLoadInfo RLI;
8860 bool ReusingLoad;
8861 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8862 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8863 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8864
8865 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8866 MachinePointerInfo::getFixedStack(
8867 DAG.getMachineFunction(), FrameIdx));
8868 Chain = Store;
8869
8870 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8871 "Expected an i32 store");
8872
8873 RLI.Ptr = FIdx;
8874 RLI.Chain = Chain;
8875 RLI.MPI =
8876 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8877 RLI.Alignment = Align(4);
8878 }
8879
8880 MachineMemOperand *MMO =
8881 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8882 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8883 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8884 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8885 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8886 MVT::i32, MMO);
8887 Chain = Ld.getValue(1);
8888 if (ReusingLoad)
8889 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8890 } else {
8891 assert(Subtarget.isPPC64() &&
8892 "i32->FP without LFIWAX supported only on PPC64");
8893
8894 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8895 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8896
8897 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8898
8899 // STD the extended value into the stack slot.
8900 SDValue Store = DAG.getStore(
8901 Chain, dl, Ext64, FIdx,
8902 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8903 Chain = Store;
8904
8905 // Load the value as a double.
8906 Ld = DAG.getLoad(
8907 MVT::f64, dl, Chain, FIdx,
8908 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8909 Chain = Ld.getValue(1);
8910 }
8911
8912 // FCFID it and return it.
8913 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8914 if (IsStrict)
8915 Chain = FP.getValue(1);
8916 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8917 if (IsStrict)
8918 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8919 DAG.getVTList(MVT::f32, MVT::Other),
8920 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8921 else
8922 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8923 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8924 }
8925 return FP;
8926}
8927
8928SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
8929 SelectionDAG &DAG) const {
8930 SDLoc dl(Op);
8931 /*
8932 The rounding mode is in bits 30:31 of FPSR, and has the following
8933 settings:
8934 00 Round to nearest
8935 01 Round to 0
8936 10 Round to +inf
8937 11 Round to -inf
8938
8939 GET_ROUNDING, on the other hand, expects the following:
8940 -1 Undefined
8941 0 Round to 0
8942 1 Round to nearest
8943 2 Round to +inf
8944 3 Round to -inf
8945
8946 To perform the conversion, we do:
8947 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8948 */
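// Spelling the conversion out for all four FPSCR encodings:
//   00: 0 ^ ((~0 & 3) >> 1) = 0 ^ 1 = 1  (round to nearest)
//   01: 1 ^ ((~1 & 3) >> 1) = 1 ^ 1 = 0  (round to 0)
//   10: 2 ^ ((~2 & 3) >> 1) = 2 ^ 0 = 2  (round to +inf)
//   11: 3 ^ ((~3 & 3) >> 1) = 3 ^ 0 = 3  (round to -inf)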
8949
8950 MachineFunction &MF = DAG.getMachineFunction();
8951 EVT VT = Op.getValueType();
8952 EVT PtrVT = getPointerTy(MF.getDataLayout());
8953
8954 // Save FP Control Word to register
8955 SDValue Chain = Op.getOperand(0);
8956 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8957 Chain = MFFS.getValue(1);
8958
8959 SDValue CWD;
8960 if (isTypeLegal(MVT::i64)) {
8961 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8962 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8963 } else {
8964 // Save FP register to stack slot
8965 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8966 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8967 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8968
8969 // Load FP Control Word from low 32 bits of stack slot.
8970 assert(!Subtarget.isLittleEndian() &&
8971 "Stack slot adjustment is valid only on big endian subtargets!");
8972 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8973 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8974 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8975 Chain = CWD.getValue(1);
8976 }
8977
8978 // Transform as necessary
8979 SDValue CWD1 =
8980 DAG.getNode(ISD::AND, dl, MVT::i32,
8981 CWD, DAG.getConstant(3, dl, MVT::i32));
8982 SDValue CWD2 =
8983 DAG.getNode(ISD::SRL, dl, MVT::i32,
8984 DAG.getNode(ISD::AND, dl, MVT::i32,
8985 DAG.getNode(ISD::XOR, dl, MVT::i32,
8986 CWD, DAG.getConstant(3, dl, MVT::i32)),
8987 DAG.getConstant(3, dl, MVT::i32)),
8988 DAG.getConstant(1, dl, MVT::i32));
8989
8990 SDValue RetVal =
8991 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8992
8993 RetVal =
8994 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8995 dl, VT, RetVal);
8996
8997 return DAG.getMergeValues({RetVal, Chain}, dl);
8998}
8999
9000SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9001 EVT VT = Op.getValueType();
9002 unsigned BitWidth = VT.getSizeInBits();
9003 SDLoc dl(Op);
9004 assert(Op.getNumOperands() == 3 &&
9005 VT == Op.getOperand(1).getValueType() &&
9006 "Unexpected SHL!");
9007
9008 // Expand into a bunch of logical ops. Note that these ops
9009 // depend on the PPC behavior for oversized shift amounts.
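// Concretely, with BitWidth = 64 the code below computes
//   OutHi = (Hi << Amt) | (Lo >> (64 - Amt)) | (Lo << (Amt - 64)).
// Because PPC shifts by amounts in the range [64, 127] produce zero, the last
// term vanishes when Amt < 64 and the first two terms vanish when Amt >= 64,
// so no compare/select is required to handle oversized shift amounts.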
9010 SDValue Lo = Op.getOperand(0);
9011 SDValue Hi = Op.getOperand(1);
9012 SDValue Amt = Op.getOperand(2);
9013 EVT AmtVT = Amt.getValueType();
9014
9015 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9016 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9017 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9018 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9019 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9020 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9021 DAG.getConstant(-BitWidth, dl, AmtVT));
9022 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9023 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9024 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9025 SDValue OutOps[] = { OutLo, OutHi };
9026 return DAG.getMergeValues(OutOps, dl);
9027}
9028
9029SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9030 EVT VT = Op.getValueType();
9031 SDLoc dl(Op);
9032 unsigned BitWidth = VT.getSizeInBits();
9033 assert(Op.getNumOperands() == 3 &&
9034 VT == Op.getOperand(1).getValueType() &&
9035 "Unexpected SRL!");
9036
9037 // Expand into a bunch of logical ops. Note that these ops
9038 // depend on the PPC behavior for oversized shift amounts.
9039 SDValue Lo = Op.getOperand(0);
9040 SDValue Hi = Op.getOperand(1);
9041 SDValue Amt = Op.getOperand(2);
9042 EVT AmtVT = Amt.getValueType();
9043
9044 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9045 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9046 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9047 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9048 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9049 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9050 DAG.getConstant(-BitWidth, dl, AmtVT));
9051 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9052 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9053 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9054 SDValue OutOps[] = { OutLo, OutHi };
9055 return DAG.getMergeValues(OutOps, dl);
9056}
9057
9058SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9059 SDLoc dl(Op);
9060 EVT VT = Op.getValueType();
9061 unsigned BitWidth = VT.getSizeInBits();
9062 assert(Op.getNumOperands() == 3 &&
9063 VT == Op.getOperand(1).getValueType() &&
9064 "Unexpected SRA!");
9065
9066 // Expand into a bunch of logical ops, followed by a select_cc.
9067 SDValue Lo = Op.getOperand(0);
9068 SDValue Hi = Op.getOperand(1);
9069 SDValue Amt = Op.getOperand(2);
9070 EVT AmtVT = Amt.getValueType();
9071
9072 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9073 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9074 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9075 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9076 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9077 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9078 DAG.getConstant(-BitWidth, dl, AmtVT));
9079 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9080 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9081 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9082 Tmp4, Tmp6, ISD::SETLE);
9083 SDValue OutOps[] = { OutLo, OutHi };
9084 return DAG.getMergeValues(OutOps, dl);
9085}
9086
9087SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9088 SelectionDAG &DAG) const {
9089 SDLoc dl(Op);
9090 EVT VT = Op.getValueType();
9091 unsigned BitWidth = VT.getSizeInBits();
9092
9093 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9094 SDValue X = Op.getOperand(0);
9095 SDValue Y = Op.getOperand(1);
9096 SDValue Z = Op.getOperand(2);
9097 EVT AmtVT = Z.getValueType();
9098
9099 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9100 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9101 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9102 // on PowerPC shift by BW being well defined.
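// For example, an i64 fshl with Z = 10 becomes (X << 10) | (Y >> 54). When
// Z = 0 the masking leaves Z = 0 and SubZ = BW, and a PPC shift by BW yields
// zero, so the result is simply X (or Y for fshr), as the funnel-shift
// semantics require, with no extra select.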
9103 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9104 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9105 SDValue SubZ =
9106 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9107 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9108 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9109 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9110}
9111
9112//===----------------------------------------------------------------------===//
9113// Vector related lowering.
9114//
9115
9116/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9117/// element size of SplatSize. Cast the result to VT.
9118static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9119 SelectionDAG &DAG, const SDLoc &dl) {
9120 static const MVT VTys[] = { // canonical VT to use for each size.
9121 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9122 };
9123
9124 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9125
9126 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9127 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9128 SplatSize = 1;
9129 Val = 0xFF;
9130 }
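// For example, a 2-byte splat of 0xFFFF is canonicalized by the block above to
// a 1-byte splat of 0xFF, which maps onto a single vspltisb -1.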
9131
9132 EVT CanonicalVT = VTys[SplatSize-1];
9133
9134 // Build a canonical splat for this value.
9135 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9136}
9137
9138/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9139/// specified intrinsic ID.
9140static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9141 const SDLoc &dl, EVT DestVT = MVT::Other) {
9142 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9143 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9144 DAG.getConstant(IID, dl, MVT::i32), Op);
9145}
9146
9147/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9148/// specified intrinsic ID.
9149static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9150 SelectionDAG &DAG, const SDLoc &dl,
9151 EVT DestVT = MVT::Other) {
9152 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9153 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9154 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9155}
9156
9157/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9158/// specified intrinsic ID.
9159static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9160 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9161 EVT DestVT = MVT::Other) {
9162 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9163 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9164 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9165}
9166
9167/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9168/// amount. The result has the specified value type.
9169static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9170 SelectionDAG &DAG, const SDLoc &dl) {
9171 // Force LHS/RHS to be the right type.
9172 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9173 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9174
9175 int Ops[16];
9176 for (unsigned i = 0; i != 16; ++i)
9177 Ops[i] = i + Amt;
9178 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9179 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9180}
9181
9182/// Do we have an efficient pattern in a .td file for this node?
9183///
9184/// \param V - pointer to the BuildVectorSDNode being matched
9185/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9186///
9187/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9188/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9189/// the opposite is true (expansion is beneficial) are:
9190/// - The node builds a vector out of integers that are not 32 or 64-bits
9191/// - The node builds a vector out of constants
9192/// - The node is a "load-and-splat"
9193/// In all other cases, we will choose to keep the BUILD_VECTOR.
9194static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9195 bool HasDirectMove,
9196 bool HasP8Vector) {
9197 EVT VecVT = V->getValueType(0);
9198 bool RightType = VecVT == MVT::v2f64 ||
9199 (HasP8Vector && VecVT == MVT::v4f32) ||
9200 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9201 if (!RightType)
9202 return false;
9203
9204 bool IsSplat = true;
9205 bool IsLoad = false;
9206 SDValue Op0 = V->getOperand(0);
9207
9208 // This function is called in a block that confirms the node is not a constant
9209 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9210 // different constants.
9211 if (V->isConstant())
9212 return false;
9213 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9214 if (V->getOperand(i).isUndef())
9215 return false;
9216 // We want to expand nodes that represent load-and-splat even if the
9217 // loaded value is a floating point truncation or conversion to int.
9218 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9219 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9220 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9221 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9222 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9223 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9224 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9225 IsLoad = true;
9226 // If the operands are different or the input is not a load and has more
9227 // uses than just this BV node, then it isn't a splat.
9228 if (V->getOperand(i) != Op0 ||
9229 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9230 IsSplat = false;
9231 }
9232 return !(IsSplat && IsLoad);
9233}
9234
9235// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9236SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9237
9238 SDLoc dl(Op);
9239 SDValue Op0 = Op->getOperand(0);
9240
9241 if ((Op.getValueType() != MVT::f128) ||
9242 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9243 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9244 (Op0.getOperand(1).getValueType() != MVT::i64))
9245 return SDValue();
9246
9247 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9248 Op0.getOperand(1));
9249}
9250
9251static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9252 const SDValue *InputLoad = &Op;
9253 while (InputLoad->getOpcode() == ISD::BITCAST)
9254 InputLoad = &InputLoad->getOperand(0);
9255 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9256 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9257 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9258 InputLoad = &InputLoad->getOperand(0);
9259 }
9260 if (InputLoad->getOpcode() != ISD::LOAD)
9261 return nullptr;
9262 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9263 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9264}
9265
9266// Convert the argument APFloat to a single precision APFloat if there is no
9267// loss in information during the conversion to single precision APFloat and the
9268// resulting number is not a denormal number. Return true if successful.
9269bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9270 APFloat APFloatToConvert = ArgAPFloat;
9271 bool LosesInfo = true;
9272 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9273 &LosesInfo);
9274 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9275 if (Success)
9276 ArgAPFloat = APFloatToConvert;
9277 return Success;
9278}
9279
9280// Bitcast the argument APInt to a double and convert it to a single precision
9281// APFloat, bitcast the APFloat to an APInt and assign it to the original
9282// argument if there is no loss in information during the conversion from
9283// double to single precision APFloat and the resulting number is not a denormal
9284// number. Return true if successful.
9285bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9286 double DpValue = ArgAPInt.bitsToDouble();
9287 APFloat APFloatDp(DpValue);
9288 bool Success = convertToNonDenormSingle(APFloatDp);
9289 if (Success)
9290 ArgAPInt = APFloatDp.bitcastToAPInt();
9291 return Success;
9292}
9293
9294// Nondestructive check for convertToNonDenormSingle.
9295bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9296 // Only convert if it loses info, since XXSPLTIDP should
9297 // handle the other case.
9298 APFloat APFloatToConvert = ArgAPFloat;
9299 bool LosesInfo = true;
9300 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9301 &LosesInfo);
9302
9303 return (!LosesInfo && !APFloatToConvert.isDenormal());
9304}
9305
9306static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9307 unsigned &Opcode) {
9308 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9309 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9310 return false;
9311
9312 EVT Ty = Op->getValueType(0);
9313 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9314 // as we cannot handle extending loads for these types.
9315 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9316 ISD::isNON_EXTLoad(InputNode))
9317 return true;
9318
9319 EVT MemVT = InputNode->getMemoryVT();
9320 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9321 // memory VT is the same vector element VT type.
9322 // The loads feeding into the v8i16 and v16i8 types will be extending because
9323 // scalar i8/i16 are not legal types.
9324 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9325 (MemVT == Ty.getVectorElementType()))
9326 return true;
9327
9328 if (Ty == MVT::v2i64) {
9329 // Check the extend type, when the input type is i32, and the output vector
9330 // type is v2i64.
9331 if (MemVT == MVT::i32) {
9332 if (ISD::isZEXTLoad(InputNode))
9333 Opcode = PPCISD::ZEXT_LD_SPLAT;
9334 if (ISD::isSEXTLoad(InputNode))
9335 Opcode = PPCISD::SEXT_LD_SPLAT;
9336 }
9337 return true;
9338 }
9339 return false;
9340}
9341
9342// If this is a case we can't handle, return null and let the default
9343// expansion code take care of it. If we CAN select this case, and if it
9344// selects to a single instruction, return Op. Otherwise, if we can codegen
9345// this case more efficiently than a constant pool load, lower it to the
9346// sequence of ops that should be used.
9347SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9348 SelectionDAG &DAG) const {
9349 SDLoc dl(Op);
9350 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9351 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9352
9353 // Check if this is a splat of a constant value.
9354 APInt APSplatBits, APSplatUndef;
9355 unsigned SplatBitSize;
9356 bool HasAnyUndefs;
9357 bool BVNIsConstantSplat =
9358 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9359 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9360
9361 // If it is a splat of a double, check if we can shrink it to a 32 bit
9362 // non-denormal float which when converted back to double gives us the same
9363 // double. This is to exploit the XXSPLTIDP instruction.
9364 // If we lose precision, we use XXSPLTI32DX.
9365 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9366 Subtarget.hasPrefixInstrs()) {
9367 // Check the type first to short-circuit so we don't modify APSplatBits if
9368 // this block isn't executed.
9369 if ((Op->getValueType(0) == MVT::v2f64) &&
9370 convertToNonDenormSingle(APSplatBits)) {
9371 SDValue SplatNode = DAG.getNode(
9372 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9373 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9374 return DAG.getBitcast(Op.getValueType(), SplatNode);
9375 } else {
9376 // We may lose precision, so we have to use XXSPLTI32DX.
9377
9378 uint32_t Hi =
9379 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9380 uint32_t Lo =
9381 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9382 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9383
9384 if (!Hi || !Lo)
9385 // If either load is 0, then we should generate XXLXOR to set to 0.
9386 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9387
9388 if (Hi)
9389 SplatNode = DAG.getNode(
9390 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9391 DAG.getTargetConstant(0, dl, MVT::i32),
9392 DAG.getTargetConstant(Hi, dl, MVT::i32));
9393
9394 if (Lo)
9395 SplatNode =
9396 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9397 DAG.getTargetConstant(1, dl, MVT::i32),
9398 DAG.getTargetConstant(Lo, dl, MVT::i32));
9399
9400 return DAG.getBitcast(Op.getValueType(), SplatNode);
9401 }
9402 }
9403
9404 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9405 unsigned NewOpcode = PPCISD::LD_SPLAT;
9406
9407 // Handle load-and-splat patterns as we have instructions that will do this
9408 // in one go.
9409 if (DAG.isSplatValue(Op, true) &&
9410 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9411 const SDValue *InputLoad = &Op.getOperand(0);
9412 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9413
9414 // If the input load is an extending load, it will be an i32 -> i64
9415 // extending load and isValidSplatLoad() will update NewOpcode.
9416 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9417 unsigned ElementSize =
9418 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9419
9420 assert(((ElementSize == 2 * MemorySize)
9421 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9422 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9423 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9424 "Unmatched element size and opcode!\n");
9425
9426 // Checking for a single use of this load, we have to check for vector
9427 // width (128 bits) / ElementSize uses (since each operand of the
9428 // BUILD_VECTOR is a separate use of the value).
9429 unsigned NumUsesOfInputLD = 128 / ElementSize;
9430 for (SDValue BVInOp : Op->ops())
9431 if (BVInOp.isUndef())
9432 NumUsesOfInputLD--;
9433
9434 // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9435 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9436 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9437 // 15", but function IsValidSplatLoad() now will only return true when
9438 // the data at index 0 is not nullptr. So we will not get into trouble for
9439 // these cases.
9440 //
9441 // case 1 - lfiwzx/lfiwax
9442 // 1.1: load result is i32 and is sign/zero extend to i64;
9443 // 1.2: build a v2i64 vector type with above loaded value;
9444 // 1.3: the vector has only one value at index 0, others are all undef;
9445 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9446 if (NumUsesOfInputLD == 1 &&
9447 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9448 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9449 Subtarget.hasLFIWAX()))
9450 return SDValue();
9451
9452 // case 2 - lxvr[hb]x
9453 // 2.1: load result is at most i16;
9454 // 2.2: build a vector with above loaded value;
9455 // 2.3: the vector has only one value at index 0, others are all undef;
9456 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9457 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9458 Subtarget.isISA3_1() && ElementSize <= 16)
9459 return SDValue();
9460
9461 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9462 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9463 Subtarget.hasVSX()) {
9464 SDValue Ops[] = {
9465 LD->getChain(), // Chain
9466 LD->getBasePtr(), // Ptr
9467 DAG.getValueType(Op.getValueType()) // VT
9468 };
9469 SDValue LdSplt = DAG.getMemIntrinsicNode(
9470 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9471 LD->getMemoryVT(), LD->getMemOperand());
9472 // Replace all uses of the output chain of the original load with the
9473 // output chain of the new load.
9474 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9475 LdSplt.getValue(1));
9476 return LdSplt;
9477 }
9478 }
9479
9480 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9481 // 32-bits can be lowered to VSX instructions under certain conditions.
9482 // Without VSX, there is no pattern more efficient than expanding the node.
9483 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9484 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9485 Subtarget.hasP8Vector()))
9486 return Op;
9487 return SDValue();
9488 }
9489
9490 uint64_t SplatBits = APSplatBits.getZExtValue();
9491 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9492 unsigned SplatSize = SplatBitSize / 8;
9493
9494 // First, handle single instruction cases.
9495
9496 // All zeros?
9497 if (SplatBits == 0) {
9498 // Canonicalize all zero vectors to be v4i32.
9499 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9500 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9501 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9502 }
9503 return Op;
9504 }
9505
9506 // We have XXSPLTIW for constant splats four bytes wide.
9507 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9508 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9509 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9510 // turned into a 4-byte splat of 0xABABABAB.
9511 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9512 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9513 Op.getValueType(), DAG, dl);
9514
9515 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9516 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9517 dl);
9518
9519 // We have XXSPLTIB for constant splats one byte wide.
9520 if (Subtarget.hasP9Vector() && SplatSize == 1)
9521 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9522 dl);
9523
9524 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9525 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9526 (32-SplatBitSize));
9527 if (SextVal >= -16 && SextVal <= 15)
9528 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9529 dl);
9530
9531 // Two instruction sequences.
9532
9533 // If this value is in the range [-32,30] and is even, use:
9534 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9535 // If this value is in the range [17,31] and is odd, use:
9536 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9537 // If this value is in the range [-31,-17] and is odd, use:
9538 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9539 // Note the last two are three-instruction sequences.
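// For example, SextVal = 28 is emitted as VSPLTI(14) + VSPLTI(14), and
// SextVal = 27 as VSPLTI(11) - VSPLTI(-16).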
9540 if (SextVal >= -32 && SextVal <= 31) {
9541 // To avoid having these optimizations undone by constant folding,
9542 // we convert to a pseudo that will be expanded later into one of
9543 // the above forms.
9544 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9545 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9546 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9547 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9548 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9549 if (VT == Op.getValueType())
9550 return RetVal;
9551 else
9552 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9553 }
9554
9555 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9556 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9557 // for fneg/fabs.
9558 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9559 // Make -1 and vspltisw -1:
9560 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9561
9562 // Make the VSLW intrinsic, computing 0x8000_0000.
9563 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9564 OnesV, DAG, dl);
9565
9566 // xor by OnesV to invert it.
9567 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9568 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9569 }
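  // Illustrative note (not part of the original source): vspltisw -1 places
  // 0xFFFFFFFF in each word; vslw uses only the low 5 bits of each shift
  // element (31), so 0xFFFFFFFF << 31 == 0x8000_0000, and the final xor with
  // the all-ones vector yields 0x7FFF_FFFF in every word.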
9570
 9571   // Check to see if this matches one of a wide variety of 'vsplti* then binop with self' cases.
9572 static const signed char SplatCsts[] = {
9573 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9574 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9575 };
9576
9577 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
 9578   // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
 9579   // cases which are ambiguous (e.g. formation of 0x8000_0000).
9580 int i = SplatCsts[idx];
9581
9582 // Figure out what shift amount will be used by altivec if shifted by i in
9583 // this splat size.
9584 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9585
9586 // vsplti + shl self.
9587 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9588 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9589 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9590 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9591 Intrinsic::ppc_altivec_vslw
9592 };
9593 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9594 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9595 }
9596
9597 // vsplti + srl self.
9598 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9599 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9600 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9601 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9602 Intrinsic::ppc_altivec_vsrw
9603 };
9604 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9605 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9606 }
9607
9608 // vsplti + rol self.
9609 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9610 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9611 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9612 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9613 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9614 Intrinsic::ppc_altivec_vrlw
9615 };
9616 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9617 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9618 }
9619
9620 // t = vsplti c, result = vsldoi t, t, 1
9621 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9622 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9623 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9624 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9625 }
9626 // t = vsplti c, result = vsldoi t, t, 2
9627 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9628 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9629 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9630 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9631 }
9632 // t = vsplti c, result = vsldoi t, t, 3
9633 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9634 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9635 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9636 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9637 }
9638 }
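  // Illustrative note (not part of the original source): as an example of the
  // 'vsplti + shl self' case above, a byte splat of 0x40 (SextVal == 64) is
  // matched with i == 4 because 4 << (4 & 7) == 64, and (when the earlier
  // single-instruction paths do not apply) is emitted as 'vspltisb 4' followed
  // by 'vslb' of the vector with itself. Likewise a halfword splat of 0x0500
  // (5 << 8) is reached by 'vspltish 5' followed by a 'vsldoi' of the vector
  // with itself (by 1 byte on big endian, 15 on little endian).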
9639
9640 return SDValue();
9641}
9642
9643/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9644/// the specified operations to build the shuffle.
9645static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9646 SDValue RHS, SelectionDAG &DAG,
9647 const SDLoc &dl) {
9648 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9649 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9650 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
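  // Illustrative note (not part of the original source): a PFEntry packs the
  // cost in bits 31-30, the operation in bits 29-26, and two 13-bit operand
  // IDs. Each ID encodes four element selectors in base 9, where 0-7 pick an
  // input element and 8 means undef; e.g. the LHS identity <0,1,2,3> encodes
  // as ((0*9+1)*9+2)*9+3 == 102, which is the OP_COPY check below.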
9651
9652 enum {
9653 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9654 OP_VMRGHW,
9655 OP_VMRGLW,
9656 OP_VSPLTISW0,
9657 OP_VSPLTISW1,
9658 OP_VSPLTISW2,
9659 OP_VSPLTISW3,
9660 OP_VSLDOI4,
9661 OP_VSLDOI8,
9662 OP_VSLDOI12
9663 };
9664
9665 if (OpNum == OP_COPY) {
9666 if (LHSID == (1*9+2)*9+3) return LHS;
9667 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9668 return RHS;
9669 }
9670
9671 SDValue OpLHS, OpRHS;
9672 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9673 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9674
9675 int ShufIdxs[16];
9676 switch (OpNum) {
9677 default: llvm_unreachable("Unknown i32 permute!");
9678 case OP_VMRGHW:
9679 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9680 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9681 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9682 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9683 break;
9684 case OP_VMRGLW:
9685 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9686 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9687 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9688 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9689 break;
9690 case OP_VSPLTISW0:
9691 for (unsigned i = 0; i != 16; ++i)
9692 ShufIdxs[i] = (i&3)+0;
9693 break;
9694 case OP_VSPLTISW1:
9695 for (unsigned i = 0; i != 16; ++i)
9696 ShufIdxs[i] = (i&3)+4;
9697 break;
9698 case OP_VSPLTISW2:
9699 for (unsigned i = 0; i != 16; ++i)
9700 ShufIdxs[i] = (i&3)+8;
9701 break;
9702 case OP_VSPLTISW3:
9703 for (unsigned i = 0; i != 16; ++i)
9704 ShufIdxs[i] = (i&3)+12;
9705 break;
9706 case OP_VSLDOI4:
9707 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9708 case OP_VSLDOI8:
9709 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9710 case OP_VSLDOI12:
9711 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9712 }
9713 EVT VT = OpLHS.getValueType();
9714 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9715 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9716 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9717 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9718}
9719
9720/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9721/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9722/// SDValue.
9723SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9724 SelectionDAG &DAG) const {
9725 const unsigned BytesInVector = 16;
9726 bool IsLE = Subtarget.isLittleEndian();
9727 SDLoc dl(N);
9728 SDValue V1 = N->getOperand(0);
9729 SDValue V2 = N->getOperand(1);
9730 unsigned ShiftElts = 0, InsertAtByte = 0;
9731 bool Swap = false;
9732
9733 // Shifts required to get the byte we want at element 7.
9734 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9735 0, 15, 14, 13, 12, 11, 10, 9};
9736 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9737 1, 2, 3, 4, 5, 6, 7, 8};
9738
9739 ArrayRef<int> Mask = N->getMask();
9740 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9741
9742 // For each mask element, find out if we're just inserting something
9743 // from V2 into V1 or vice versa.
9744 // Possible permutations inserting an element from V2 into V1:
9745 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9746 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9747 // ...
9748 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9749 // Inserting from V1 into V2 will be similar, except mask range will be
9750 // [16,31].
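  // Illustrative note (not part of the original source): for example, the mask
  // <16,1,2,...,15> inserts byte 0 of V2 into byte 0 of V1, while the mask
  // <0,1,...,14,31> inserts byte 15 of V2 into byte 15 of V1; all other bytes
  // keep their original positions.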
9751
9752 bool FoundCandidate = false;
9753 // If both vector operands for the shuffle are the same vector, the mask
9754 // will contain only elements from the first one and the second one will be
9755 // undef.
9756 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
 9757   // Go through the mask of bytes to find an element that's being moved
 9758   // from one vector to the other.
9759 for (unsigned i = 0; i < BytesInVector; ++i) {
9760 unsigned CurrentElement = Mask[i];
 9761     // If the 2nd operand is undefined, we should only look for the source
 9762     // element (7 for big endian, 8 for little endian) in the Mask.
9763 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9764 continue;
9765
9766 bool OtherElementsInOrder = true;
9767 // Examine the other elements in the Mask to see if they're in original
9768 // order.
9769 for (unsigned j = 0; j < BytesInVector; ++j) {
9770 if (j == i)
9771 continue;
 9772       // If CurrentElement is from V1 [0,15], we expect the rest of the Mask
 9773       // to be from V2 [16,31] and vice versa, unless the 2nd operand is
 9774       // undefined, in which case we assume we're always picking from the 1st operand.
9775 int MaskOffset =
9776 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9777 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9778 OtherElementsInOrder = false;
9779 break;
9780 }
9781 }
9782 // If other elements are in original order, we record the number of shifts
9783 // we need to get the element we want into element 7. Also record which byte
9784 // in the vector we should insert into.
9785 if (OtherElementsInOrder) {
9786 // If 2nd operand is undefined, we assume no shifts and no swapping.
9787 if (V2.isUndef()) {
9788 ShiftElts = 0;
9789 Swap = false;
9790 } else {
 9791         // Only need the last 4 bits for the shift because the operands will be swapped if CurrentElement is >= 2^4.
9792 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9793 : BigEndianShifts[CurrentElement & 0xF];
9794 Swap = CurrentElement < BytesInVector;
9795 }
9796 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9797 FoundCandidate = true;
9798 break;
9799 }
9800 }
9801
9802 if (!FoundCandidate)
9803 return SDValue();
9804
9805 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9806 // optionally with VECSHL if shift is required.
9807 if (Swap)
9808 std::swap(V1, V2);
9809 if (V2.isUndef())
9810 V2 = V1;
9811 if (ShiftElts) {
9812 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9813 DAG.getConstant(ShiftElts, dl, MVT::i32));
9814 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9815 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9816 }
9817 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9818 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9819}
9820
9821/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9822/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9823/// SDValue.
9824SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9825 SelectionDAG &DAG) const {
9826 const unsigned NumHalfWords = 8;
9827 const unsigned BytesInVector = NumHalfWords * 2;
9828 // Check that the shuffle is on half-words.
9829 if (!isNByteElemShuffleMask(N, 2, 1))
9830 return SDValue();
9831
9832 bool IsLE = Subtarget.isLittleEndian();
9833 SDLoc dl(N);
9834 SDValue V1 = N->getOperand(0);
9835 SDValue V2 = N->getOperand(1);
9836 unsigned ShiftElts = 0, InsertAtByte = 0;
9837 bool Swap = false;
9838
9839 // Shifts required to get the half-word we want at element 3.
9840 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9841 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9842
9843 uint32_t Mask = 0;
9844 uint32_t OriginalOrderLow = 0x1234567;
9845 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9846 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9847 // 32-bit space, only need 4-bit nibbles per element.
9848 for (unsigned i = 0; i < NumHalfWords; ++i) {
9849 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9850 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9851 }
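  // Illustrative note (not part of the original source): an identity shuffle of
  // the first vector has half-word selectors <0,1,2,3,4,5,6,7>, which packs to
  // 0x01234567 here and matches OriginalOrderLow above; selectors <8..15>,
  // all taken from the second vector, pack to OriginalOrderHigh.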
9852
9853 // For each mask element, find out if we're just inserting something
9854 // from V2 into V1 or vice versa. Possible permutations inserting an element
9855 // from V2 into V1:
9856 // X, 1, 2, 3, 4, 5, 6, 7
9857 // 0, X, 2, 3, 4, 5, 6, 7
9858 // 0, 1, X, 3, 4, 5, 6, 7
9859 // 0, 1, 2, X, 4, 5, 6, 7
9860 // 0, 1, 2, 3, X, 5, 6, 7
9861 // 0, 1, 2, 3, 4, X, 6, 7
9862 // 0, 1, 2, 3, 4, 5, X, 7
9863 // 0, 1, 2, 3, 4, 5, 6, X
9864 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9865
9866 bool FoundCandidate = false;
9867 // Go through the mask of half-words to find an element that's being moved
9868 // from one vector to the other.
9869 for (unsigned i = 0; i < NumHalfWords; ++i) {
9870 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9871 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9872 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9873 uint32_t TargetOrder = 0x0;
9874
9875 // If both vector operands for the shuffle are the same vector, the mask
9876 // will contain only elements from the first one and the second one will be
9877 // undef.
9878 if (V2.isUndef()) {
9879 ShiftElts = 0;
9880 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9881 TargetOrder = OriginalOrderLow;
9882 Swap = false;
 9883       // Skip if this is not the correct element, or if the mask of the other
 9884       // elements doesn't match our expected order.
9885 if (MaskOneElt == VINSERTHSrcElem &&
9886 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9887 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9888 FoundCandidate = true;
9889 break;
9890 }
9891 } else { // If both operands are defined.
9892 // Target order is [8,15] if the current mask is between [0,7].
9893 TargetOrder =
9894 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
 9895       // Skip if the mask of the other elements doesn't match our expected order.
9896 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9897 // We only need the last 3 bits for the number of shifts.
9898 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9899 : BigEndianShifts[MaskOneElt & 0x7];
9900 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9901 Swap = MaskOneElt < NumHalfWords;
9902 FoundCandidate = true;
9903 break;
9904 }
9905 }
9906 }
9907
9908 if (!FoundCandidate)
9909 return SDValue();
9910
9911 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9912 // optionally with VECSHL if shift is required.
9913 if (Swap)
9914 std::swap(V1, V2);
9915 if (V2.isUndef())
9916 V2 = V1;
9917 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9918 if (ShiftElts) {
9919 // Double ShiftElts because we're left shifting on v16i8 type.
9920 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9921 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9922 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9923 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9924 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9925 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9926 }
9927 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9928 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9929 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9930 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9931}
9932
9933/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9934/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9935/// return the default SDValue.
9936SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9937 SelectionDAG &DAG) const {
9938 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9939 // to v16i8. Peek through the bitcasts to get the actual operands.
 9940 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
 9941 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
 9942
9943 auto ShuffleMask = SVN->getMask();
9944 SDValue VecShuffle(SVN, 0);
9945 SDLoc DL(SVN);
9946
9947 // Check that we have a four byte shuffle.
9948 if (!isNByteElemShuffleMask(SVN, 4, 1))
9949 return SDValue();
9950
9951 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9952 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9953 std::swap(LHS, RHS);
 9954 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
 9955 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
9956 if (!CommutedSV)
9957 return SDValue();
9958 ShuffleMask = CommutedSV->getMask();
9959 }
9960
9961 // Ensure that the RHS is a vector of constants.
9962 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9963 if (!BVN)
9964 return SDValue();
9965
9966 // Check if RHS is a splat of 4-bytes (or smaller).
9967 APInt APSplatValue, APSplatUndef;
9968 unsigned SplatBitSize;
9969 bool HasAnyUndefs;
9970 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9971 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9972 SplatBitSize > 32)
9973 return SDValue();
9974
9975 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9976 // The instruction splats a constant C into two words of the source vector
9977 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9978 // Thus we check that the shuffle mask is the equivalent of
9979 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9980 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9981 // within each word are consecutive, so we only need to check the first byte.
9982 SDValue Index;
9983 bool IsLE = Subtarget.isLittleEndian();
9984 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9985 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9986 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9987 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9988 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9989 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9990 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9991 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9992 else
9993 return SDValue();
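  // Illustrative note (not part of the original source): e.g. the byte mask
  // <0,1,2,3, 16,17,18,19, 8,9,10,11, 20,21,22,23> keeps words 0 and 2 of the
  // LHS and takes words 1 and 3 from the constant RHS, so it matches the first
  // pattern checked above (Index is 0 on little endian, 1 on big endian).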
9994
9995 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9996 // for XXSPLTI32DX.
9997 unsigned SplatVal = APSplatValue.getZExtValue();
9998 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9999 SplatVal |= (SplatVal << SplatBitSize);
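  // Illustrative note (not part of the original source): e.g. an 8-bit splat
  // value of 0xAB is widened here to 0xABAB and then to 0xABABABAB before it
  // is encoded as the 32-bit immediate of XXSPLTI32DX.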
10000
10001 SDValue SplatNode = DAG.getNode(
10002 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10003 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10004 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10005}
10006
10007/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10008/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10009/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10010/// i.e (or (shl x, C1), (srl x, 128-C1)).
10011SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10012 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10013 assert(Op.getValueType() == MVT::v1i128 &&
10014 "Only set v1i128 as custom, other type shouldn't reach here!");
10015 SDLoc dl(Op);
10016 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10017 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10018 unsigned SHLAmt = N1.getConstantOperandVal(0);
10019 if (SHLAmt % 8 == 0) {
10020 std::array<int, 16> Mask;
10021 std::iota(Mask.begin(), Mask.end(), 0);
10022 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
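    // Illustrative note (not part of the original source): for SHLAmt == 16
    // the mask built here is <2,3,...,15,0,1>, i.e. the sixteen bytes rotated
    // left by two positions; any amount that is not a multiple of 8 falls
    // through to the scalar i128 shift-and-or sequence below.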
10023 if (SDValue Shuffle =
10024 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10025 DAG.getUNDEF(MVT::v16i8), Mask))
10026 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10027 }
10028 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10029 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10030 DAG.getConstant(SHLAmt, dl, MVT::i32));
10031 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10032 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10033 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10034 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10035}
10036
10037/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10038/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10039/// return the code it can be lowered into. Worst case, it can always be
10040/// lowered into a vperm.
10041SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10042 SelectionDAG &DAG) const {
10043 SDLoc dl(Op);
10044 SDValue V1 = Op.getOperand(0);
10045 SDValue V2 = Op.getOperand(1);
10046 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10047
10048 // Any nodes that were combined in the target-independent combiner prior
10049 // to vector legalization will not be sent to the target combine. Try to
10050 // combine it here.
10051 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10052 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10053 return NewShuffle;
10054 Op = NewShuffle;
10055 SVOp = cast<ShuffleVectorSDNode>(Op);
10056 V1 = Op.getOperand(0);
10057 V2 = Op.getOperand(1);
10058 }
10059 EVT VT = Op.getValueType();
10060 bool isLittleEndian = Subtarget.isLittleEndian();
10061
10062 unsigned ShiftElts, InsertAtByte;
10063 bool Swap = false;
10064
10065 // If this is a load-and-splat, we can do that with a single instruction
10066 // in some cases. However if the load has multiple uses, we don't want to
10067 // combine it because that will just produce multiple loads.
10068 bool IsPermutedLoad = false;
10069 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10070 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10071 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10072 InputLoad->hasOneUse()) {
10073 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10074 int SplatIdx =
10075 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10076
10077 // The splat index for permuted loads will be in the left half of the vector
10078 // which is strictly wider than the loaded value by 8 bytes. So we need to
10079 // adjust the splat index to point to the correct address in memory.
10080 if (IsPermutedLoad) {
10081 assert((isLittleEndian || IsFourByte) &&
10082 "Unexpected size for permuted load on big endian target");
10083 SplatIdx += IsFourByte ? 2 : 1;
10084 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10085 "Splat of a value outside of the loaded memory");
10086 }
10087
10088 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10089 // For 4-byte load-and-splat, we need Power9.
10090 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10091 uint64_t Offset = 0;
10092 if (IsFourByte)
10093 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10094 else
10095 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10096
10097 // If the width of the load is the same as the width of the splat,
10098 // loading with an offset would load the wrong memory.
10099 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10100 Offset = 0;
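      // Illustrative note (not part of the original source): e.g. for a 4-byte
      // splat of element 1 out of a 16-byte load, the offset is (3 - 1) * 4 == 8
      // on little endian and 1 * 4 == 4 on big endian; when the load is already
      // exactly as wide as the splat, the offset is forced back to 0 above.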
10101
10102 SDValue BasePtr = LD->getBasePtr();
10103 if (Offset != 0)
10104 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10105 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10106 SDValue Ops[] = {
10107 LD->getChain(), // Chain
10108 BasePtr, // BasePtr
10109 DAG.getValueType(Op.getValueType()) // VT
10110 };
10111 SDVTList VTL =
10112 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10113 SDValue LdSplt =
10114 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10115 Ops, LD->getMemoryVT(), LD->getMemOperand());
10116 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10117 if (LdSplt.getValueType() != SVOp->getValueType(0))
10118 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10119 return LdSplt;
10120 }
10121 }
10122
10123 // All v2i64 and v2f64 shuffles are legal
10124 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10125 return Op;
10126
10127 if (Subtarget.hasP9Vector() &&
10128 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10129 isLittleEndian)) {
10130 if (Swap)
10131 std::swap(V1, V2);
10132 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10133 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10134 if (ShiftElts) {
10135 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10136 DAG.getConstant(ShiftElts, dl, MVT::i32));
10137 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10138 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10139 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10140 }
10141 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10142 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10143 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10144 }
10145
10146 if (Subtarget.hasPrefixInstrs()) {
10147 SDValue SplatInsertNode;
10148 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10149 return SplatInsertNode;
10150 }
10151
10152 if (Subtarget.hasP9Altivec()) {
10153 SDValue NewISDNode;
10154 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10155 return NewISDNode;
10156
10157 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10158 return NewISDNode;
10159 }
10160
10161 if (Subtarget.hasVSX() &&
10162 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10163 if (Swap)
10164 std::swap(V1, V2);
10165 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10166 SDValue Conv2 =
10167 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10168
10169 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10170 DAG.getConstant(ShiftElts, dl, MVT::i32));
10171 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10172 }
10173
10174 if (Subtarget.hasVSX() &&
10175 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10176 if (Swap)
10177 std::swap(V1, V2);
10178 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10179 SDValue Conv2 =
10180 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10181
10182 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10183 DAG.getConstant(ShiftElts, dl, MVT::i32));
10184 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10185 }
10186
10187 if (Subtarget.hasP9Vector()) {
10188 if (PPC::isXXBRHShuffleMask(SVOp)) {
10189 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10190 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10191 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10192 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10193 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10194 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10195 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10196 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10197 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10198 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10199 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10200 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10201 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10202 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10203 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10204 }
10205 }
10206
10207 if (Subtarget.hasVSX()) {
10208 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10209 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10210
10211 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10212 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10213 DAG.getConstant(SplatIdx, dl, MVT::i32));
10214 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10215 }
10216
10217 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10218 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10219 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10220 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10221 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10222 }
10223 }
10224
10225 // Cases that are handled by instructions that take permute immediates
10226 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10227 // selected by the instruction selector.
10228 if (V2.isUndef()) {
10229 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10230 PPC::isSplatShuffleMask(SVOp, 2) ||
10231 PPC::isSplatShuffleMask(SVOp, 4) ||
10232 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10233 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10234 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10235 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10236 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10237 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10238 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10239 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10240 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10241 (Subtarget.hasP8Altivec() && (
10242 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10243 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10244 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10245 return Op;
10246 }
10247 }
10248
10249 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10250 // and produce a fixed permutation. If any of these match, do not lower to
10251 // VPERM.
10252 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10253 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10254 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10255 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10256 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10257 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10258 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10259 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10260 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10261 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10262 (Subtarget.hasP8Altivec() && (
10263 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10264 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10265 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10266 return Op;
10267
10268 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10269 // perfect shuffle table to emit an optimal matching sequence.
10270 ArrayRef<int> PermMask = SVOp->getMask();
10271
10272 if (!DisablePerfectShuffle && !isLittleEndian) {
10273 unsigned PFIndexes[4];
10274 bool isFourElementShuffle = true;
10275 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10276 ++i) { // Element number
10277 unsigned EltNo = 8; // Start out undef.
10278 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10279 if (PermMask[i * 4 + j] < 0)
10280 continue; // Undef, ignore it.
10281
10282 unsigned ByteSource = PermMask[i * 4 + j];
10283 if ((ByteSource & 3) != j) {
10284 isFourElementShuffle = false;
10285 break;
10286 }
10287
10288 if (EltNo == 8) {
10289 EltNo = ByteSource / 4;
10290 } else if (EltNo != ByteSource / 4) {
10291 isFourElementShuffle = false;
10292 break;
10293 }
10294 }
10295 PFIndexes[i] = EltNo;
10296 }
10297
10298 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10299 // perfect shuffle vector to determine if it is cost effective to do this as
10300 // discrete instructions, or whether we should use a vperm.
10301 // For now, we skip this for little endian until such time as we have a
10302 // little-endian perfect shuffle table.
10303 if (isFourElementShuffle) {
10304 // Compute the index in the perfect shuffle table.
10305 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10306 PFIndexes[2] * 9 + PFIndexes[3];
10307
10308 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10309 unsigned Cost = (PFEntry >> 30);
10310
10311 // Determining when to avoid vperm is tricky. Many things affect the cost
10312 // of vperm, particularly how many times the perm mask needs to be
10313 // computed. For example, if the perm mask can be hoisted out of a loop or
10314 // is already used (perhaps because there are multiple permutes with the
10315 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10316 // permute mask out of the loop requires an extra register.
10317 //
10318 // As a compromise, we only emit discrete instructions if the shuffle can
10319 // be generated in 3 or fewer operations. When we have loop information
10320 // available, if this block is within a loop, we should avoid using vperm
10321 // for 3-operation perms and use a constant pool load instead.
10322 if (Cost < 3)
10323 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10324 }
10325 }
10326
10327 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10328 // vector that will get spilled to the constant pool.
10329 if (V2.isUndef()) V2 = V1;
10330
10331 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10332}
10333
10334SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10335 ArrayRef<int> PermMask, EVT VT,
10336 SDValue V1, SDValue V2) const {
10337 unsigned Opcode = PPCISD::VPERM;
10338 EVT ValType = V1.getValueType();
10339 SDLoc dl(Op);
10340 bool NeedSwap = false;
10341 bool isLittleEndian = Subtarget.isLittleEndian();
10342 bool isPPC64 = Subtarget.isPPC64();
10343
10344 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10345 (V1->hasOneUse() || V2->hasOneUse())) {
10346 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10347 "XXPERM instead\n");
10348 Opcode = PPCISD::XXPERM;
10349
10351     // The second input to XXPERM is also an output, so if the second input
10352     // has multiple uses then a copy is necessary. As a result, we want the
10353     // single-use operand to be used as the second input to avoid that copy.
10353 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10354 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10355 std::swap(V1, V2);
10356 NeedSwap = !NeedSwap;
10357 }
10358 }
10359
10360 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10361 // that it is in input element units, not in bytes. Convert now.
10362
10363 // For little endian, the order of the input vectors is reversed, and
10364 // the permutation mask is complemented with respect to 31. This is
10365 // necessary to produce proper semantics with the big-endian-based vperm
10366 // instruction.
10367 EVT EltVT = V1.getValueType().getVectorElementType();
10368 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10369
10370 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10371 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10372
10373 /*
10374 Vectors will be appended like so: [ V1 | v2 ]
10375 XXSWAPD on V1:
10376 [ A | B | C | D ] -> [ C | D | A | B ]
10377 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10378 i.e. index of A, B += 8, and index of C, D -= 8.
10379 XXSWAPD on V2:
10380 [ E | F | G | H ] -> [ G | H | E | F ]
10381 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10382 i.e. index of E, F += 8, index of G, H -= 8
10383 Swap V1 and V2:
10384 [ V1 | V2 ] -> [ V2 | V1 ]
10385 0-15 16-31 0-15 16-31
10386 i.e. index of V1 += 16, index of V2 -= 16
10387 */
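  // Illustrative note (not part of the original source): on little endian each
  // mask byte is complemented with respect to 31, so a v16i8 source element 5
  // becomes control byte 31 - 5 == 26, and the two input vectors are swapped
  // further down; together this reproduces big-endian vperm semantics.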
10388
10389 SmallVector<SDValue, 16> ResultMask;
10390 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10391 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10392
10393 if (V1HasXXSWAPD) {
10394 if (SrcElt < 8)
10395 SrcElt += 8;
10396 else if (SrcElt < 16)
10397 SrcElt -= 8;
10398 }
10399 if (V2HasXXSWAPD) {
10400 if (SrcElt > 23)
10401 SrcElt -= 8;
10402 else if (SrcElt > 15)
10403 SrcElt += 8;
10404 }
10405 if (NeedSwap) {
10406 if (SrcElt < 16)
10407 SrcElt += 16;
10408 else
10409 SrcElt -= 16;
10410 }
10411 for (unsigned j = 0; j != BytesPerElement; ++j)
10412 if (isLittleEndian)
10413 ResultMask.push_back(
10414 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10415 else
10416 ResultMask.push_back(
10417 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10418 }
10419
10420 if (V1HasXXSWAPD) {
10421 dl = SDLoc(V1->getOperand(0));
10422 V1 = V1->getOperand(0)->getOperand(1);
10423 }
10424 if (V2HasXXSWAPD) {
10425 dl = SDLoc(V2->getOperand(0));
10426 V2 = V2->getOperand(0)->getOperand(1);
10427 }
10428
10429 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10430 if (ValType != MVT::v2f64)
10431 V1 = DAG.getBitcast(MVT::v2f64, V1);
10432 if (V2.getValueType() != MVT::v2f64)
10433 V2 = DAG.getBitcast(MVT::v2f64, V2);
10434 }
10435
10436 ShufflesHandledWithVPERM++;
10437 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10438 LLVM_DEBUG({
10439 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10440 if (Opcode == PPCISD::XXPERM) {
10441 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10442 } else {
10443 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10444 }
10445 SVOp->dump();
10446 dbgs() << "With the following permute control vector:\n";
10447 VPermMask.dump();
10448 });
10449
10450 if (Opcode == PPCISD::XXPERM)
10451 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10452
10453   // For little endian we only need to swap the operand order here;
10454   // the permute mask was already computed accordingly.
10455 if (isLittleEndian)
10456 std::swap(V1, V2);
10457
10458 SDValue VPERMNode =
10459 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10460
10461 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10462 return VPERMNode;
10463}
10464
10465/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10466 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10467/// information about the intrinsic.
10468static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10469 bool &isDot, const PPCSubtarget &Subtarget) {
10470 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10471 CompareOpc = -1;
10472 isDot = false;
10473 switch (IntrinsicID) {
10474 default:
10475 return false;
10476 // Comparison predicates.
10477 case Intrinsic::ppc_altivec_vcmpbfp_p:
10478 CompareOpc = 966;
10479 isDot = true;
10480 break;
10481 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10482 CompareOpc = 198;
10483 isDot = true;
10484 break;
10485 case Intrinsic::ppc_altivec_vcmpequb_p:
10486 CompareOpc = 6;
10487 isDot = true;
10488 break;
10489 case Intrinsic::ppc_altivec_vcmpequh_p:
10490 CompareOpc = 70;
10491 isDot = true;
10492 break;
10493 case Intrinsic::ppc_altivec_vcmpequw_p:
10494 CompareOpc = 134;
10495 isDot = true;
10496 break;
10497 case Intrinsic::ppc_altivec_vcmpequd_p:
10498 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10499 CompareOpc = 199;
10500 isDot = true;
10501 } else
10502 return false;
10503 break;
10504 case Intrinsic::ppc_altivec_vcmpneb_p:
10505 case Intrinsic::ppc_altivec_vcmpneh_p:
10506 case Intrinsic::ppc_altivec_vcmpnew_p:
10507 case Intrinsic::ppc_altivec_vcmpnezb_p:
10508 case Intrinsic::ppc_altivec_vcmpnezh_p:
10509 case Intrinsic::ppc_altivec_vcmpnezw_p:
10510 if (Subtarget.hasP9Altivec()) {
10511 switch (IntrinsicID) {
10512 default:
10513 llvm_unreachable("Unknown comparison intrinsic.");
10514 case Intrinsic::ppc_altivec_vcmpneb_p:
10515 CompareOpc = 7;
10516 break;
10517 case Intrinsic::ppc_altivec_vcmpneh_p:
10518 CompareOpc = 71;
10519 break;
10520 case Intrinsic::ppc_altivec_vcmpnew_p:
10521 CompareOpc = 135;
10522 break;
10523 case Intrinsic::ppc_altivec_vcmpnezb_p:
10524 CompareOpc = 263;
10525 break;
10526 case Intrinsic::ppc_altivec_vcmpnezh_p:
10527 CompareOpc = 327;
10528 break;
10529 case Intrinsic::ppc_altivec_vcmpnezw_p:
10530 CompareOpc = 391;
10531 break;
10532 }
10533 isDot = true;
10534 } else
10535 return false;
10536 break;
10537 case Intrinsic::ppc_altivec_vcmpgefp_p:
10538 CompareOpc = 454;
10539 isDot = true;
10540 break;
10541 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10542 CompareOpc = 710;
10543 isDot = true;
10544 break;
10545 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10546 CompareOpc = 774;
10547 isDot = true;
10548 break;
10549 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10550 CompareOpc = 838;
10551 isDot = true;
10552 break;
10553 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10554 CompareOpc = 902;
10555 isDot = true;
10556 break;
10557 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10558 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10559 CompareOpc = 967;
10560 isDot = true;
10561 } else
10562 return false;
10563 break;
10564 case Intrinsic::ppc_altivec_vcmpgtub_p:
10565 CompareOpc = 518;
10566 isDot = true;
10567 break;
10568 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10569 CompareOpc = 582;
10570 isDot = true;
10571 break;
10572 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10573 CompareOpc = 646;
10574 isDot = true;
10575 break;
10576 case Intrinsic::ppc_altivec_vcmpgtud_p:
10577 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10578 CompareOpc = 711;
10579 isDot = true;
10580 } else
10581 return false;
10582 break;
10583
10584 case Intrinsic::ppc_altivec_vcmpequq:
10585 case Intrinsic::ppc_altivec_vcmpgtsq:
10586 case Intrinsic::ppc_altivec_vcmpgtuq:
10587 if (!Subtarget.isISA3_1())
10588 return false;
10589 switch (IntrinsicID) {
10590 default:
10591 llvm_unreachable("Unknown comparison intrinsic.");
10592 case Intrinsic::ppc_altivec_vcmpequq:
10593 CompareOpc = 455;
10594 break;
10595 case Intrinsic::ppc_altivec_vcmpgtsq:
10596 CompareOpc = 903;
10597 break;
10598 case Intrinsic::ppc_altivec_vcmpgtuq:
10599 CompareOpc = 647;
10600 break;
10601 }
10602 break;
10603
10604 // VSX predicate comparisons use the same infrastructure
10605 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10606 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10607 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10608 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10609 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10610 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10611 if (Subtarget.hasVSX()) {
10612 switch (IntrinsicID) {
10613 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10614 CompareOpc = 99;
10615 break;
10616 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10617 CompareOpc = 115;
10618 break;
10619 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10620 CompareOpc = 107;
10621 break;
10622 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10623 CompareOpc = 67;
10624 break;
10625 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10626 CompareOpc = 83;
10627 break;
10628 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10629 CompareOpc = 75;
10630 break;
10631 }
10632 isDot = true;
10633 } else
10634 return false;
10635 break;
10636
10637 // Normal Comparisons.
10638 case Intrinsic::ppc_altivec_vcmpbfp:
10639 CompareOpc = 966;
10640 break;
10641 case Intrinsic::ppc_altivec_vcmpeqfp:
10642 CompareOpc = 198;
10643 break;
10644 case Intrinsic::ppc_altivec_vcmpequb:
10645 CompareOpc = 6;
10646 break;
10647 case Intrinsic::ppc_altivec_vcmpequh:
10648 CompareOpc = 70;
10649 break;
10650 case Intrinsic::ppc_altivec_vcmpequw:
10651 CompareOpc = 134;
10652 break;
10653 case Intrinsic::ppc_altivec_vcmpequd:
10654 if (Subtarget.hasP8Altivec())
10655 CompareOpc = 199;
10656 else
10657 return false;
10658 break;
10659 case Intrinsic::ppc_altivec_vcmpneb:
10660 case Intrinsic::ppc_altivec_vcmpneh:
10661 case Intrinsic::ppc_altivec_vcmpnew:
10662 case Intrinsic::ppc_altivec_vcmpnezb:
10663 case Intrinsic::ppc_altivec_vcmpnezh:
10664 case Intrinsic::ppc_altivec_vcmpnezw:
10665 if (Subtarget.hasP9Altivec())
10666 switch (IntrinsicID) {
10667 default:
10668 llvm_unreachable("Unknown comparison intrinsic.");
10669 case Intrinsic::ppc_altivec_vcmpneb:
10670 CompareOpc = 7;
10671 break;
10672 case Intrinsic::ppc_altivec_vcmpneh:
10673 CompareOpc = 71;
10674 break;
10675 case Intrinsic::ppc_altivec_vcmpnew:
10676 CompareOpc = 135;
10677 break;
10678 case Intrinsic::ppc_altivec_vcmpnezb:
10679 CompareOpc = 263;
10680 break;
10681 case Intrinsic::ppc_altivec_vcmpnezh:
10682 CompareOpc = 327;
10683 break;
10684 case Intrinsic::ppc_altivec_vcmpnezw:
10685 CompareOpc = 391;
10686 break;
10687 }
10688 else
10689 return false;
10690 break;
10691 case Intrinsic::ppc_altivec_vcmpgefp:
10692 CompareOpc = 454;
10693 break;
10694 case Intrinsic::ppc_altivec_vcmpgtfp:
10695 CompareOpc = 710;
10696 break;
10697 case Intrinsic::ppc_altivec_vcmpgtsb:
10698 CompareOpc = 774;
10699 break;
10700 case Intrinsic::ppc_altivec_vcmpgtsh:
10701 CompareOpc = 838;
10702 break;
10703 case Intrinsic::ppc_altivec_vcmpgtsw:
10704 CompareOpc = 902;
10705 break;
10706 case Intrinsic::ppc_altivec_vcmpgtsd:
10707 if (Subtarget.hasP8Altivec())
10708 CompareOpc = 967;
10709 else
10710 return false;
10711 break;
10712 case Intrinsic::ppc_altivec_vcmpgtub:
10713 CompareOpc = 518;
10714 break;
10715 case Intrinsic::ppc_altivec_vcmpgtuh:
10716 CompareOpc = 582;
10717 break;
10718 case Intrinsic::ppc_altivec_vcmpgtuw:
10719 CompareOpc = 646;
10720 break;
10721 case Intrinsic::ppc_altivec_vcmpgtud:
10722 if (Subtarget.hasP8Altivec())
10723 CompareOpc = 711;
10724 else
10725 return false;
10726 break;
10727 case Intrinsic::ppc_altivec_vcmpequq_p:
10728 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10729 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10730 if (!Subtarget.isISA3_1())
10731 return false;
10732 switch (IntrinsicID) {
10733 default:
10734 llvm_unreachable("Unknown comparison intrinsic.");
10735 case Intrinsic::ppc_altivec_vcmpequq_p:
10736 CompareOpc = 455;
10737 break;
10738 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10739 CompareOpc = 903;
10740 break;
10741 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10742 CompareOpc = 647;
10743 break;
10744 }
10745 isDot = true;
10746 break;
10747 }
10748 return true;
10749}
10750
10751/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10752/// lower, do it, otherwise return null.
10753SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10754 SelectionDAG &DAG) const {
10755 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10756
10757 SDLoc dl(Op);
10758
10759 switch (IntrinsicID) {
10760 case Intrinsic::thread_pointer:
10761 // Reads the thread pointer register, used for __builtin_thread_pointer.
10762 if (Subtarget.isPPC64())
10763 return DAG.getRegister(PPC::X13, MVT::i64);
10764 return DAG.getRegister(PPC::R2, MVT::i32);
10765
10766 case Intrinsic::ppc_rldimi: {
10767 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10768 SDValue Src = Op.getOperand(1);
10769 APInt Mask = Op.getConstantOperandAPInt(4);
10770 if (Mask.isZero())
10771 return Op.getOperand(2);
10772 if (Mask.isAllOnes())
10773 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
10774 uint64_t SH = Op.getConstantOperandVal(3);
10775 unsigned MB = 0, ME = 0;
10776 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10777 report_fatal_error("invalid rldimi mask!");
10778 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10779 if (ME < 63 - SH) {
10780 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10781 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10782 } else if (ME > 63 - SH) {
10783 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10784 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10785 }
10786 return SDValue(
10787 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10788 {Op.getOperand(2), Src,
10789 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10790 DAG.getTargetConstant(MB, dl, MVT::i32)}),
10791 0);
10792 }
10793
10794 case Intrinsic::ppc_rlwimi: {
10795 APInt Mask = Op.getConstantOperandAPInt(4);
10796 if (Mask.isZero())
10797 return Op.getOperand(2);
10798 if (Mask.isAllOnes())
10799 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
10800 Op.getOperand(3));
10801 unsigned MB = 0, ME = 0;
10802 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10803 report_fatal_error("invalid rlwimi mask!");
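  // Illustrative note (not part of the original source): e.g. a mask of
  // 0x00FF0000 is a contiguous run of ones covering big-endian bits 8..15, so
  // isRunOfOnes reports MB == 8 and ME == 15 and the rlwimi below inserts the
  // rotated source into exactly that bit field.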
10804 return SDValue(DAG.getMachineNode(
10805 PPC::RLWIMI, dl, MVT::i32,
10806 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10807 DAG.getTargetConstant(MB, dl, MVT::i32),
10808 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10809 0);
10810 }
10811
10812 case Intrinsic::ppc_rlwnm: {
10813 if (Op.getConstantOperandVal(3) == 0)
10814 return DAG.getConstant(0, dl, MVT::i32);
10815 unsigned MB = 0, ME = 0;
10816 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
10817 report_fatal_error("invalid rlwnm mask!");
10818 return SDValue(
10819 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
10820 {Op.getOperand(1), Op.getOperand(2),
10821 DAG.getTargetConstant(MB, dl, MVT::i32),
10822 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10823 0);
10824 }
10825
10826 case Intrinsic::ppc_mma_disassemble_acc: {
10827 if (Subtarget.isISAFuture()) {
10828 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10829 SDValue WideVec = SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl,
10830 ArrayRef(ReturnTypes, 2),
10831 Op.getOperand(1)),
10832 0);
10833 SmallVector<SDValue, 4> RetOps;
10834 SDValue Value = SDValue(WideVec.getNode(), 0);
10835 SDValue Value2 = SDValue(WideVec.getNode(), 1);
10836
10837 SDValue Extract;
10838 Extract = DAG.getNode(
10839 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10840 Subtarget.isLittleEndian() ? Value2 : Value,
10841 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10842 dl, getPointerTy(DAG.getDataLayout())));
10843 RetOps.push_back(Extract);
10844 Extract = DAG.getNode(
10845 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10846 Subtarget.isLittleEndian() ? Value2 : Value,
10847 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10848 dl, getPointerTy(DAG.getDataLayout())));
10849 RetOps.push_back(Extract);
10850 Extract = DAG.getNode(
10851 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10852 Subtarget.isLittleEndian() ? Value : Value2,
10853 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10854 dl, getPointerTy(DAG.getDataLayout())));
10855 RetOps.push_back(Extract);
10856 Extract = DAG.getNode(
10857 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10858 Subtarget.isLittleEndian() ? Value : Value2,
10859 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10860 dl, getPointerTy(DAG.getDataLayout())));
10861 RetOps.push_back(Extract);
10862 return DAG.getMergeValues(RetOps, dl);
10863 }
10864 [[fallthrough]];
10865 }
10866 case Intrinsic::ppc_vsx_disassemble_pair: {
10867 int NumVecs = 2;
10868 SDValue WideVec = Op.getOperand(1);
10869 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10870 NumVecs = 4;
10871 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10872 }
10873 SmallVector<SDValue, 4> RetOps;
10874 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10875 SDValue Extract = DAG.getNode(
10876 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10877 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10878 : VecNo,
10879 dl, getPointerTy(DAG.getDataLayout())));
10880 RetOps.push_back(Extract);
10881 }
10882 return DAG.getMergeValues(RetOps, dl);
10883 }
10884
10885 case Intrinsic::ppc_mma_xxmfacc:
10886 case Intrinsic::ppc_mma_xxmtacc: {
10887 // Allow pre-isa-future subtargets to lower as normal.
10888 if (!Subtarget.isISAFuture())
10889 return SDValue();
10890 // The intrinsics for xxmtacc and xxmfacc take one argument of
10891 // type v512i1, for future cpu the corresponding wacc instruction
10892 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
10893 // the need to produce the xxm[t|f]acc.
10894 SDValue WideVec = Op.getOperand(1);
10895 DAG.ReplaceAllUsesWith(Op, WideVec);
10896 return SDValue();
10897 }
10898
10899 case Intrinsic::ppc_unpack_longdouble: {
10900 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10901 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
10902 "Argument of long double unpack must be 0 or 1!");
10903 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
10904 DAG.getConstant(!!(Idx->getSExtValue()), dl,
10905 Idx->getValueType(0)));
10906 }
10907
10908 case Intrinsic::ppc_compare_exp_lt:
10909 case Intrinsic::ppc_compare_exp_gt:
10910 case Intrinsic::ppc_compare_exp_eq:
10911 case Intrinsic::ppc_compare_exp_uo: {
10912 unsigned Pred;
10913 switch (IntrinsicID) {
10914 case Intrinsic::ppc_compare_exp_lt:
10915 Pred = PPC::PRED_LT;
10916 break;
10917 case Intrinsic::ppc_compare_exp_gt:
10918 Pred = PPC::PRED_GT;
10919 break;
10920 case Intrinsic::ppc_compare_exp_eq:
10921 Pred = PPC::PRED_EQ;
10922 break;
10923 case Intrinsic::ppc_compare_exp_uo:
10924 Pred = PPC::PRED_UN;
10925 break;
10926 }
10927 return SDValue(
10928 DAG.getMachineNode(
10929 PPC::SELECT_CC_I4, dl, MVT::i32,
10930 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
10931 Op.getOperand(1), Op.getOperand(2)),
10932 0),
10933 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10934 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
10935 0);
10936 }
10937 case Intrinsic::ppc_test_data_class: {
10938 EVT OpVT = Op.getOperand(1).getValueType();
10939 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
10940 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
10941 : PPC::XSTSTDCSP);
10942 return SDValue(
10943 DAG.getMachineNode(
10944 PPC::SELECT_CC_I4, dl, MVT::i32,
10945 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
10946 Op.getOperand(1)),
10947 0),
10948 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10949 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
10950 0);
10951 }
10952 case Intrinsic::ppc_fnmsub: {
10953 EVT VT = Op.getOperand(1).getValueType();
10954 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
10955 return DAG.getNode(
10956 ISD::FNEG, dl, VT,
10957 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
10958 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
10959 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
10960 Op.getOperand(2), Op.getOperand(3));
10961 }
10962 case Intrinsic::ppc_convert_f128_to_ppcf128:
10963 case Intrinsic::ppc_convert_ppcf128_to_f128: {
10964 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
10965 ? RTLIB::CONVERT_PPCF128_F128
10966 : RTLIB::CONVERT_F128_PPCF128;
10967 MakeLibCallOptions CallOptions;
10968 std::pair<SDValue, SDValue> Result =
10969 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
10970 dl, SDValue());
10971 return Result.first;
10972 }
10973 case Intrinsic::ppc_maxfe:
10974 case Intrinsic::ppc_maxfl:
10975 case Intrinsic::ppc_maxfs:
10976 case Intrinsic::ppc_minfe:
10977 case Intrinsic::ppc_minfl:
10978 case Intrinsic::ppc_minfs: {
10979 EVT VT = Op.getValueType();
10980 assert(
10981 all_of(Op->ops().drop_front(4),
10982 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
10983 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
10984 (void)VT;
10985 ISD::CondCode CC = ISD::SETGT;
10986 if (IntrinsicID == Intrinsic::ppc_minfe ||
10987 IntrinsicID == Intrinsic::ppc_minfl ||
10988 IntrinsicID == Intrinsic::ppc_minfs)
10989 CC = ISD::SETLT;
10990 unsigned I = Op.getNumOperands() - 2, Cnt = I;
10991 SDValue Res = Op.getOperand(I);
10992 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
10993 Res =
10994 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
10995 }
10996 return Res;
10997 }
10998 }
10999
11000 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11001 // opcode number of the comparison.
11002 int CompareOpc;
11003 bool isDot;
11004 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11005 return SDValue(); // Don't custom lower most intrinsics.
11006
11007 // If this is a non-dot comparison, make the VCMP node and we are done.
11008 if (!isDot) {
11009 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11010 Op.getOperand(1), Op.getOperand(2),
11011 DAG.getConstant(CompareOpc, dl, MVT::i32));
11012 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11013 }
11014
11015 // Create the PPCISD altivec 'dot' comparison node.
11016 SDValue Ops[] = {
11017 Op.getOperand(2), // LHS
11018 Op.getOperand(3), // RHS
11019 DAG.getConstant(CompareOpc, dl, MVT::i32)
11020 };
11021 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11022 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11023
11024 // Now that we have the comparison, emit a copy from the CR to a GPR.
11025 // This is flagged to the above dot comparison.
11026 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11027 DAG.getRegister(PPC::CR6, MVT::i32),
11028 CompNode.getValue(1));
11029
11030 // Unpack the result based on how the target uses it.
11031 unsigned BitNo; // Bit # of CR6.
11032 bool InvertBit; // Invert result?
11033 switch (Op.getConstantOperandVal(1)) {
11034 default: // Can't happen, don't crash on invalid number though.
11035 case 0: // Return the value of the EQ bit of CR6.
11036 BitNo = 0; InvertBit = false;
11037 break;
11038 case 1: // Return the inverted value of the EQ bit of CR6.
11039 BitNo = 0; InvertBit = true;
11040 break;
11041 case 2: // Return the value of the LT bit of CR6.
11042 BitNo = 2; InvertBit = false;
11043 break;
11044 case 3: // Return the inverted value of the LT bit of CR6.
11045 BitNo = 2; InvertBit = true;
11046 break;
11047 }
11048
11049 // Shift the bit into the low position.
11050 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11051 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11052 // Isolate the bit.
11053 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11054 DAG.getConstant(1, dl, MVT::i32));
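  // Illustrative note (not part of the original source): MFOCRF leaves the four
  // CR6 bits at GPR bit positions 7..4 (counting from the LSB), so the EQ bit
  // (BitNo == 0) needs a shift of 8 - (3 - 0) == 5 and the LT bit (BitNo == 2)
  // a shift of 7; the AND with 1 above then isolates the selected bit.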
11055
11056 // If we are supposed to, toggle the bit.
11057 if (InvertBit)
11058 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11059 DAG.getConstant(1, dl, MVT::i32));
11060 return Flags;
11061}
11062
11063SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11064 SelectionDAG &DAG) const {
11065 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11066 // the beginning of the argument list.
11067 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11068 SDLoc DL(Op);
11069 switch (Op.getConstantOperandVal(ArgStart)) {
11070 case Intrinsic::ppc_cfence: {
11071 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11072 SDValue Val = Op.getOperand(ArgStart + 1);
11073 EVT Ty = Val.getValueType();
11074 if (Ty == MVT::i128) {
11075 // FIXME: Testing one of two paired registers is sufficient to guarantee
11076 // ordering?
11077 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11078 }
11079 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11080 EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
11081 return SDValue(
11082 DAG.getMachineNode(Opcode, DL, MVT::Other,
11083 DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
11084 Op.getOperand(0)),
11085 0);
11086 }
11087 default:
11088 break;
11089 }
11090 return SDValue();
11091}
11092
11093// Lower scalar BSWAP64 to xxbrd.
11094SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11095 SDLoc dl(Op);
11096 if (!Subtarget.isPPC64())
11097 return Op;
11098 // MTVSRDD
11099 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11100 Op.getOperand(0));
11101 // XXBRD
11102 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11103 // MFVSRD
11104 int VectorIndex = 0;
11105 if (Subtarget.isLittleEndian())
11106 VectorIndex = 1;
11107 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11108 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11109 return Op;
11110}
11111
11112// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11113// compared to a value that is atomically loaded (atomic loads zero-extend).
11114SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11115 SelectionDAG &DAG) const {
11116 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11117 "Expecting an atomic compare-and-swap here.");
11118 SDLoc dl(Op);
11119 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11120 EVT MemVT = AtomicNode->getMemoryVT();
11121 if (MemVT.getSizeInBits() >= 32)
11122 return Op;
11123
11124 SDValue CmpOp = Op.getOperand(2);
11125 // If this is already correctly zero-extended, leave it alone.
11126 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11127 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11128 return Op;
11129
11130 // Clear the high bits of the compare operand.
11131 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11132 SDValue NewCmpOp =
11133 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11134 DAG.getConstant(MaskVal, dl, MVT::i32));
11135
11136 // Replace the existing compare operand with the properly zero-extended one.
11137 SmallVector<SDValue, 4> Ops;
11138 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11139 Ops.push_back(AtomicNode->getOperand(i));
11140 Ops[2] = NewCmpOp;
11141 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11142 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11143 auto NodeTy =
11144 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11145 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11146}
11147
11148SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11149 SelectionDAG &DAG) const {
11150 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11151 EVT MemVT = N->getMemoryVT();
11152 assert(MemVT.getSimpleVT() == MVT::i128 &&
11153 "Expect quadword atomic operations");
11154 SDLoc dl(N);
11155 unsigned Opc = N->getOpcode();
11156 switch (Opc) {
11157 case ISD::ATOMIC_LOAD: {
11158 // Lower quadword atomic load to int_ppc_atomic_load_i128, which will be
11159 // lowered to PPC instructions by the pattern-matching instruction selector.
11160 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11161 SmallVector<SDValue, 4> Ops{
11162 N->getOperand(0),
11163 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11164 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11165 Ops.push_back(N->getOperand(I));
11166 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11167 Ops, MemVT, N->getMemOperand());
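// Note (editorial): the load intrinsic returns the quadword as two i64
// halves (value 0 = low, value 1 = high); both are widened to i128 below
// and recombined as Lo | (Hi << 64).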
11168 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11169 SDValue ValHi =
11170 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11171 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11172 DAG.getConstant(64, dl, MVT::i32));
11173 SDValue Val =
11174 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11175 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11176 {Val, LoadedVal.getValue(2)});
11177 }
11178 case ISD::ATOMIC_STORE: {
11179 // Lower quadword atomic store to int_ppc_atomic_store_i128, which will be
11180 // lowered to PPC instructions by the pattern-matching instruction selector.
11181 SDVTList Tys = DAG.getVTList(MVT::Other);
11182 SmallVector<SDValue, 4> Ops{
11183 N->getOperand(0),
11184 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11185 SDValue Val = N->getOperand(1);
11186 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11187 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11188 DAG.getConstant(64, dl, MVT::i32));
11189 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11190 Ops.push_back(ValLo);
11191 Ops.push_back(ValHi);
11192 Ops.push_back(N->getOperand(2));
11193 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11194 N->getMemOperand());
11195 }
11196 default:
11197 llvm_unreachable("Unexpected atomic opcode");
11198 }
11199}
11200
11201 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11202 SelectionDAG &DAG,
11203 const PPCSubtarget &Subtarget) {
11204 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11205
11206 enum DataClassMask {
11207 DC_NAN = 1 << 6,
11208 DC_NEG_INF = 1 << 4,
11209 DC_POS_INF = 1 << 5,
11210 DC_NEG_ZERO = 1 << 2,
11211 DC_POS_ZERO = 1 << 3,
11212 DC_NEG_SUBNORM = 1,
11213 DC_POS_SUBNORM = 1 << 1,
11214 };
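// Note (editorial): these bit positions match the DCMX immediate encoding
// of the XSTSTDC* test-data-class instructions selected below, with bit 6
// (NaN) as the most significant test bit and bit 0 (negative subnormal) as
// the least significant.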
11215
11216 EVT VT = Op.getValueType();
11217
11218 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11219 : VT == MVT::f64 ? PPC::XSTSTDCDP
11220 : PPC::XSTSTDCSP;
11221
11222 if (Mask == fcAllFlags)
11223 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11224 if (Mask == 0)
11225 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11226
11227 // When it's cheaper or necessary to test the complemented set of flags,
11228 // test ~Mask and invert the result.
11228 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11229 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11230 return DAG.getNOT(Dl, Rev, MVT::i1);
11231 }
11232
11233 // Power doesn't support testing whether a value is 'normal'. Test the rest
11234 // first, and test if it's 'not not-normal' with expected sign.
11235 if (Mask & fcNormal) {
11236 SDValue Rev(DAG.getMachineNode(
11237 TestOp, Dl, MVT::i32,
11238 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11239 DC_NEG_ZERO | DC_POS_ZERO |
11240 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11241 Dl, MVT::i32),
11242 Op),
11243 0);
11244 // The sign is stored in CR bit 0, the result in CR bit 2.
11245 SDValue Sign(
11246 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11247 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11248 0);
11249 SDValue Normal(DAG.getNOT(
11250 Dl,
11251 SDValue(DAG.getMachineNode(
11252 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11253 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11254 0),
11255 MVT::i1));
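// Note (editorial): every class except 'normal' was tested above, so the EQ
// bit is set only for non-normal values; its negation (Normal) combined
// with the sign bit (LT) below checks for a normal value of the expected
// sign.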
11256 if (Mask & fcPosNormal)
11257 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11258 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11259 if (Mask == fcPosNormal || Mask == fcNegNormal)
11260 return Result;
11261
11262 return DAG.getNode(
11263 ISD::OR, Dl, MVT::i1,
11264 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11265 }
11266
11267 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11268 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11269 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11270 bool IsQuiet = Mask & fcQNan;
11271 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11272
11273 // Quietness is determined by the first bit in fraction field.
11274 uint64_t QuietMask = 0;
11275 SDValue HighWord;
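// Note (editorial): the quiet bit is the most significant fraction bit:
// bit 111 of an f128, bit 51 of an f64 and bit 22 of an f32. Within the
// extracted 32-bit high word that corresponds to the masks 0x8000, 0x80000
// and 0x400000 used below.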
11276 if (VT == MVT::f128) {
11277 HighWord = DAG.getNode(
11278 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11279 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11280 QuietMask = 0x8000;
11281 } else if (VT == MVT::f64) {
11282 if (Subtarget.isPPC64()) {
11283 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11284 DAG.getBitcast(MVT::i64, Op),
11285 DAG.getConstant(1, Dl, MVT::i32));
11286 } else {
11287 SDValue Vec = DAG.getBitcast(
11288 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11289 HighWord = DAG.getNode(
11290 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11291 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11292 }
11293 QuietMask = 0x80000;
11294 } else if (VT == MVT::f32) {
11295 HighWord = DAG.getBitcast(MVT::i32, Op);
11296 QuietMask = 0x400000;
11297 }
11298 SDValue NanRes = DAG.getSetCC(
11299 Dl, MVT::i1,
11300 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11301 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11302 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11303 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11304 if (Mask == fcQNan || Mask == fcSNan)
11305 return NanRes;
11306
11307 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11308 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11309 NanRes);
11310 }
11311
11312 unsigned NativeMask = 0;
11313 if ((Mask & fcNan) == fcNan)
11314 NativeMask |= DC_NAN;
11315 if (Mask & fcNegInf)
11316 NativeMask |= DC_NEG_INF;
11317 if (Mask & fcPosInf)
11318 NativeMask |= DC_POS_INF;
11319 if (Mask & fcNegZero)
11320 NativeMask |= DC_NEG_ZERO;
11321 if (Mask & fcPosZero)
11322 NativeMask |= DC_POS_ZERO;
11323 if (Mask & fcNegSubnormal)
11324 NativeMask |= DC_NEG_SUBNORM;
11325 if (Mask & fcPosSubnormal)
11326 NativeMask |= DC_POS_SUBNORM;
11327 return SDValue(
11328 DAG.getMachineNode(
11329 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11330 SDValue(DAG.getMachineNode(
11331 TestOp, Dl, MVT::i32,
11332 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11333 0),
11334 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11335 0);
11336}
11337
11338SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11339 SelectionDAG &DAG) const {
11340 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11341 SDValue LHS = Op.getOperand(0);
11342 uint64_t RHSC = Op.getConstantOperandVal(1);
11343 SDLoc Dl(Op);
11344 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11345 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11346}
11347
11348SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11349 SelectionDAG &DAG) const {
11350 SDLoc dl(Op);
11351 // Create a stack slot that is 16-byte aligned.
11352 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11353 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11354 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11355 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11356
11357 // Store the input value into Value#0 of the stack slot.
11358 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
11359 MachinePointerInfo());
11360 // Load it out.
11361 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11362}
11363
11364SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11365 SelectionDAG &DAG) const {
11366 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11367 "Should only be called for ISD::INSERT_VECTOR_ELT");
11368
11369 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11370
11371 EVT VT = Op.getValueType();
11372 SDLoc dl(Op);
11373 SDValue V1 = Op.getOperand(0);
11374 SDValue V2 = Op.getOperand(1);
11375
11376 if (VT == MVT::v2f64 && C)
11377 return Op;
11378
11379 if (Subtarget.hasP9Vector()) {
11380 // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11381 // because on P10, it allows this specific insert_vector_elt load pattern to
11382 // utilize the refactored load and store infrastructure in order to exploit
11383 // prefixed loads.
11384 // On targets with inexpensive direct moves (Power9 and up), a
11385 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11386 // load since a single precision load will involve conversion to double
11387 // precision on the load followed by another conversion to single precision.
11388 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11389 (isa<LoadSDNode>(V2))) {
11390 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11391 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11392 SDValue InsVecElt =
11393 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11394 BitcastLoad, Op.getOperand(2));
11395 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11396 }
11397 }
11398
11399 if (Subtarget.isISA3_1()) {
11400 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11401 return SDValue();
11402 // On P10, we have legal lowering for constant and variable indices for
11403 // all vectors.
11404 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11405 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11406 return Op;
11407 }
11408
11409 // Before P10, we have legal lowering for constant indices but not for
11410 // variable ones.
11411 if (!C)
11412 return SDValue();
11413
11414 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11415 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11416 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11417 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11418 unsigned InsertAtElement = C->getZExtValue();
11419 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11420 if (Subtarget.isLittleEndian()) {
11421 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11422 }
11423 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11424 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11425 }
11426 return Op;
11427}
11428
11429SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11430 SelectionDAG &DAG) const {
11431 SDLoc dl(Op);
11432 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11433 SDValue LoadChain = LN->getChain();
11434 SDValue BasePtr = LN->getBasePtr();
11435 EVT VT = Op.getValueType();
11436
11437 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11438 return Op;
11439
11440 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11441 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11442 // 2 or 4 vsx registers.
11443 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11444 "Type unsupported without MMA");
11445 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11446 "Type unsupported without paired vector support");
11447 Align Alignment = LN->getAlign();
11448 SmallVector<SDValue, 4> Loads;
11449 SmallVector<SDValue, 4> LoadChains;
11450 unsigned NumVecs = VT.getSizeInBits() / 128;
11451 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11452 SDValue Load =
11453 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11454 LN->getPointerInfo().getWithOffset(Idx * 16),
11455 commonAlignment(Alignment, Idx * 16),
11456 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11457 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11458 DAG.getConstant(16, dl, BasePtr.getValueType()));
11459 Loads.push_back(Load);
11460 LoadChains.push_back(Load.getValue(1));
11461 }
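// Note (editorial): on little-endian targets the order of the 16-byte
// chunks in memory is the reverse of the register order expected by the
// ACC_BUILD/PAIR_BUILD nodes, so the loaded values and their chains are
// reversed below.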
11462 if (Subtarget.isLittleEndian()) {
11463 std::reverse(Loads.begin(), Loads.end());
11464 std::reverse(LoadChains.begin(), LoadChains.end());
11465 }
11466 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11467 SDValue Value =
11468 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11469 dl, VT, Loads);
11470 SDValue RetOps[] = {Value, TF};
11471 return DAG.getMergeValues(RetOps, dl);
11472}
11473
11474SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11475 SelectionDAG &DAG) const {
11476 SDLoc dl(Op);
11477 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11478 SDValue StoreChain = SN->getChain();
11479 SDValue BasePtr = SN->getBasePtr();
11480 SDValue Value = SN->getValue();
11481 SDValue Value2 = SN->getValue();
11482 EVT StoreVT = Value.getValueType();
11483
11484 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11485 return Op;
11486
11487 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11488 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11489 // underlying registers individually.
11490 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11491 "Type unsupported without MMA");
11492 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11493 "Type unsupported without paired vector support");
11494 Align Alignment = SN->getAlign();
11495 SmallVector<SDValue, 4> Stores;
11496 unsigned NumVecs = 2;
11497 if (StoreVT == MVT::v512i1) {
11498 if (Subtarget.isISAFuture()) {
11499 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11500 MachineSDNode *ExtNode = DAG.getMachineNode(
11501 PPC::DMXXEXTFDMR512, dl, ArrayRef(ReturnTypes, 2), Op.getOperand(1));
11502
11503 Value = SDValue(ExtNode, 0);
11504 Value2 = SDValue(ExtNode, 1);
11505 } else
11506 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11507 NumVecs = 4;
11508 }
11509 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11510 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11511 SDValue Elt;
11512 if (Subtarget.isISAFuture()) {
11513 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11514 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11515 Idx > 1 ? Value2 : Value,
11516 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11517 } else
11518 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11519 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11520
11521 SDValue Store =
11522 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11523 SN->getPointerInfo().getWithOffset(Idx * 16),
11524 commonAlignment(Alignment, Idx * 16),
11525 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11526 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11527 DAG.getConstant(16, dl, BasePtr.getValueType()));
11528 Stores.push_back(Store);
11529 }
11530 SDValue TF = DAG.getTokenFactor(dl, Stores);
11531 return TF;
11532}
11533
11534SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11535 SDLoc dl(Op);
11536 if (Op.getValueType() == MVT::v4i32) {
11537 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11538
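// Note (editorial): this implements a 32-bit multiply via 16-bit halves:
// a*b mod 2^32 = a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 16). vmulouh
// produces the first term from the low halfwords, and vmsumuhm applied to
// LHS and the halfword-rotated RHS produces the cross terms.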
11539 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11540 // +16 as shift amt.
11541 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11542 SDValue RHSSwap = // = vrlw RHS, 16
11543 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11544
11545 // Shrinkify inputs to v8i16.
11546 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11547 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11548 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11549
11550 // Low parts multiplied together, generating 32-bit results (we ignore the
11551 // top parts).
11552 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11553 LHS, RHS, DAG, dl, MVT::v4i32);
11554
11555 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11556 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11557 // Shift the high parts up 16 bits.
11558 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11559 Neg16, DAG, dl);
11560 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11561 } else if (Op.getValueType() == MVT::v16i8) {
11562 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11563 bool isLittleEndian = Subtarget.isLittleEndian();
11564
11565 // Multiply the even 8-bit parts, producing 16-bit sums.
11566 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11567 LHS, RHS, DAG, dl, MVT::v8i16);
11568 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11569
11570 // Multiply the odd 8-bit parts, producing 16-bit sums.
11571 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11572 LHS, RHS, DAG, dl, MVT::v8i16);
11573 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11574
11575 // Merge the results together. Because vmuleub and vmuloub are
11576 // instructions with a big-endian bias, we must reverse the
11577 // element numbering and reverse the meaning of "odd" and "even"
11578 // when generating little endian code.
11579 int Ops[16];
11580 for (unsigned i = 0; i != 8; ++i) {
11581 if (isLittleEndian) {
11582 Ops[i*2 ] = 2*i;
11583 Ops[i*2+1] = 2*i+16;
11584 } else {
11585 Ops[i*2 ] = 2*i+1;
11586 Ops[i*2+1] = 2*i+1+16;
11587 }
11588 }
11589 if (isLittleEndian)
11590 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11591 else
11592 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11593 } else {
11594 llvm_unreachable("Unknown mul to lower!");
11595 }
11596}
11597
11598SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11599 bool IsStrict = Op->isStrictFPOpcode();
11600 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11601 !Subtarget.hasP9Vector())
11602 return SDValue();
11603
11604 return Op;
11605}
11606
11607 // Custom lowering for fpext v2f32 to v2f64
11608SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11609
11610 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11611 "Should only be called for ISD::FP_EXTEND");
11612
11613 // FIXME: handle extends from half precision float vectors on P9.
11614 // We only want to custom lower an extend from v2f32 to v2f64.
11615 if (Op.getValueType() != MVT::v2f64 ||
11616 Op.getOperand(0).getValueType() != MVT::v2f32)
11617 return SDValue();
11618
11619 SDLoc dl(Op);
11620 SDValue Op0 = Op.getOperand(0);
11621
11622 switch (Op0.getOpcode()) {
11623 default:
11624 return SDValue();
11625 case ISD::EXTRACT_SUBVECTOR: {
11626 assert(Op0.getNumOperands() == 2 &&
11627 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11628 "Node should have 2 operands with second one being a constant!");
11629
11630 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11631 return SDValue();
11632
11633 // Custom lowering is only done for the high or low doubleword.
11634 int Idx = Op0.getConstantOperandVal(1);
11635 if (Idx % 2 != 0)
11636 return SDValue();
11637
11638 // Since input is v4f32, at this point Idx is either 0 or 2.
11639 // Shift to get the doubleword position we want.
11640 int DWord = Idx >> 1;
11641
11642 // High and low word positions are different on little endian.
11643 if (Subtarget.isLittleEndian())
11644 DWord ^= 0x1;
11645
11646 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11647 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11648 }
11649 case ISD::FADD:
11650 case ISD::FMUL:
11651 case ISD::FSUB: {
11652 SDValue NewLoad[2];
11653 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11654 // Ensure both inputs are loads.
11655 SDValue LdOp = Op0.getOperand(i);
11656 if (LdOp.getOpcode() != ISD::LOAD)
11657 return SDValue();
11658 // Generate new load node.
11659 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11660 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11661 NewLoad[i] = DAG.getMemIntrinsicNode(
11662 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11663 LD->getMemoryVT(), LD->getMemOperand());
11664 }
11665 SDValue NewOp =
11666 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11667 NewLoad[1], Op0.getNode()->getFlags());
11668 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11669 DAG.getConstant(0, dl, MVT::i32));
11670 }
11671 case ISD::LOAD: {
11672 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11673 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11674 SDValue NewLd = DAG.getMemIntrinsicNode(
11675 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11676 LD->getMemoryVT(), LD->getMemOperand());
11677 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11678 DAG.getConstant(0, dl, MVT::i32));
11679 }
11680 }
11681 llvm_unreachable("ERROR:Should return for all cases within swtich.");
11682}
11683
11684/// LowerOperation - Provide custom lowering hooks for some operations.
11685///
11687 switch (Op.getOpcode()) {
11688 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11689 case ISD::FPOW: return lowerPow(Op, DAG);
11690 case ISD::FSIN: return lowerSin(Op, DAG);
11691 case ISD::FCOS: return lowerCos(Op, DAG);
11692 case ISD::FLOG: return lowerLog(Op, DAG);
11693 case ISD::FLOG10: return lowerLog10(Op, DAG);
11694 case ISD::FEXP: return lowerExp(Op, DAG);
11695 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11696 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11697 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11698 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11699 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11700 case ISD::STRICT_FSETCC:
11701 case ISD::STRICT_FSETCCS:
11702 case ISD::SETCC: return LowerSETCC(Op, DAG);
11703 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11704 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11705
11706 case ISD::INLINEASM:
11707 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11708 // Variable argument lowering.
11709 case ISD::VASTART: return LowerVASTART(Op, DAG);
11710 case ISD::VAARG: return LowerVAARG(Op, DAG);
11711 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11712
11713 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11714 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11715 case ISD::GET_DYNAMIC_AREA_OFFSET:
11716 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11717
11718 // Exception handling lowering.
11719 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11720 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11721 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11722
11723 case ISD::LOAD: return LowerLOAD(Op, DAG);
11724 case ISD::STORE: return LowerSTORE(Op, DAG);
11725 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11726 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11727 case ISD::STRICT_FP_TO_UINT:
11728 case ISD::STRICT_FP_TO_SINT:
11729 case ISD::FP_TO_UINT:
11730 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11731 case ISD::STRICT_UINT_TO_FP:
11732 case ISD::STRICT_SINT_TO_FP:
11733 case ISD::UINT_TO_FP:
11734 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11735 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11736
11737 // Lower 64-bit shifts.
11738 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11739 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11740 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11741
11742 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11743 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11744
11745 // Vector-related lowering.
11746 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11747 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11748 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11749 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11750 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11751 case ISD::MUL: return LowerMUL(Op, DAG);
11752 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11753 case ISD::STRICT_FP_ROUND:
11754 case ISD::FP_ROUND:
11755 return LowerFP_ROUND(Op, DAG);
11756 case ISD::ROTL: return LowerROTL(Op, DAG);
11757
11758 // For counter-based loop handling.
11759 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11760
11761 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11762
11763 // Frame & Return address.
11764 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11765 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11766
11767 case ISD::INTRINSIC_VOID:
11768 return LowerINTRINSIC_VOID(Op, DAG);
11769 case ISD::BSWAP:
11770 return LowerBSWAP(Op, DAG);
11771 case ISD::ATOMIC_CMP_SWAP:
11772 return LowerATOMIC_CMP_SWAP(Op, DAG);
11773 case ISD::ATOMIC_STORE:
11774 return LowerATOMIC_LOAD_STORE(Op, DAG);
11775 case ISD::IS_FPCLASS:
11776 return LowerIS_FPCLASS(Op, DAG);
11777 }
11778}
11779
11780 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11781 SmallVectorImpl<SDValue> &Results,
11782 SelectionDAG &DAG) const {
11783 SDLoc dl(N);
11784 switch (N->getOpcode()) {
11785 default:
11786 llvm_unreachable("Do not know how to custom type legalize this operation!");
11787 case ISD::ATOMIC_LOAD: {
11788 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11789 Results.push_back(Res);
11790 Results.push_back(Res.getValue(1));
11791 break;
11792 }
11793 case ISD::READCYCLECOUNTER: {
11794 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11795 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11796
11797 Results.push_back(
11798 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11799 Results.push_back(RTB.getValue(2));
11800 break;
11801 }
11802 case ISD::INTRINSIC_W_CHAIN: {
11803 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11804 break;
11805
11806 assert(N->getValueType(0) == MVT::i1 &&
11807 "Unexpected result type for CTR decrement intrinsic");
11808 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11809 N->getValueType(0));
11810 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11811 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11812 N->getOperand(1));
11813
11814 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11815 Results.push_back(NewInt.getValue(1));
11816 break;
11817 }
11818 case ISD::INTRINSIC_WO_CHAIN: {
11819 switch (N->getConstantOperandVal(0)) {
11820 case Intrinsic::ppc_pack_longdouble:
11821 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11822 N->getOperand(2), N->getOperand(1)));
11823 break;
11824 case Intrinsic::ppc_maxfe:
11825 case Intrinsic::ppc_minfe:
11826 case Intrinsic::ppc_fnmsub:
11827 case Intrinsic::ppc_convert_f128_to_ppcf128:
11828 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11829 break;
11830 }
11831 break;
11832 }
11833 case ISD::VAARG: {
11834 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11835 return;
11836
11837 EVT VT = N->getValueType(0);
11838
11839 if (VT == MVT::i64) {
11840 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11841
11842 Results.push_back(NewNode);
11843 Results.push_back(NewNode.getValue(1));
11844 }
11845 return;
11846 }
11847 case ISD::STRICT_FP_TO_SINT:
11848 case ISD::STRICT_FP_TO_UINT:
11849 case ISD::FP_TO_SINT:
11850 case ISD::FP_TO_UINT: {
11851 // LowerFP_TO_INT() can only handle f32 and f64.
11852 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11853 MVT::ppcf128)
11854 return;
11855 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11856 Results.push_back(LoweredValue);
11857 if (N->isStrictFPOpcode())
11858 Results.push_back(LoweredValue.getValue(1));
11859 return;
11860 }
11861 case ISD::TRUNCATE: {
11862 if (!N->getValueType(0).isVector())
11863 return;
11864 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11865 if (Lowered)
11866 Results.push_back(Lowered);
11867 return;
11868 }
11869 case ISD::FSHL:
11870 case ISD::FSHR:
11871 // Don't handle funnel shifts here.
11872 return;
11873 case ISD::BITCAST:
11874 // Don't handle bitcast here.
11875 return;
11876 case ISD::FP_EXTEND:
11877 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11878 if (Lowered)
11879 Results.push_back(Lowered);
11880 return;
11881 }
11882}
11883
11884//===----------------------------------------------------------------------===//
11885// Other Lowering Code
11886//===----------------------------------------------------------------------===//
11887
11888 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
11889 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11890 Function *Func = Intrinsic::getDeclaration(M, Id);
11891 return Builder.CreateCall(Func, {});
11892}
11893
11894 // The mappings for emitLeading/TrailingFence are taken from
11895 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11896 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11897 Instruction *Inst,
11898 AtomicOrdering Ord) const {
11899 if (Ord == AtomicOrdering::SequentiallyConsistent)
11900 return callIntrinsic(Builder, Intrinsic::ppc_sync);
11901 if (isReleaseOrStronger(Ord))
11902 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11903 return nullptr;
11904}
11905
11906 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
11907 Instruction *Inst,
11908 AtomicOrdering Ord) const {
11909 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11910 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11911 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11912 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11913 if (isa<LoadInst>(Inst))
11914 return Builder.CreateCall(
11915 Intrinsic::getDeclaration(
11916 Builder.GetInsertBlock()->getParent()->getParent(),
11917 Intrinsic::ppc_cfence, {Inst->getType()}),
11918 {Inst});
11919 // FIXME: Can use isync for rmw operation.
11920 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11921 }
11922 return nullptr;
11923}
11924
11925 MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI,
11926 MachineBasicBlock *BB,
11927 unsigned AtomicSize,
11928 unsigned BinOpcode,
11929 unsigned CmpOpcode,
11930 unsigned CmpPred) const {
11931 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11932 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11933
11934 auto LoadMnemonic = PPC::LDARX;
11935 auto StoreMnemonic = PPC::STDCX;
11936 switch (AtomicSize) {
11937 default:
11938 llvm_unreachable("Unexpected size of atomic entity");
11939 case 1:
11940 LoadMnemonic = PPC::LBARX;
11941 StoreMnemonic = PPC::STBCX;
11942 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11943 break;
11944 case 2:
11945 LoadMnemonic = PPC::LHARX;
11946 StoreMnemonic = PPC::STHCX;
11947 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11948 break;
11949 case 4:
11950 LoadMnemonic = PPC::LWARX;
11951 StoreMnemonic = PPC::STWCX;
11952 break;
11953 case 8:
11954 LoadMnemonic = PPC::LDARX;
11955 StoreMnemonic = PPC::STDCX;
11956 break;
11957 }
11958
11959 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11960 MachineFunction *F = BB->getParent();
11961 MachineFunction::iterator It = ++BB->getIterator();
11962
11963 Register dest = MI.getOperand(0).getReg();
11964 Register ptrA = MI.getOperand(1).getReg();
11965 Register ptrB = MI.getOperand(2).getReg();
11966 Register incr = MI.getOperand(3).getReg();
11967 DebugLoc dl = MI.getDebugLoc();
11968
11969 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11970 MachineBasicBlock *loop2MBB =
11971 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11972 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11973 F->insert(It, loopMBB);
11974 if (CmpOpcode)
11975 F->insert(It, loop2MBB);
11976 F->insert(It, exitMBB);
11977 exitMBB->splice(exitMBB->begin(), BB,
11978 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11979 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11980
11981 MachineRegisterInfo &RegInfo = F->getRegInfo();
11982 Register TmpReg = (!BinOpcode) ? incr :
11983 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11984 : &PPC::GPRCRegClass);
11985
11986 // thisMBB:
11987 // ...
11988 // fallthrough --> loopMBB
11989 BB->addSuccessor(loopMBB);
11990
11991 // loopMBB:
11992 // l[wd]arx dest, ptr
11993 // add r0, dest, incr
11994 // st[wd]cx. r0, ptr
11995 // bne- loopMBB
11996 // fallthrough --> exitMBB
11997
11998 // For max/min...
11999 // loopMBB:
12000 // l[wd]arx dest, ptr
12001 // cmpl?[wd] dest, incr
12002 // bgt exitMBB
12003 // loop2MBB:
12004 // st[wd]cx. dest, ptr
12005 // bne- loopMBB
12006 // fallthrough --> exitMBB
12007
12008 BB = loopMBB;
12009 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12010 .addReg(ptrA).addReg(ptrB);
12011 if (BinOpcode)
12012 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12013 if (CmpOpcode) {
12014 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12015 // Signed comparisons of byte or halfword values must be sign-extended.
12016 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12017 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12018 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12019 ExtReg).addReg(dest);
12020 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12021 } else
12022 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12023
12024 BuildMI(BB, dl, TII->get(PPC::BCC))
12025 .addImm(CmpPred)
12026 .addReg(CrReg)
12027 .addMBB(exitMBB);
12028 BB->addSuccessor(loop2MBB);
12029 BB->addSuccessor(exitMBB);
12030 BB = loop2MBB;
12031 }
12032 BuildMI(BB, dl, TII->get(StoreMnemonic))
12033 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12034 BuildMI(BB, dl, TII->get(PPC::BCC))
12035 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12036 BB->addSuccessor(loopMBB);
12037 BB->addSuccessor(exitMBB);
12038
12039 // exitMBB:
12040 // ...
12041 BB = exitMBB;
12042 return BB;
12043}
12044
12045 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12046 switch(MI.getOpcode()) {
12047 default:
12048 return false;
12049 case PPC::COPY:
12050 return TII->isSignExtended(MI.getOperand(1).getReg(),
12051 &MI.getMF()->getRegInfo());
12052 case PPC::LHA:
12053 case PPC::LHA8:
12054 case PPC::LHAU:
12055 case PPC::LHAU8:
12056 case PPC::LHAUX:
12057 case PPC::LHAUX8:
12058 case PPC::LHAX:
12059 case PPC::LHAX8:
12060 case PPC::LWA:
12061 case PPC::LWAUX:
12062 case PPC::LWAX:
12063 case PPC::LWAX_32:
12064 case PPC::LWA_32:
12065 case PPC::PLHA:
12066 case PPC::PLHA8:
12067 case PPC::PLHA8pc:
12068 case PPC::PLHApc:
12069 case PPC::PLWA:
12070 case PPC::PLWA8:
12071 case PPC::PLWA8pc:
12072 case PPC::PLWApc:
12073 case PPC::EXTSB:
12074 case PPC::EXTSB8:
12075 case PPC::EXTSB8_32_64:
12076 case PPC::EXTSB8_rec:
12077 case PPC::EXTSB_rec:
12078 case PPC::EXTSH:
12079 case PPC::EXTSH8:
12080 case PPC::EXTSH8_32_64:
12081 case PPC::EXTSH8_rec:
12082 case PPC::EXTSH_rec:
12083 case PPC::EXTSW:
12084 case PPC::EXTSWSLI:
12085 case PPC::EXTSWSLI_32_64:
12086 case PPC::EXTSWSLI_32_64_rec:
12087 case PPC::EXTSWSLI_rec:
12088 case PPC::EXTSW_32:
12089 case PPC::EXTSW_32_64:
12090 case PPC::EXTSW_32_64_rec:
12091 case PPC::EXTSW_rec:
12092 case PPC::SRAW:
12093 case PPC::SRAWI:
12094 case PPC::SRAWI_rec:
12095 case PPC::SRAW_rec:
12096 return true;
12097 }
12098 return false;
12099}
12100
12101 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12102 MachineInstr &MI, MachineBasicBlock *BB,
12103 bool is8bit, // operation
12104 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12105 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12106 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12107
12108 // If this is a signed comparison and the value being compared is not known
12109 // to be sign extended, sign extend it here.
12110 DebugLoc dl = MI.getDebugLoc();
12111 MachineFunction *F = BB->getParent();
12112 MachineRegisterInfo &RegInfo = F->getRegInfo();
12113 Register incr = MI.getOperand(3).getReg();
12114 bool IsSignExtended =
12115 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12116
12117 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12118 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12119 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12120 .addReg(MI.getOperand(3).getReg());
12121 MI.getOperand(3).setReg(ValueReg);
12122 incr = ValueReg;
12123 }
12124 // If we support part-word atomic mnemonics, just use them
12125 if (Subtarget.hasPartwordAtomics())
12126 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12127 CmpPred);
12128
12129 // In 64 bit mode we have to use 64 bits for addresses, even though the
12130 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12131 // registers without caring whether they're 32 or 64, but here we're
12132 // doing actual arithmetic on the addresses.
12133 bool is64bit = Subtarget.isPPC64();
12134 bool isLittleEndian = Subtarget.isLittleEndian();
12135 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12136
12137 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12138 MachineFunction::iterator It = ++BB->getIterator();
12139
12140 Register dest = MI.getOperand(0).getReg();
12141 Register ptrA = MI.getOperand(1).getReg();
12142 Register ptrB = MI.getOperand(2).getReg();
12143
12144 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12145 MachineBasicBlock *loop2MBB =
12146 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12147 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12148 F->insert(It, loopMBB);
12149 if (CmpOpcode)
12150 F->insert(It, loop2MBB);
12151 F->insert(It, exitMBB);
12152 exitMBB->splice(exitMBB->begin(), BB,
12153 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12154 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12155
12156 const TargetRegisterClass *RC =
12157 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12158 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12159
12160 Register PtrReg = RegInfo.createVirtualRegister(RC);
12161 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12162 Register ShiftReg =
12163 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12164 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12165 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12166 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12167 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12168 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12169 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12170 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12171 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12172 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12173 Register Ptr1Reg;
12174 Register TmpReg =
12175 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12176
12177 // thisMBB:
12178 // ...
12179 // fallthrough --> loopMBB
12180 BB->addSuccessor(loopMBB);
12181
12182 // The 4-byte load must be aligned, while a char or short may be
12183 // anywhere in the word. Hence all this nasty bookkeeping code.
12184 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12185 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12186 // xori shift, shift1, 24 [16]
12187 // rlwinm ptr, ptr1, 0, 0, 29
12188 // slw incr2, incr, shift
12189 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12190 // slw mask, mask2, shift
12191 // loopMBB:
12192 // lwarx tmpDest, ptr
12193 // add tmp, tmpDest, incr2
12194 // andc tmp2, tmpDest, mask
12195 // and tmp3, tmp, mask
12196 // or tmp4, tmp3, tmp2
12197 // stwcx. tmp4, ptr
12198 // bne- loopMBB
12199 // fallthrough --> exitMBB
12200 // srw SrwDest, tmpDest, shift
12201 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
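// Note (editorial): for example, an 8-bit operand at byte offset 2 within
// its aligned word on a little-endian target gets shift1 = shift = 16, so
// incr2 and the 0xFF mask end up shifted into bits 16-23 of the word.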
12202 if (ptrA != ZeroReg) {
12203 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12204 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12205 .addReg(ptrA)
12206 .addReg(ptrB);
12207 } else {
12208 Ptr1Reg = ptrB;
12209 }
12210 // We need to use the 32-bit subregister here to avoid a register class
12211 // mismatch in 64-bit mode.
12212 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12213 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12214 .addImm(3)
12215 .addImm(27)
12216 .addImm(is8bit ? 28 : 27);
12217 if (!isLittleEndian)
12218 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12219 .addReg(Shift1Reg)
12220 .addImm(is8bit ? 24 : 16);
12221 if (is64bit)
12222 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12223 .addReg(Ptr1Reg)
12224 .addImm(0)
12225 .addImm(61);
12226 else
12227 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12228 .addReg(Ptr1Reg)
12229 .addImm(0)
12230 .addImm(0)
12231 .addImm(29);
12232 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12233 if (is8bit)
12234 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12235 else {
12236 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12237 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12238 .addReg(Mask3Reg)
12239 .addImm(65535);
12240 }
12241 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12242 .addReg(Mask2Reg)
12243 .addReg(ShiftReg);
12244
12245 BB = loopMBB;
12246 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12247 .addReg(ZeroReg)
12248 .addReg(PtrReg);
12249 if (BinOpcode)
12250 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12251 .addReg(Incr2Reg)
12252 .addReg(TmpDestReg);
12253 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12254 .addReg(TmpDestReg)
12255 .addReg(MaskReg);
12256 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12257 if (CmpOpcode) {
12258 // For unsigned comparisons, we can directly compare the shifted values.
12259 // For signed comparisons we shift and sign extend.
12260 Register SReg = RegInfo.createVirtualRegister(GPRC);
12261 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12262 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12263 .addReg(TmpDestReg)
12264 .addReg(MaskReg);
12265 unsigned ValueReg = SReg;
12266 unsigned CmpReg = Incr2Reg;
12267 if (CmpOpcode == PPC::CMPW) {
12268 ValueReg = RegInfo.createVirtualRegister(GPRC);
12269 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12270 .addReg(SReg)
12271 .addReg(ShiftReg);
12272 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12273 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12274 .addReg(ValueReg);
12275 ValueReg = ValueSReg;
12276 CmpReg = incr;
12277 }
12278 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12279 BuildMI(BB, dl, TII->get(PPC::BCC))
12280 .addImm(CmpPred)
12281 .addReg(CrReg)
12282 .addMBB(exitMBB);
12283 BB->addSuccessor(loop2MBB);
12284 BB->addSuccessor(exitMBB);
12285 BB = loop2MBB;
12286 }
12287 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12288 BuildMI(BB, dl, TII->get(PPC::STWCX))
12289 .addReg(Tmp4Reg)
12290 .addReg(ZeroReg)
12291 .addReg(PtrReg);
12292 BuildMI(BB, dl, TII->get(PPC::BCC))
12293 .addImm(PPC::PRED_NE)
12294 .addReg(PPC::CR0)
12295 .addMBB(loopMBB);
12296 BB->addSuccessor(loopMBB);
12297 BB->addSuccessor(exitMBB);
12298
12299 // exitMBB:
12300 // ...
12301 BB = exitMBB;
12302 // Since the shift amount is not a constant, we need to clear
12303 // the upper bits with a separate RLWINM.
12304 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12305 .addReg(SrwDestReg)
12306 .addImm(0)
12307 .addImm(is8bit ? 24 : 16)
12308 .addImm(31);
12309 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12310 .addReg(TmpDestReg)
12311 .addReg(ShiftReg);
12312 return BB;
12313}
12314
12315 MachineBasicBlock *
12316 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12317 MachineBasicBlock *MBB) const {
12318 DebugLoc DL = MI.getDebugLoc();
12319 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12320 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12321
12322 MachineFunction *MF = MBB->getParent();
12323 MachineRegisterInfo &MRI = MF->getRegInfo();
12324
12325 const BasicBlock *BB = MBB->getBasicBlock();
12326 MachineFunction::iterator I = ++MBB->getIterator();
12327
12328 Register DstReg = MI.getOperand(0).getReg();
12329 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12330 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12331 Register mainDstReg = MRI.createVirtualRegister(RC);
12332 Register restoreDstReg = MRI.createVirtualRegister(RC);
12333
12334 MVT PVT = getPointerTy(MF->getDataLayout());
12335 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12336 "Invalid Pointer Size!");
12337 // For v = setjmp(buf), we generate
12338 //
12339 // thisMBB:
12340 // SjLjSetup mainMBB
12341 // bl mainMBB
12342 // v_restore = 1
12343 // b sinkMBB
12344 //
12345 // mainMBB:
12346 // buf[LabelOffset] = LR
12347 // v_main = 0
12348 //
12349 // sinkMBB:
12350 // v = phi(main, restore)
12351 //
12352
12353 MachineBasicBlock *thisMBB = MBB;
12354 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12355 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12356 MF->insert(I, mainMBB);
12357 MF->insert(I, sinkMBB);
12358
12359 MachineInstrBuilder MIB;
12360
12361 // Transfer the remainder of BB and its successor edges to sinkMBB.
12362 sinkMBB->splice(sinkMBB->begin(), MBB,
12363 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12364 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12365
12366 // Note that the structure of the jmp_buf used here is not compatible
12367 // with that used by libc, and is not designed to be. Specifically, it
12368 // stores only those 'reserved' registers that LLVM does not otherwise
12369 // understand how to spill. Also, by convention, by the time this
12370 // intrinsic is called, Clang has already stored the frame address in the
12371 // first slot of the buffer and stack address in the third. Following the
12372 // X86 target code, we'll store the jump address in the second slot. We also
12373 // need to save the TOC pointer (R2) to handle jumps between shared
12374 // libraries, and that will be stored in the fourth slot. The thread
12375 // identifier (R13) is not affected.
12376
12377 // thisMBB:
12378 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12379 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12380 const int64_t BPOffset = 4 * PVT.getStoreSize();
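// Note (editorial): resulting jmp_buf slot usage, in pointer-sized slots:
// 0 = frame address (stored by the front end), 1 = jump address
// (LabelOffset), 2 = stack pointer, 3 = TOC/R2 (TOCOffset),
// 4 = base pointer (BPOffset).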
12381
12382 // Prepare the IP in a register.
12383 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12384 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12385 Register BufReg = MI.getOperand(1).getReg();
12386
12387 if (Subtarget.is64BitELFABI()) {
12388 setUsesTOCBasePtr(*MBB->getParent());
12389 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12390 .addReg(PPC::X2)
12391 .addImm(TOCOffset)
12392 .addReg(BufReg)
12393 .cloneMemRefs(MI);
12394 }
12395
12396 // Naked functions never have a base pointer, and so we use r1. For all
12397 // other functions, this decision must be delayed until during PEI.
12398 unsigned BaseReg;
12399 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12400 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12401 else
12402 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12403
12404 MIB = BuildMI(*thisMBB, MI, DL,
12405 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12406 .addReg(BaseReg)
12407 .addImm(BPOffset)
12408 .addReg(BufReg)
12409 .cloneMemRefs(MI);
12410
12411 // Setup
12412 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12413 MIB.addRegMask(TRI->getNoPreservedMask());
12414
12415 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12416
12417 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12418 .addMBB(mainMBB);
12419 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12420
12421 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12422 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12423
12424 // mainMBB:
12425 // mainDstReg = 0
12426 MIB =
12427 BuildMI(mainMBB, DL,
12428 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12429
12430 // Store IP
12431 if (Subtarget.isPPC64()) {
12432 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12433 .addReg(LabelReg)
12434 .addImm(LabelOffset)
12435 .addReg(BufReg);
12436 } else {
12437 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12438 .addReg(LabelReg)
12439 .addImm(LabelOffset)
12440 .addReg(BufReg);
12441 }
12442 MIB.cloneMemRefs(MI);
12443
12444 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12445 mainMBB->addSuccessor(sinkMBB);
12446
12447 // sinkMBB:
12448 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12449 TII->get(PPC::PHI), DstReg)
12450 .addReg(mainDstReg).addMBB(mainMBB)
12451 .addReg(restoreDstReg).addMBB(thisMBB);
12452
12453 MI.eraseFromParent();
12454 return sinkMBB;
12455}
12456
12457 MachineBasicBlock *
12458 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12459 MachineBasicBlock *MBB) const {
12460 DebugLoc DL = MI.getDebugLoc();
12461 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12462
12463 MachineFunction *MF = MBB->getParent();
12464 MachineRegisterInfo &MRI = MF->getRegInfo();
12465
12466 MVT PVT = getPointerTy(MF->getDataLayout());
12467 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12468 "Invalid Pointer Size!");
12469
12470 const TargetRegisterClass *RC =
12471 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12472 Register Tmp = MRI.createVirtualRegister(RC);
12473 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12474 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12475 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12476 unsigned BP =
12477 (PVT == MVT::i64)
12478 ? PPC::X30
12479 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12480 : PPC::R30);
12481
12483
12484 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12485 const int64_t SPOffset = 2 * PVT.getStoreSize();
12486 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12487 const int64_t BPOffset = 4 * PVT.getStoreSize();
12488
12489 Register BufReg = MI.getOperand(0).getReg();
12490
12491 // Reload FP (the jumped-to function may not have had a
12492 // frame pointer, and if so, then its r31 will be restored
12493 // as necessary).
12494 if (PVT == MVT::i64) {
12495 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12496 .addImm(0)
12497 .addReg(BufReg);
12498 } else {
12499 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12500 .addImm(0)
12501 .addReg(BufReg);
12502 }
12503 MIB.cloneMemRefs(MI);
12504
12505 // Reload IP
12506 if (PVT == MVT::i64) {
12507 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12508 .addImm(LabelOffset)
12509 .addReg(BufReg);
12510 } else {
12511 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12512 .addImm(LabelOffset)
12513 .addReg(BufReg);
12514 }
12515 MIB.cloneMemRefs(MI);
12516
12517 // Reload SP
12518 if (PVT == MVT::i64) {
12519 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12520 .addImm(SPOffset)
12521 .addReg(BufReg);
12522 } else {
12523 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12524 .addImm(SPOffset)
12525 .addReg(BufReg);
12526 }
12527 MIB.cloneMemRefs(MI);
12528
12529 // Reload BP
12530 if (PVT == MVT::i64) {
12531 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12532 .addImm(BPOffset)
12533 .addReg(BufReg);
12534 } else {
12535 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12536 .addImm(BPOffset)
12537 .addReg(BufReg);
12538 }
12539 MIB.cloneMemRefs(MI);
12540
12541 // Reload TOC
12542 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12543 setUsesTOCBasePtr(*MBB->getParent());
12544 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12545 .addImm(TOCOffset)
12546 .addReg(BufReg)
12547 .cloneMemRefs(MI);
12548 }
12549
12550 // Jump
12551 BuildMI(*MBB, MI, DL,
12552 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12553 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12554
12555 MI.eraseFromParent();
12556 return MBB;
12557}
12558
12559 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12560 // If the function specifically requests inline stack probes, emit them.
12561 if (MF.getFunction().hasFnAttribute("probe-stack"))
12562 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12563 "inline-asm";
12564 return false;
12565}
12566
12567 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12568 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12569 unsigned StackAlign = TFI->getStackAlignment();
12570 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12571 "Unexpected stack alignment");
12572 // The default stack probe size is 4096 if the function has no
12573 // stack-probe-size attribute.
12574 const Function &Fn = MF.getFunction();
12575 unsigned StackProbeSize =
12576 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12577 // Round down to the stack alignment.
12578 StackProbeSize &= ~(StackAlign - 1);
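// Note (editorial): e.g. a requested "stack-probe-size" of 4100 with a
// 16-byte stack alignment rounds down to 4096; if rounding down yields 0,
// the return below falls back to the stack alignment itself.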
12579 return StackProbeSize ? StackProbeSize : StackAlign;
12580}
12581
12582 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12583 // into three phases. In the first phase, it uses the pseudo instruction
12584 // PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
12585 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
12586 // Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
12587 // of MaxCallFrameSize so that it can calculate the correct data area pointer.
12588 MachineBasicBlock *
12589 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12590 MachineBasicBlock *MBB) const {
12591 const bool isPPC64 = Subtarget.isPPC64();
12592 MachineFunction *MF = MBB->getParent();
12593 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12594 DebugLoc DL = MI.getDebugLoc();
12595 const unsigned ProbeSize = getStackProbeSize(*MF);
12596 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12597 MachineRegisterInfo &MRI = MF->getRegInfo();
12598 // The CFG of probing stack looks as
12599 // +-----+
12600 // | MBB |
12601 // +--+--+
12602 // |
12603 // +----v----+
12604 // +--->+ TestMBB +---+
12605 // | +----+----+ |
12606 // | | |
12607 // | +-----v----+ |
12608 // +---+ BlockMBB | |
12609 // +----------+ |
12610 // |
12611 // +---------+ |
12612 // | TailMBB +<--+
12613 // +---------+
12614 // In MBB, calculate previous frame pointer and final stack pointer.
12615 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12616 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12617 // TailMBB is spliced via \p MI.
12618 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12619 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12620 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12621
12622 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12623 MF->insert(MBBIter, TestMBB);
12624 MF->insert(MBBIter, BlockMBB);
12625 MF->insert(MBBIter, TailMBB);
12626
12627 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12628 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12629
12630 Register DstReg = MI.getOperand(0).getReg();
12631 Register NegSizeReg = MI.getOperand(1).getReg();
12632 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12633 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12634 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12635 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12636
12637 // Since the value of NegSizeReg might be realigned during prologue/epilogue
12638 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12639 // actual FramePointer and NegSize.
12640 unsigned ProbeOpc;
12641 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12642 ProbeOpc =
12643 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12644 else
12645 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
12646 // and NegSizeReg will be allocated to the same physical register to avoid a
12647 // redundant copy when NegSizeReg has only one use, namely the current MI,
12648 // which will then be replaced by PREPARE_PROBED_ALLOCA.
12649 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12650 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12651 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12652 .addDef(ActualNegSizeReg)
12653 .addReg(NegSizeReg)
12654 .add(MI.getOperand(2))
12655 .add(MI.getOperand(3));
12656
12657 // Calculate the final stack pointer, which equals SP + ActualNegSize.
12658 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12659 FinalStackPtr)
12660 .addReg(SPReg)
12661 .addReg(ActualNegSizeReg);
12662
12663 // Materialize a scratch register for update.
12664 int64_t NegProbeSize = -(int64_t)ProbeSize;
12665 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12666 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12667 if (!isInt<16>(NegProbeSize)) {
12668 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12669 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12670 .addImm(NegProbeSize >> 16);
12671 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12672 ScratchReg)
12673 .addReg(TempReg)
12674 .addImm(NegProbeSize & 0xFFFF);
12675 } else
12676 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12677 .addImm(NegProbeSize);
12678
12679 {
12680 // Probe the leading residual part.
12681 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12682 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12683 .addReg(ActualNegSizeReg)
12684 .addReg(ScratchReg);
12685 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12686 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12687 .addReg(Div)
12688 .addReg(ScratchReg);
12689 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12690 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12691 .addReg(Mul)
12692 .addReg(ActualNegSizeReg);
12693 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12694 .addReg(FramePointer)
12695 .addReg(SPReg)
12696 .addReg(NegMod);
12697 }
12698
12699 {
12700 // The remaining part should be a multiple of ProbeSize.
12701 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12702 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12703 .addReg(SPReg)
12704 .addReg(FinalStackPtr);
12705 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12707 .addReg(CmpResult)
12708 .addMBB(TailMBB);
12709 TestMBB->addSuccessor(BlockMBB);
12710 TestMBB->addSuccessor(TailMBB);
12711 }
12712
12713 {
12714 // Touch the block.
12715 // |P...|P...|P...
12716 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12717 .addReg(FramePointer)
12718 .addReg(SPReg)
12719 .addReg(ScratchReg);
12720 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12721 BlockMBB->addSuccessor(TestMBB);
12722 }
12723
12724 // The calculation of MaxCallFrameSize is deferred to prologue/epilogue
12725 // insertion; use the DYNAREAOFFSET pseudo instruction to get the future result.
12726 Register MaxCallFrameSizeReg =
12727 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12728 BuildMI(TailMBB, DL,
12729 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12730 MaxCallFrameSizeReg)
12731 .add(MI.getOperand(2))
12732 .add(MI.getOperand(3));
12733 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12734 .addReg(SPReg)
12735 .addReg(MaxCallFrameSizeReg);
12736
12737 // Splice instructions after MI to TailMBB.
12738 TailMBB->splice(TailMBB->end(), MBB,
12739 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12741 MBB->addSuccessor(TestMBB);
12742
12743 // Delete the pseudo instruction.
12744 MI.eraseFromParent();
12745
12746 ++NumDynamicAllocaProbed;
12747 return TailMBB;
12748}
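// A rough C-level sketch (illustration only, with invented names; not the
// actual lowering) of the runtime behaviour produced by the probing code
// above: probe the leading residual part, then walk the stack pointer down one
// ProbeSize block at a time until it reaches FinalStackPtr.
static char *probedAllocaSketch(char *SP, long NegSize, long ProbeSize) {
  char *FinalSP = SP + NegSize;          // FinalStackPtr = SP + ActualNegSize
  long Residual = NegSize % ProbeSize;   // leading residual part (<= 0)
  SP += Residual;                        // stdux/stwux-style update of SP
  *SP = 0;                               // touch the newly exposed area
  while (SP != FinalSP) {                // TestMBB: compare SP, FinalStackPtr
    SP -= ProbeSize;                     // BlockMBB: move SP down one block
    *SP = 0;                             //           and touch it
  }
  return SP;                             // TailMBB then adds MaxCallFrameSize
}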
12749
12751 switch (MI.getOpcode()) {
12752 case PPC::SELECT_CC_I4:
12753 case PPC::SELECT_CC_I8:
12754 case PPC::SELECT_CC_F4:
12755 case PPC::SELECT_CC_F8:
12756 case PPC::SELECT_CC_F16:
12757 case PPC::SELECT_CC_VRRC:
12758 case PPC::SELECT_CC_VSFRC:
12759 case PPC::SELECT_CC_VSSRC:
12760 case PPC::SELECT_CC_VSRC:
12761 case PPC::SELECT_CC_SPE4:
12762 case PPC::SELECT_CC_SPE:
12763 return true;
12764 default:
12765 return false;
12766 }
12767}
12768
12769static bool IsSelect(MachineInstr &MI) {
12770 switch (MI.getOpcode()) {
12771 case PPC::SELECT_I4:
12772 case PPC::SELECT_I8:
12773 case PPC::SELECT_F4:
12774 case PPC::SELECT_F8:
12775 case PPC::SELECT_F16:
12776 case PPC::SELECT_SPE:
12777 case PPC::SELECT_SPE4:
12778 case PPC::SELECT_VRRC:
12779 case PPC::SELECT_VSFRC:
12780 case PPC::SELECT_VSSRC:
12781 case PPC::SELECT_VSRC:
12782 return true;
12783 default:
12784 return false;
12785 }
12786}
12787
12790 MachineBasicBlock *BB) const {
12791 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12792 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12793 if (Subtarget.is64BitELFABI() &&
12794 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12795 !Subtarget.isUsingPCRelativeCalls()) {
12796 // Call lowering should have added an r2 operand to indicate a dependence
12797 // on the TOC base pointer value. It can't, however, because there is no
12798 // way to mark the dependence as implicit there, and so the stackmap code
12799 // will confuse it with a regular operand. Instead, add the dependence
12800 // here.
12801 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12802 }
12803
12804 return emitPatchPoint(MI, BB);
12805 }
12806
12807 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12808 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12809 return emitEHSjLjSetJmp(MI, BB);
12810 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12811 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12812 return emitEHSjLjLongJmp(MI, BB);
12813 }
12814
12815 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12816
12817 // To "insert" these instructions we actually have to insert their
12818 // control-flow patterns.
12819 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12821
12822 MachineFunction *F = BB->getParent();
12823 MachineRegisterInfo &MRI = F->getRegInfo();
12824
12825 if (Subtarget.hasISEL() &&
12826 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12827 MI.getOpcode() == PPC::SELECT_CC_I8 ||
12828 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
12830 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12831 MI.getOpcode() == PPC::SELECT_CC_I8)
12832 Cond.push_back(MI.getOperand(4));
12833 else
12835 Cond.push_back(MI.getOperand(1));
12836
12837 DebugLoc dl = MI.getDebugLoc();
12838 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12839 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12840 } else if (IsSelectCC(MI) || IsSelect(MI)) {
12841 // The incoming instruction knows the destination vreg to set, the
12842 // condition code register to branch on, the true/false values to
12843 // select between, and a branch opcode to use.
12844
12845 // thisMBB:
12846 // ...
12847 // TrueVal = ...
12848 // cmpTY ccX, r1, r2
12849 // bCC sinkMBB
12850 // fallthrough --> copy0MBB
12851 MachineBasicBlock *thisMBB = BB;
12852 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12853 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12854 DebugLoc dl = MI.getDebugLoc();
12855 F->insert(It, copy0MBB);
12856 F->insert(It, sinkMBB);
12857
12858 // Set the call frame size on entry to the new basic blocks.
12859 // See https://reviews.llvm.org/D156113.
12860 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12861 copy0MBB->setCallFrameSize(CallFrameSize);
12862 sinkMBB->setCallFrameSize(CallFrameSize);
12863
12864 // Transfer the remainder of BB and its successor edges to sinkMBB.
12865 sinkMBB->splice(sinkMBB->begin(), BB,
12866 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12868
12869 // Next, add the true and fallthrough blocks as its successors.
12870 BB->addSuccessor(copy0MBB);
12871 BB->addSuccessor(sinkMBB);
12872
12873 if (IsSelect(MI)) {
12874 BuildMI(BB, dl, TII->get(PPC::BC))
12875 .addReg(MI.getOperand(1).getReg())
12876 .addMBB(sinkMBB);
12877 } else {
12878 unsigned SelectPred = MI.getOperand(4).getImm();
12879 BuildMI(BB, dl, TII->get(PPC::BCC))
12880 .addImm(SelectPred)
12881 .addReg(MI.getOperand(1).getReg())
12882 .addMBB(sinkMBB);
12883 }
12884
12885 // copy0MBB:
12886 // %FalseValue = ...
12887 // # fallthrough to sinkMBB
12888 BB = copy0MBB;
12889
12890 // Update machine-CFG edges
12891 BB->addSuccessor(sinkMBB);
12892
12893 // sinkMBB:
12894 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12895 // ...
12896 BB = sinkMBB;
12897 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12898 .addReg(MI.getOperand(3).getReg())
12899 .addMBB(copy0MBB)
12900 .addReg(MI.getOperand(2).getReg())
12901 .addMBB(thisMBB);
12902 } else if (MI.getOpcode() == PPC::ReadTB) {
12903 // To read the 64-bit time-base register on a 32-bit target, we read the
12904 // two halves. Should the counter have wrapped while it was being read, we
12905 // need to try again.
12906 // ...
12907 // readLoop:
12908 // mfspr Rx,TBU # load from TBU
12909 // mfspr Ry,TB # load from TB
12910 // mfspr Rz,TBU # load from TBU
12911 // cmpw crX,Rx,Rz # check if 'old'='new'
12912 // bne readLoop # branch if they're not equal
12913 // ...
12914
12915 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12916 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12917 DebugLoc dl = MI.getDebugLoc();
12918 F->insert(It, readMBB);
12919 F->insert(It, sinkMBB);
12920
12921 // Transfer the remainder of BB and its successor edges to sinkMBB.
12922 sinkMBB->splice(sinkMBB->begin(), BB,
12923 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12925
12926 BB->addSuccessor(readMBB);
12927 BB = readMBB;
12928
12929 MachineRegisterInfo &RegInfo = F->getRegInfo();
12930 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12931 Register LoReg = MI.getOperand(0).getReg();
12932 Register HiReg = MI.getOperand(1).getReg();
12933
12934 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12935 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12936 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12937
12938 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12939
12940 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12941 .addReg(HiReg)
12942 .addReg(ReadAgainReg);
12943 BuildMI(BB, dl, TII->get(PPC::BCC))
12945 .addReg(CmpReg)
12946 .addMBB(readMBB);
12947
12948 BB->addSuccessor(readMBB);
12949 BB->addSuccessor(sinkMBB);
12950 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12951 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12952 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12953 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12954 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12955 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12956 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12957 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12958
12959 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12960 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12961 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12962 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12963 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12964 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12965 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12966 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12967
12968 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12969 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12970 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12971 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12972 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12973 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12974 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12975 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12976
12977 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12978 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12979 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12980 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12981 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12982 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12983 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12984 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12985
12986 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12987 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12988 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12989 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12990 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12991 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12992 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12993 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12994
12995 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12996 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12997 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12998 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12999 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13000 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13001 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13002 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13003
13004 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13005 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13006 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13007 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13008 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13009 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13010 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13011 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13012
13013 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13014 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13015 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13016 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13017 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13018 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13019 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13020 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13021
13022 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13023 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13024 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13025 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13026 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13027 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13028 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13029 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13030
13031 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13032 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13033 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13034 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13035 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13036 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13037 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13038 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13039
13040 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13041 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13042 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13043 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13044 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13045 BB = EmitAtomicBinary(MI, BB, 4, 0);
13046 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13047 BB = EmitAtomicBinary(MI, BB, 8, 0);
13048 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13049 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13050 (Subtarget.hasPartwordAtomics() &&
13051 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13052 (Subtarget.hasPartwordAtomics() &&
13053 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13054 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13055
13056 auto LoadMnemonic = PPC::LDARX;
13057 auto StoreMnemonic = PPC::STDCX;
13058 switch (MI.getOpcode()) {
13059 default:
13060 llvm_unreachable("Compare and swap of unknown size");
13061 case PPC::ATOMIC_CMP_SWAP_I8:
13062 LoadMnemonic = PPC::LBARX;
13063 StoreMnemonic = PPC::STBCX;
13064 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13065 break;
13066 case PPC::ATOMIC_CMP_SWAP_I16:
13067 LoadMnemonic = PPC::LHARX;
13068 StoreMnemonic = PPC::STHCX;
13069 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13070 break;
13071 case PPC::ATOMIC_CMP_SWAP_I32:
13072 LoadMnemonic = PPC::LWARX;
13073 StoreMnemonic = PPC::STWCX;
13074 break;
13075 case PPC::ATOMIC_CMP_SWAP_I64:
13076 LoadMnemonic = PPC::LDARX;
13077 StoreMnemonic = PPC::STDCX;
13078 break;
13079 }
13080 MachineRegisterInfo &RegInfo = F->getRegInfo();
13081 Register dest = MI.getOperand(0).getReg();
13082 Register ptrA = MI.getOperand(1).getReg();
13083 Register ptrB = MI.getOperand(2).getReg();
13084 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13085 Register oldval = MI.getOperand(3).getReg();
13086 Register newval = MI.getOperand(4).getReg();
13087 DebugLoc dl = MI.getDebugLoc();
13088
13089 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13090 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13091 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13092 F->insert(It, loop1MBB);
13093 F->insert(It, loop2MBB);
13094 F->insert(It, exitMBB);
13095 exitMBB->splice(exitMBB->begin(), BB,
13096 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13098
13099 // thisMBB:
13100 // ...
13101 // fallthrough --> loopMBB
13102 BB->addSuccessor(loop1MBB);
13103
13104 // loop1MBB:
13105 // l[bhwd]arx dest, ptr
13106 // cmp[wd] dest, oldval
13107 // bne- exitBB
13108 // loop2MBB:
13109 // st[bhwd]cx. newval, ptr
13110 // bne- loopMBB
13111 // b exitBB
13112 // exitBB:
13113 BB = loop1MBB;
13114 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13115 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13116 .addReg(dest)
13117 .addReg(oldval);
13118 BuildMI(BB, dl, TII->get(PPC::BCC))
13120 .addReg(CrReg)
13121 .addMBB(exitMBB);
13122 BB->addSuccessor(loop2MBB);
13123 BB->addSuccessor(exitMBB);
13124
13125 BB = loop2MBB;
13126 BuildMI(BB, dl, TII->get(StoreMnemonic))
13127 .addReg(newval)
13128 .addReg(ptrA)
13129 .addReg(ptrB);
13130 BuildMI(BB, dl, TII->get(PPC::BCC))
13132 .addReg(PPC::CR0)
13133 .addMBB(loop1MBB);
13134 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13135 BB->addSuccessor(loop1MBB);
13136 BB->addSuccessor(exitMBB);
13137
13138 // exitMBB:
13139 // ...
13140 BB = exitMBB;
13141 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13142 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13143 // We must use 64-bit registers for addresses when targeting 64-bit,
13144 // since we're actually doing arithmetic on them. Other registers
13145 // can be 32-bit.
13146 bool is64bit = Subtarget.isPPC64();
13147 bool isLittleEndian = Subtarget.isLittleEndian();
13148 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13149
13150 Register dest = MI.getOperand(0).getReg();
13151 Register ptrA = MI.getOperand(1).getReg();
13152 Register ptrB = MI.getOperand(2).getReg();
13153 Register oldval = MI.getOperand(3).getReg();
13154 Register newval = MI.getOperand(4).getReg();
13155 DebugLoc dl = MI.getDebugLoc();
13156
13157 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13158 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13159 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13160 F->insert(It, loop1MBB);
13161 F->insert(It, loop2MBB);
13162 F->insert(It, exitMBB);
13163 exitMBB->splice(exitMBB->begin(), BB,
13164 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13166
13167 MachineRegisterInfo &RegInfo = F->getRegInfo();
13168 const TargetRegisterClass *RC =
13169 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13170 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13171
13172 Register PtrReg = RegInfo.createVirtualRegister(RC);
13173 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13174 Register ShiftReg =
13175 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13176 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13177 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13178 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13179 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13180 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13181 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13182 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13183 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13184 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13185 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13186 Register Ptr1Reg;
13187 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13188 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13189 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13190 // thisMBB:
13191 // ...
13192 // fallthrough --> loopMBB
13193 BB->addSuccessor(loop1MBB);
13194
13195 // The 4-byte load must be aligned, while a char or short may be
13196 // anywhere in the word. Hence all this nasty bookkeeping code.
13197 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13198 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13199 // xori shift, shift1, 24 [16]
13200 // rlwinm ptr, ptr1, 0, 0, 29
13201 // slw newval2, newval, shift
13202 // slw oldval2, oldval, shift
13203 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13204 // slw mask, mask2, shift
13205 // and newval3, newval2, mask
13206 // and oldval3, oldval2, mask
13207 // loop1MBB:
13208 // lwarx tmpDest, ptr
13209 // and tmp, tmpDest, mask
13210 // cmpw tmp, oldval3
13211 // bne- exitBB
13212 // loop2MBB:
13213 // andc tmp2, tmpDest, mask
13214 // or tmp4, tmp2, newval3
13215 // stwcx. tmp4, ptr
13216 // bne- loop1MBB
13217 // b exitBB
13218 // exitBB:
13219 // srw dest, tmpDest, shift
13220 if (ptrA != ZeroReg) {
13221 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13222 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13223 .addReg(ptrA)
13224 .addReg(ptrB);
13225 } else {
13226 Ptr1Reg = ptrB;
13227 }
13228
13229 // We need to use a 32-bit subregister here to avoid a register class
13230 // mismatch in 64-bit mode.
13231 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13232 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13233 .addImm(3)
13234 .addImm(27)
13235 .addImm(is8bit ? 28 : 27);
13236 if (!isLittleEndian)
13237 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13238 .addReg(Shift1Reg)
13239 .addImm(is8bit ? 24 : 16);
13240 if (is64bit)
13241 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13242 .addReg(Ptr1Reg)
13243 .addImm(0)
13244 .addImm(61);
13245 else
13246 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13247 .addReg(Ptr1Reg)
13248 .addImm(0)
13249 .addImm(0)
13250 .addImm(29);
13251 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13252 .addReg(newval)
13253 .addReg(ShiftReg);
13254 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13255 .addReg(oldval)
13256 .addReg(ShiftReg);
13257 if (is8bit)
13258 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13259 else {
13260 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13261 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13262 .addReg(Mask3Reg)
13263 .addImm(65535);
13264 }
13265 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13266 .addReg(Mask2Reg)
13267 .addReg(ShiftReg);
13268 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13269 .addReg(NewVal2Reg)
13270 .addReg(MaskReg);
13271 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13272 .addReg(OldVal2Reg)
13273 .addReg(MaskReg);
13274
13275 BB = loop1MBB;
13276 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13277 .addReg(ZeroReg)
13278 .addReg(PtrReg);
13279 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13280 .addReg(TmpDestReg)
13281 .addReg(MaskReg);
13282 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13283 .addReg(TmpReg)
13284 .addReg(OldVal3Reg);
13285 BuildMI(BB, dl, TII->get(PPC::BCC))
13287 .addReg(CrReg)
13288 .addMBB(exitMBB);
13289 BB->addSuccessor(loop2MBB);
13290 BB->addSuccessor(exitMBB);
13291
13292 BB = loop2MBB;
13293 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13294 .addReg(TmpDestReg)
13295 .addReg(MaskReg);
13296 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13297 .addReg(Tmp2Reg)
13298 .addReg(NewVal3Reg);
13299 BuildMI(BB, dl, TII->get(PPC::STWCX))
13300 .addReg(Tmp4Reg)
13301 .addReg(ZeroReg)
13302 .addReg(PtrReg);
13303 BuildMI(BB, dl, TII->get(PPC::BCC))
13305 .addReg(PPC::CR0)
13306 .addMBB(loop1MBB);
13307 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13308 BB->addSuccessor(loop1MBB);
13309 BB->addSuccessor(exitMBB);
13310
13311 // exitMBB:
13312 // ...
13313 BB = exitMBB;
13314 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13315 .addReg(TmpReg)
13316 .addReg(ShiftReg);
13317 } else if (MI.getOpcode() == PPC::FADDrtz) {
13318 // This pseudo performs an FADD with rounding mode temporarily forced
13319 // to round-to-zero. We emit this via custom inserter since the FPSCR
13320 // is not modeled at the SelectionDAG level.
13321 Register Dest = MI.getOperand(0).getReg();
13322 Register Src1 = MI.getOperand(1).getReg();
13323 Register Src2 = MI.getOperand(2).getReg();
13324 DebugLoc dl = MI.getDebugLoc();
13325
13326 MachineRegisterInfo &RegInfo = F->getRegInfo();
13327 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13328
13329 // Save FPSCR value.
13330 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13331
13332 // Set rounding mode to round-to-zero.
13333 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13334 .addImm(31)
13336
13337 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13338 .addImm(30)
13340
13341 // Perform addition.
13342 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13343 .addReg(Src1)
13344 .addReg(Src2);
13345 if (MI.getFlag(MachineInstr::NoFPExcept))
13347
13348 // Restore FPSCR value.
13349 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13350 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13351 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13352 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13353 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13354 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13355 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13356 ? PPC::ANDI8_rec
13357 : PPC::ANDI_rec;
13358 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13359 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13360
13361 MachineRegisterInfo &RegInfo = F->getRegInfo();
13362 Register Dest = RegInfo.createVirtualRegister(
13363 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13364
13365 DebugLoc Dl = MI.getDebugLoc();
13366 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13367 .addReg(MI.getOperand(1).getReg())
13368 .addImm(1);
13369 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13370 MI.getOperand(0).getReg())
13371 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13372 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13373 DebugLoc Dl = MI.getDebugLoc();
13374 MachineRegisterInfo &RegInfo = F->getRegInfo();
13375 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13376 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13377 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13378 MI.getOperand(0).getReg())
13379 .addReg(CRReg);
13380 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13381 DebugLoc Dl = MI.getDebugLoc();
13382 unsigned Imm = MI.getOperand(1).getImm();
13383 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13384 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13385 MI.getOperand(0).getReg())
13386 .addReg(PPC::CR0EQ);
13387 } else if (MI.getOpcode() == PPC::SETRNDi) {
13388 DebugLoc dl = MI.getDebugLoc();
13389 Register OldFPSCRReg = MI.getOperand(0).getReg();
13390
13391 // Save FPSCR value.
13392 if (MRI.use_empty(OldFPSCRReg))
13393 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13394 else
13395 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13396
13397 // The floating-point rounding mode is in bits 62:63 of the FPSCR and has
13398 // the following settings:
13399 // 00 Round to nearest
13400 // 01 Round to 0
13401 // 10 Round to +inf
13402 // 11 Round to -inf
13403
13404 // When the operand is an immediate, use its two least significant bits to
13405 // set bits 62:63 of the FPSCR.
13406 unsigned Mode = MI.getOperand(1).getImm();
13407 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13408 .addImm(31)
13410
13411 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13412 .addImm(30)
13414 } else if (MI.getOpcode() == PPC::SETRND) {
13415 DebugLoc dl = MI.getDebugLoc();
13416
13417 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13418 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13419 // If the target doesn't have DirectMove, we should use the stack to do the
13420 // conversion, because the target doesn't have instructions like mtvsrd
13421 // or mfvsrd to do this conversion directly.
13422 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13423 if (Subtarget.hasDirectMove()) {
13424 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13425 .addReg(SrcReg);
13426 } else {
13427 // Use stack to do the register copy.
13428 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13429 MachineRegisterInfo &RegInfo = F->getRegInfo();
13430 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13431 if (RC == &PPC::F8RCRegClass) {
13432 // Copy register from F8RCRegClass to G8RCRegClass.
13433 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13434 "Unsupported RegClass.");
13435
13436 StoreOp = PPC::STFD;
13437 LoadOp = PPC::LD;
13438 } else {
13439 // Copy register from G8RCRegClass to F8RCRegClass.
13440 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13441 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13442 "Unsupported RegClass.");
13443 }
13444
13445 MachineFrameInfo &MFI = F->getFrameInfo();
13446 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13447
13448 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13449 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13451 MFI.getObjectAlign(FrameIdx));
13452
13453 // Store the SrcReg into the stack.
13454 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13455 .addReg(SrcReg)
13456 .addImm(0)
13457 .addFrameIndex(FrameIdx)
13458 .addMemOperand(MMOStore);
13459
13460 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13461 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13463 MFI.getObjectAlign(FrameIdx));
13464
13465 // Load from the stack where SrcReg is stored, and save to DestReg,
13466 // so we have done the RegClass conversion from RegClass::SrcReg to
13467 // RegClass::DestReg.
13468 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13469 .addImm(0)
13470 .addFrameIndex(FrameIdx)
13471 .addMemOperand(MMOLoad);
13472 }
13473 };
13474
13475 Register OldFPSCRReg = MI.getOperand(0).getReg();
13476
13477 // Save FPSCR value.
13478 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13479
13480 // When the operand is a GPRC register, use its two least significant bits
13481 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
13482 //
13483 // copy OldFPSCRTmpReg, OldFPSCRReg
13484 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13485 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13486 // copy NewFPSCRReg, NewFPSCRTmpReg
13487 // mtfsf 255, NewFPSCRReg
13488 MachineOperand SrcOp = MI.getOperand(1);
13489 MachineRegisterInfo &RegInfo = F->getRegInfo();
13490 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13491
13492 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13493
13494 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13495 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13496
13497 // The first operand of INSERT_SUBREG should be a register which has
13498 // subregisters; we only care about its RegClass, so we should use an
13499 // IMPLICIT_DEF register.
13500 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13501 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13502 .addReg(ImDefReg)
13503 .add(SrcOp)
13504 .addImm(1);
13505
13506 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13507 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13508 .addReg(OldFPSCRTmpReg)
13509 .addReg(ExtSrcReg)
13510 .addImm(0)
13511 .addImm(62);
13512
13513 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13514 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13515
13516 // The mask 255 means that bits 32:63 of NewFPSCRReg are put into bits
13517 // 32:63 of the FPSCR.
13518 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13519 .addImm(255)
13520 .addReg(NewFPSCRReg)
13521 .addImm(0)
13522 .addImm(0);
13523 } else if (MI.getOpcode() == PPC::SETFLM) {
13524 DebugLoc Dl = MI.getDebugLoc();
13525
13526 // The result of setflm is the previous FPSCR content, so we need to save it first.
13527 Register OldFPSCRReg = MI.getOperand(0).getReg();
13528 if (MRI.use_empty(OldFPSCRReg))
13529 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13530 else
13531 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13532
13533 // Put bits 32:63 into the FPSCR.
13534 Register NewFPSCRReg = MI.getOperand(1).getReg();
13535 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13536 .addImm(255)
13537 .addReg(NewFPSCRReg)
13538 .addImm(0)
13539 .addImm(0);
13540 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13541 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13542 return emitProbedAlloca(MI, BB);
13543 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13544 DebugLoc DL = MI.getDebugLoc();
13545 Register Src = MI.getOperand(2).getReg();
13546 Register Lo = MI.getOperand(0).getReg();
13547 Register Hi = MI.getOperand(1).getReg();
13548 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13549 .addDef(Lo)
13550 .addUse(Src, 0, PPC::sub_gp8_x1);
13551 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13552 .addDef(Hi)
13553 .addUse(Src, 0, PPC::sub_gp8_x0);
13554 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13555 MI.getOpcode() == PPC::STQX_PSEUDO) {
13556 DebugLoc DL = MI.getDebugLoc();
13557 // Ptr is used as the ptr_rc_no_r0 part
13558 // of LQ/STQ's memory operand and holds the sum of RA and RB,
13559 // so it has to be g8rc_and_g8rc_nox0.
13560 Register Ptr =
13561 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13562 Register Val = MI.getOperand(0).getReg();
13563 Register RA = MI.getOperand(1).getReg();
13564 Register RB = MI.getOperand(2).getReg();
13565 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13566 BuildMI(*BB, MI, DL,
13567 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13568 : TII->get(PPC::STQ))
13569 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13570 .addImm(0)
13571 .addReg(Ptr);
13572 } else {
13573 llvm_unreachable("Unexpected instr type to insert");
13574 }
13575
13576 MI.eraseFromParent(); // The pseudo instruction is gone now.
13577 return BB;
13578}
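// An illustrative, standalone sketch (not from this file; invented names,
// big-endian case only) of the shift/mask bookkeeping used by the partword
// compare-and-swap expansion above: a byte is isolated inside its naturally
// aligned 32-bit word so a word-sized lwarx/stwcx. loop can operate on it.
#include <cstdint>

static uint32_t insertByteBigEndian(uint32_t Word, uint8_t NewVal,
                                    uintptr_t ByteAddr) {
  unsigned Shift1 = (ByteAddr & 0x3) * 8; // rlwinm shift1, ptr1, 3, 27, 28
  unsigned Shift = Shift1 ^ 24u;          // xori shift, shift1, 24
  uint32_t Mask = 0xFFu << Shift;         // slw mask, mask2, shift
  uint32_t New = (uint32_t)NewVal << Shift;
  return (Word & ~Mask) | (New & Mask);   // andc / or, as in loop2MBB
}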
13579
13580//===----------------------------------------------------------------------===//
13581// Target Optimization Hooks
13582//===----------------------------------------------------------------------===//
13583
13584static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13585 // For the estimates, convergence is quadratic, so we essentially double the
13586 // number of correct digits after every iteration. For both FRE and FRSQRTE,
13587 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13588 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
13589 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13590 if (VT.getScalarType() == MVT::f64)
13591 RefinementSteps++;
13592 return RefinementSteps;
13593}
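// An illustrative snippet (not from this file; the name is invented) of the
// arithmetic behind the step counts chosen above: with quadratic convergence
// the number of correct bits doubles each iteration, so iterate until the
// estimate's initial accuracy covers the full mantissa.
static int refinementStepsFor(int InitialBits, int MantissaBits) {
  int Steps = 0;
  for (int Bits = InitialBits; Bits < MantissaBits; Bits *= 2)
    ++Steps;
  return Steps;
}
// refinementStepsFor(5, 24) == 3 and refinementStepsFor(5, 53) == 4, matching
// "3 steps, plus one more for f64"; with hasRecipPrec() the initial 14 bits
// give refinementStepsFor(14, 24) == 1 and refinementStepsFor(14, 53) == 2.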
13594
13595SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13596 const DenormalMode &Mode) const {
13597 // We only have VSX Vector Test for software Square Root.
13598 EVT VT = Op.getValueType();
13599 if (!isTypeLegal(MVT::i1) ||
13600 (VT != MVT::f64 &&
13601 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13602 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13603
13604 SDLoc DL(Op);
13605 // The output register of FTSQRT is a CR field.
13606 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13607 // ftsqrt BF,FRB
13608 // Let e_b be the unbiased exponent of the double-precision
13609 // floating-point operand in register FRB.
13610 // fe_flag is set to 1 if either of the following conditions occurs.
13611 // - The double-precision floating-point operand in register FRB is a zero,
13612 // a NaN, an infinity, or a negative value.
13613 // - e_b is less than or equal to -970.
13614 // Otherwise fe_flag is set to 0.
13615 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13616 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13617 // exponent is less than or equal to -970)
13618 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13619 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13620 FTSQRT, SRIdxVal),
13621 0);
13622}
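// A scalar C++ sketch (an illustration of the ftsqrt semantics quoted above,
// not the actual DAG lowering; the function name is invented) of the condition
// under which an input is not eligible for the iterative expansion.
#include <cmath>

static bool notEligibleForIteration(double FRB) {
  // fe_flag: zero, negative, NaN, infinity, or unbiased exponent <= -970.
  return FRB == 0.0 || std::signbit(FRB) || std::isnan(FRB) ||
         std::isinf(FRB) || std::ilogb(FRB) <= -970;
}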
13623
13624SDValue
13625PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13626 SelectionDAG &DAG) const {
13627 // We only have VSX Vector Square Root.
13628 EVT VT = Op.getValueType();
13629 if (VT != MVT::f64 &&
13630 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13632
13633 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13634}
13635
13636SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13637 int Enabled, int &RefinementSteps,
13638 bool &UseOneConstNR,
13639 bool Reciprocal) const {
13640 EVT VT = Operand.getValueType();
13641 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13642 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13643 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13644 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13645 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13646 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13647
13648 // The Newton-Raphson computation with a single constant does not provide
13649 // enough accuracy on some CPUs.
13650 UseOneConstNR = !Subtarget.needsTwoConstNR();
13651 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13652 }
13653 return SDValue();
13654}
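// One Newton-Raphson step for the reciprocal square root, as a scalar
// illustration (invented name; the actual refinement is stitched around the
// FRSQRTE node returned above by the generic combiner): each step roughly
// doubles the number of correct bits of the hardware estimate.
static double refineRsqrtOnce(double A, double Est) {
  // x1 = x0 * (1.5 - 0.5 * A * x0 * x0)
  return Est * (1.5 - 0.5 * A * Est * Est);
}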
13655
13656SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13657 int Enabled,
13658 int &RefinementSteps) const {
13659 EVT VT = Operand.getValueType();
13660 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13661 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13662 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13663 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13664 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13665 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13666 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13667 }
13668 return SDValue();
13669}
13670
13671unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13672 // Note: This functionality is used only when unsafe-fp-math is enabled. On
13673 // cores with reciprocal estimates (which are used when unsafe-fp-math is
13674 // enabled for division), this functionality is redundant with the default
13675 // combiner logic (once the division -> reciprocal/multiply transformation
13676 // has taken place). As a result, this matters more for older cores than for
13677 // newer ones.
13678
13679 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13680 // reciprocal if there are two or more FDIVs (for embedded cores with only
13681 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
13682 switch (Subtarget.getCPUDirective()) {
13683 default:
13684 return 3;
13685 case PPC::DIR_440:
13686 case PPC::DIR_A2:
13687 case PPC::DIR_E500:
13688 case PPC::DIR_E500mc:
13689 case PPC::DIR_E5500:
13690 return 2;
13691 }
13692}
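// An illustrative sketch (invented names; assumes fast-math-style
// reassociation) of the transformation whose profitability threshold is chosen
// above: several divisions by one divisor become a single division plus cheap
// multiplies.
static void divideAllByD(double *Out, const double *In, int N, double D) {
  double Recip = 1.0 / D;      // single FDIV
  for (int I = 0; I < N; ++I)
    Out[I] = In[I] * Recip;    // FMULs replace the remaining FDIVs
}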
13693
13694// isConsecutiveLSLoc needs to work even if all adds have not yet been
13695// collapsed, and so we need to look through chains of them.
13697 int64_t& Offset, SelectionDAG &DAG) {
13698 if (DAG.isBaseWithConstantOffset(Loc)) {
13699 Base = Loc.getOperand(0);
13700 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13701
13702 // The base might itself be a base plus an offset, and if so, accumulate
13703 // that as well.
13705 }
13706}
13707
13709 unsigned Bytes, int Dist,
13710 SelectionDAG &DAG) {
13711 if (VT.getSizeInBits() / 8 != Bytes)
13712 return false;
13713
13714 SDValue BaseLoc = Base->getBasePtr();
13715 if (Loc.getOpcode() == ISD::FrameIndex) {
13716 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13717 return false;
13719 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13720 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13721 int FS = MFI.getObjectSize(FI);
13722 int BFS = MFI.getObjectSize(BFI);
13723 if (FS != BFS || FS != (int)Bytes) return false;
13724 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13725 }
13726
13727 SDValue Base1 = Loc, Base2 = BaseLoc;
13728 int64_t Offset1 = 0, Offset2 = 0;
13729 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13730 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13731 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13732 return true;
13733
13734 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13735 const GlobalValue *GV1 = nullptr;
13736 const GlobalValue *GV2 = nullptr;
13737 Offset1 = 0;
13738 Offset2 = 0;
13739 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13740 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13741 if (isGA1 && isGA2 && GV1 == GV2)
13742 return Offset1 == (Offset2 + Dist*Bytes);
13743 return false;
13744}
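// A simplified illustrative sketch (invented names) of the address check the
// helper above performs once both locations are reduced to base-plus-constant
// form: Loc is Dist elements past BaseLoc exactly when the bases match and the
// offsets differ by Dist * Bytes.
static bool isConsecutiveSketch(const void *Base1, long Offset1,
                                const void *Base2, long Offset2, long Bytes,
                                long Dist) {
  return Base1 == Base2 && Offset1 == Offset2 + Dist * Bytes;
}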
13745
13746// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13747// not enforce equality of the chain operands.
13749 unsigned Bytes, int Dist,
13750 SelectionDAG &DAG) {
13751 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13752 EVT VT = LS->getMemoryVT();
13753 SDValue Loc = LS->getBasePtr();
13754 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13755 }
13756
13757 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13758 EVT VT;
13759 switch (N->getConstantOperandVal(1)) {
13760 default: return false;
13761 case Intrinsic::ppc_altivec_lvx:
13762 case Intrinsic::ppc_altivec_lvxl:
13763 case Intrinsic::ppc_vsx_lxvw4x:
13764 case Intrinsic::ppc_vsx_lxvw4x_be:
13765 VT = MVT::v4i32;
13766 break;
13767 case Intrinsic::ppc_vsx_lxvd2x:
13768 case Intrinsic::ppc_vsx_lxvd2x_be:
13769 VT = MVT::v2f64;
13770 break;
13771 case Intrinsic::ppc_altivec_lvebx:
13772 VT = MVT::i8;
13773 break;
13774 case Intrinsic::ppc_altivec_lvehx:
13775 VT = MVT::i16;
13776 break;
13777 case Intrinsic::ppc_altivec_lvewx:
13778 VT = MVT::i32;
13779 break;
13780 }
13781
13782 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13783 }
13784
13785 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13786 EVT VT;
13787 switch (N->getConstantOperandVal(1)) {
13788 default: return false;
13789 case Intrinsic::ppc_altivec_stvx:
13790 case Intrinsic::ppc_altivec_stvxl:
13791 case Intrinsic::ppc_vsx_stxvw4x:
13792 VT = MVT::v4i32;
13793 break;
13794 case Intrinsic::ppc_vsx_stxvd2x:
13795 VT = MVT::v2f64;
13796 break;
13797 case Intrinsic::ppc_vsx_stxvw4x_be:
13798 VT = MVT::v4i32;
13799 break;
13800 case Intrinsic::ppc_vsx_stxvd2x_be:
13801 VT = MVT::v2f64;
13802 break;
13803 case Intrinsic::ppc_altivec_stvebx:
13804 VT = MVT::i8;
13805 break;
13806 case Intrinsic::ppc_altivec_stvehx:
13807 VT = MVT::i16;
13808 break;
13809 case Intrinsic::ppc_altivec_stvewx:
13810 VT = MVT::i32;
13811 break;
13812 }
13813
13814 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13815 }
13816
13817 return false;
13818}
13819
13820// Return true if there is a nearby consecutive load to the one provided
13821// (regardless of alignment). We search up and down the chain, looking through
13822// token factors and other loads (but nothing else). As a result, a true result
13823// indicates that it is safe to create a new consecutive load adjacent to the
13824// load provided.
13826 SDValue Chain = LD->getChain();
13827 EVT VT = LD->getMemoryVT();
13828
13829 SmallSet<SDNode *, 16> LoadRoots;
13830 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13831 SmallSet<SDNode *, 16> Visited;
13832
13833 // First, search up the chain, branching to follow all token-factor operands.
13834 // If we find a consecutive load, then we're done, otherwise, record all
13835 // nodes just above the top-level loads and token factors.
13836 while (!Queue.empty()) {
13837 SDNode *ChainNext = Queue.pop_back_val();
13838 if (!Visited.insert(ChainNext).second)
13839 continue;
13840
13841 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13842 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13843 return true;
13844
13845 if (!Visited.count(ChainLD->getChain().getNode()))
13846 Queue.push_back(ChainLD->getChain().getNode());
13847 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13848 for (const SDUse &O : ChainNext->ops())
13849 if (!Visited.count(O.getNode()))
13850 Queue.push_back(O.getNode());
13851 } else
13852 LoadRoots.insert(ChainNext);
13853 }
13854
13855 // Second, search down the chain, starting from the top-level nodes recorded
13856 // in the first phase. These top-level nodes are the nodes just above all
13857 // loads and token factors. Starting with their uses, recursively look through
13858 // all loads (just the chain uses) and token factors to find a consecutive
13859 // load.
13860 Visited.clear();
13861 Queue.clear();
13862
13863 for (SDNode *I : LoadRoots) {
13864 Queue.push_back(I);
13865
13866 while (!Queue.empty()) {
13867 SDNode *LoadRoot = Queue.pop_back_val();
13868 if (!Visited.insert(LoadRoot).second)
13869 continue;
13870
13871 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13872 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13873 return true;
13874
13875 for (SDNode *U : LoadRoot->uses())
13876 if (((isa<MemSDNode>(U) &&
13877 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13878 U->getOpcode() == ISD::TokenFactor) &&
13879 !Visited.count(U))
13880 Queue.push_back(U);
13881 }
13882 }
13883
13884 return false;
13885}
13886
13887/// This function is called when we have proved that a SETCC node can be replaced
13888/// by subtraction (and other supporting instructions) so that the result of
13889/// the comparison is kept in a GPR instead of a CR. This function is purely for
13890/// codegen purposes and has some flags to guide the codegen process.
13891static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13892 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13893 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13894
13895 // Zero extend the operands to the largest legal integer. Originally, they
13896 // must be of a strictly smaller size.
13897 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13898 DAG.getConstant(Size, DL, MVT::i32));
13899 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13900 DAG.getConstant(Size, DL, MVT::i32));
13901
13902 // Swap if needed. Depends on the condition code.
13903 if (Swap)
13904 std::swap(Op0, Op1);
13905
13906 // Subtract extended integers.
13907 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13908
13909 // Move the sign bit to the least significant position and zero out the rest.
13910 // Now the least significant bit carries the result of the original comparison.
13911 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13912 DAG.getConstant(Size - 1, DL, MVT::i32));
13913 auto Final = Shifted;
13914
13915 // Complement the result if needed. Based on the condition code.
13916 if (Complement)
13917 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13918 DAG.getConstant(1, DL, MVT::i64));
13919
13920 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13921}
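// A scalar illustrative sketch (invented name) of the subtraction trick
// implemented above for a 32-bit unsigned "less than": zero-extend both
// operands to 64 bits, subtract, and read the sign bit of the difference.
#include <cstdint>

static uint64_t ultViaSub(uint32_t A, uint32_t B) {
  uint64_t Diff = (uint64_t)A - (uint64_t)B; // bit 63 is set exactly when A < B
  return Diff >> 63;                         // 1 iff A < B (unsigned)
}
// SETULE, SETUGT, and SETUGE follow by swapping the operands and/or XOR-ing
// the result with 1, as selected in ConvertSETCCToSubtract below.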
13922
13923SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13924 DAGCombinerInfo &DCI) const {
13925 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13926
13927 SelectionDAG &DAG = DCI.DAG;
13928 SDLoc DL(N);
13929
13930 // The size of the integers being compared has a critical role in the following
13931 // analysis, so we prefer to do this when all types are legal.
13932 if (!DCI.isAfterLegalizeDAG())
13933 return SDValue();
13934
13935 // If all users of the SETCC extend its value to a legal integer type,
13936 // then we replace the SETCC with a subtraction.
13937 for (const SDNode *U : N->uses())
13938 if (U->getOpcode() != ISD::ZERO_EXTEND)
13939 return SDValue();
13940
13941 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13942 auto OpSize = N->getOperand(0).getValueSizeInBits();
13943
13945
13946 if (OpSize < Size) {
13947 switch (CC) {
13948 default: break;
13949 case ISD::SETULT:
13950 return generateEquivalentSub(N, Size, false, false, DL, DAG);
13951 case ISD::SETULE:
13952 return generateEquivalentSub(N, Size, true, true, DL, DAG);
13953 case ISD::SETUGT:
13954 return generateEquivalentSub(N, Size, false, true, DL, DAG);
13955 case ISD::SETUGE:
13956 return generateEquivalentSub(N, Size, true, false, DL, DAG);
13957 }
13958 }
13959
13960 return SDValue();
13961}
13962
13963SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13964 DAGCombinerInfo &DCI) const {
13965 SelectionDAG &DAG = DCI.DAG;
13966 SDLoc dl(N);
13967
13968 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13969 // If we're tracking CR bits, we need to be careful that we don't have:
13970 // trunc(binary-ops(zext(x), zext(y)))
13971 // or
13972 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
13973 // such that we're unnecessarily moving things into GPRs when it would be
13974 // better to keep them in CR bits.
13975
13976 // Note that trunc here can be an actual i1 trunc, or can be the effective
13977 // truncation that comes from a setcc or select_cc.
13978 if (N->getOpcode() == ISD::TRUNCATE &&
13979 N->getValueType(0) != MVT::i1)
13980 return SDValue();
13981
13982 if (N->getOperand(0).getValueType() != MVT::i32 &&
13983 N->getOperand(0).getValueType() != MVT::i64)
13984 return SDValue();
13985
13986 if (N->getOpcode() == ISD::SETCC ||
13987 N->getOpcode() == ISD::SELECT_CC) {
13988 // If we're looking at a comparison, then we need to make sure that the
13989 // high bits (all except for the first) don't affect the result.
13991 cast<CondCodeSDNode>(N->getOperand(
13992 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13993 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13994
13996 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13997 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13998 return SDValue();
13999 } else if (ISD::isUnsignedIntSetCC(CC)) {
14000 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14001 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14002 !DAG.MaskedValueIsZero(N->getOperand(1),
14003 APInt::getHighBitsSet(OpBits, OpBits-1)))
14004 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14005 : SDValue());
14006 } else {
14007 // This is neither a signed nor an unsigned comparison, just make sure
14008 // that the high bits are equal.
14009 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14010 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14011
14012 // We don't really care about what is known about the first bit (if
14013 // anything), so pretend that it is known zero for both to ensure they can
14014 // be compared as constants.
14015 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14016 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14017
14018 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14019 Op1Known.getConstant() != Op2Known.getConstant())
14020 return SDValue();
14021 }
14022 }
14023
14024 // We now know that the higher-order bits are irrelevant, we just need to
14025 // make sure that all of the intermediate operations are bit operations, and
14026 // all inputs are extensions.
14027 if (N->getOperand(0).getOpcode() != ISD::AND &&
14028 N->getOperand(0).getOpcode() != ISD::OR &&
14029 N->getOperand(0).getOpcode() != ISD::XOR &&
14030 N->getOperand(0).getOpcode() != ISD::SELECT &&
14031 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14032 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14033 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14034 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14035 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14036 return SDValue();
14037
14038 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14039 N->getOperand(1).getOpcode() != ISD::AND &&
14040 N->getOperand(1).getOpcode() != ISD::OR &&
14041 N->getOperand(1).getOpcode() != ISD::XOR &&
14042 N->getOperand(1).getOpcode() != ISD::SELECT &&
14043 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14044 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14045 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14046 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14047 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14048 return SDValue();
14049
14051 SmallVector<SDValue, 8> BinOps, PromOps;
14053
14054 for (unsigned i = 0; i < 2; ++i) {
14055 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14056 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14057 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14058 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14059 isa<ConstantSDNode>(N->getOperand(i)))
14060 Inputs.push_back(N->getOperand(i));
14061 else
14062 BinOps.push_back(N->getOperand(i));
14063
14064 if (N->getOpcode() == ISD::TRUNCATE)
14065 break;
14066 }
14067
14068 // Visit all inputs, collect all binary operations (and, or, xor and
14069 // select) that are all fed by extensions.
14070 while (!BinOps.empty()) {
14071 SDValue BinOp = BinOps.pop_back_val();
14072
14073 if (!Visited.insert(BinOp.getNode()).second)
14074 continue;
14075
14076 PromOps.push_back(BinOp);
14077
14078 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14079 // The condition of the select is not promoted.
14080 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14081 continue;
14082 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14083 continue;
14084
14085 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14086 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14087 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14088 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14089 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14090 Inputs.push_back(BinOp.getOperand(i));
14091 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14092 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14093 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14094 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14095 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14096 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14097 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14098 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14099 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14100 BinOps.push_back(BinOp.getOperand(i));
14101 } else {
14102 // We have an input that is not an extension or another binary
14103 // operation; we'll abort this transformation.
14104 return SDValue();
14105 }
14106 }
14107 }
14108
14109 // Make sure that this is a self-contained cluster of operations (which
14110 // is not quite the same thing as saying that everything has only one
14111 // use).
14112 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14113 if (isa<ConstantSDNode>(Inputs[i]))
14114 continue;
14115
14116 for (const SDNode *User : Inputs[i].getNode()->uses()) {
14117 if (User != N && !Visited.count(User))
14118 return SDValue();
14119
14120 // Make sure that we're not going to promote the non-output-value
14121 // operand(s) of SELECT or SELECT_CC.
14122 // FIXME: Although we could sometimes handle this, and it does occur in
14123 // practice that one of the condition inputs to the select is also one of
14124 // the outputs, we currently can't deal with this.
14125 if (User->getOpcode() == ISD::SELECT) {
14126 if (User->getOperand(0) == Inputs[i])
14127 return SDValue();
14128 } else if (User->getOpcode() == ISD::SELECT_CC) {
14129 if (User->getOperand(0) == Inputs[i] ||
14130 User->getOperand(1) == Inputs[i])
14131 return SDValue();
14132 }
14133 }
14134 }
14135
14136 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14137 for (const SDNode *User : PromOps[i].getNode()->uses()) {
14138 if (User != N && !Visited.count(User))
14139 return SDValue();
14140
14141 // Make sure that we're not going to promote the non-output-value
14142 // operand(s) of SELECT or SELECT_CC.
14143 // FIXME: Although we could sometimes handle this, and it does occur in
14144 // practice that one of the condition inputs to the select is also one of
14145 // the outputs, we currently can't deal with this.
14146 if (User->getOpcode() == ISD::SELECT) {
14147 if (User->getOperand(0) == PromOps[i])
14148 return SDValue();
14149 } else if (User->getOpcode() == ISD::SELECT_CC) {
14150 if (User->getOperand(0) == PromOps[i] ||
14151 User->getOperand(1) == PromOps[i])
14152 return SDValue();
14153 }
14154 }
14155 }
14156
14157 // Replace all inputs with the extension operand.
14158 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14159 // Constants may have users outside the cluster of to-be-promoted nodes,
14160 // and so we need to replace those as we do the promotions.
14161 if (isa<ConstantSDNode>(Inputs[i]))
14162 continue;
14163 else
14164 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14165 }
14166
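 // Note: the handles below keep the queued SDValues stable across the
 // ReplaceAllUsesOfValueWith calls in the loop that follows; without them,
 // nodes that get CSE'd or replaced could leave dangling entries in the list.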
14167 std::list<HandleSDNode> PromOpHandles;
14168 for (auto &PromOp : PromOps)
14169 PromOpHandles.emplace_back(PromOp);
14170
14171 // Replace all operations (these are all the same, but have a different
14172 // (i1) return type). DAG.getNode will validate that the types of
14173 // a binary operator match, so go through the list in reverse so that
14174 // we've likely promoted both operands first. Any intermediate truncations or
14175 // extensions disappear.
14176 while (!PromOpHandles.empty()) {
14177 SDValue PromOp = PromOpHandles.back().getValue();
14178 PromOpHandles.pop_back();
14179
14180 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14181 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14182 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14183 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14184 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14185 PromOp.getOperand(0).getValueType() != MVT::i1) {
14186 // The operand is not yet ready (see comment below).
14187 PromOpHandles.emplace_front(PromOp);
14188 continue;
14189 }
14190
14191 SDValue RepValue = PromOp.getOperand(0);
14192 if (isa<ConstantSDNode>(RepValue))
14193 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14194
14195 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14196 continue;
14197 }
14198
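 // C is the index of the first operand to promote: binary ops promote
 // operands 0 and 1, SELECT skips its condition and promotes operands 1 and
 // 2, and SELECT_CC skips both comparison operands and promotes 2 and 3.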
14199 unsigned C;
14200 switch (PromOp.getOpcode()) {
14201 default: C = 0; break;
14202 case ISD::SELECT: C = 1; break;
14203 case ISD::SELECT_CC: C = 2; break;
14204 }
14205
14206 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14207 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14208 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14209 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14210 // The to-be-promoted operands of this node have not yet been
14211 // promoted (this should be rare because we're going through the
14212 // list backward, but if one of the operands has several users in
14213 // this cluster of to-be-promoted nodes, it is possible).
14214 PromOpHandles.emplace_front(PromOp);
14215 continue;
14216 }
14217
14218 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14219 PromOp.getNode()->op_end());
14220
14221 // If there are any constant inputs, make sure they're replaced now.
14222 for (unsigned i = 0; i < 2; ++i)
14223 if (isa<ConstantSDNode>(Ops[C+i]))
14224 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14225
14226 DAG.ReplaceAllUsesOfValueWith(PromOp,
14227 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14228 }
14229
14230 // Now we're left with the initial truncation itself.
14231 if (N->getOpcode() == ISD::TRUNCATE)
14232 return N->getOperand(0);
14233
14234 // Otherwise, this is a comparison. The operands to be compared have just
14235 // changed type (to i1), but everything else is the same.
14236 return SDValue(N, 0);
14237}
14238
14239SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14240 DAGCombinerInfo &DCI) const {
14241 SelectionDAG &DAG = DCI.DAG;
14242 SDLoc dl(N);
14243
14244 // If we're tracking CR bits, we need to be careful that we don't have:
14245 // zext(binary-ops(trunc(x), trunc(y)))
14246 // or
14247 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
14248 // such that we're unnecessarily moving things into CR bits that can more
14249 // efficiently stay in GPRs. Note that if we're not certain that the high
14250 // bits are set as required by the final extension, we still may need to do
14251 // some masking to get the proper behavior.
14252
14253 // This same functionality is important on PPC64 when dealing with
14254 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14255 // the return values of functions. Because it is so similar, it is handled
14256 // here as well.
14257
14258 if (N->getValueType(0) != MVT::i32 &&
14259 N->getValueType(0) != MVT::i64)
14260 return SDValue();
14261
14262 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14263 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14264 return SDValue();
14265
14266 if (N->getOperand(0).getOpcode() != ISD::AND &&
14267 N->getOperand(0).getOpcode() != ISD::OR &&
14268 N->getOperand(0).getOpcode() != ISD::XOR &&
14269 N->getOperand(0).getOpcode() != ISD::SELECT &&
14270 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14271 return SDValue();
14272
14273 SmallVector<SDValue, 4> Inputs;
14274 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14275 SmallPtrSet<SDNode *, 16> Visited;
14276
14277 // Visit all inputs, collect all binary operations (and, or, xor and
14278 // select) that are all fed by truncations.
14279 while (!BinOps.empty()) {
14280 SDValue BinOp = BinOps.pop_back_val();
14281
14282 if (!Visited.insert(BinOp.getNode()).second)
14283 continue;
14284
14285 PromOps.push_back(BinOp);
14286
14287 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14288 // The condition of the select is not promoted.
14289 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14290 continue;
14291 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14292 continue;
14293
14294 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14295 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14296 Inputs.push_back(BinOp.getOperand(i));
14297 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14298 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14299 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14300 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14301 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14302 BinOps.push_back(BinOp.getOperand(i));
14303 } else {
14304 // We have an input that is not a truncation or another binary
14305 // operation; we'll abort this transformation.
14306 return SDValue();
14307 }
14308 }
14309 }
14310
14311 // The operands of a select that must be truncated when the select is
14312 // promoted because the operand is actually part of the to-be-promoted set.
14313 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14314
14315 // Make sure that this is a self-contained cluster of operations (which
14316 // is not quite the same thing as saying that everything has only one
14317 // use).
14318 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14319 if (isa<ConstantSDNode>(Inputs[i]))
14320 continue;
14321
14322 for (SDNode *User : Inputs[i].getNode()->uses()) {
14323 if (User != N && !Visited.count(User))
14324 return SDValue();
14325
14326 // If we're going to promote the non-output-value operand(s) of SELECT or
14327 // SELECT_CC, record them for truncation.
14328 if (User->getOpcode() == ISD::SELECT) {
14329 if (User->getOperand(0) == Inputs[i])
14330 SelectTruncOp[0].insert(std::make_pair(User,
14331 User->getOperand(0).getValueType()));
14332 } else if (User->getOpcode() == ISD::SELECT_CC) {
14333 if (User->getOperand(0) == Inputs[i])
14334 SelectTruncOp[0].insert(std::make_pair(User,
14335 User->getOperand(0).getValueType()));
14336 if (User->getOperand(1) == Inputs[i])
14337 SelectTruncOp[1].insert(std::make_pair(User,
14338 User->getOperand(1).getValueType()));
14339 }
14340 }
14341 }
14342
14343 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14344 for (SDNode *User : PromOps[i].getNode()->uses()) {
14345 if (User != N && !Visited.count(User))
14346 return SDValue();
14347
14348 // If we're going to promote the non-output-value operand(s) of SELECT or
14349 // SELECT_CC, record them for truncation.
14350 if (User->getOpcode() == ISD::SELECT) {
14351 if (User->getOperand(0) == PromOps[i])
14352 SelectTruncOp[0].insert(std::make_pair(User,
14353 User->getOperand(0).getValueType()));
14354 } else if (User->getOpcode() == ISD::SELECT_CC) {
14355 if (User->getOperand(0) == PromOps[i])
14356 SelectTruncOp[0].insert(std::make_pair(User,
14357 User->getOperand(0).getValueType()));
14358 if (User->getOperand(1) == PromOps[i])
14359 SelectTruncOp[1].insert(std::make_pair(User,
14360 User->getOperand(1).getValueType()));
14361 }
14362 }
14363 }
14364
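 // PromBits is the width of the value being extended. ReallyNeedsExt records
 // whether the inputs already have their high bits in the state the final
 // extension requires; if not, a mask (zext) or shift pair (sext) is still
 // emitted at the end.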
14365 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14366 bool ReallyNeedsExt = false;
14367 if (N->getOpcode() != ISD::ANY_EXTEND) {
14368 // If all of the inputs are not already sign/zero extended, then
14369 // we'll still need to do that at the end.
14370 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14371 if (isa<ConstantSDNode>(Inputs[i]))
14372 continue;
14373
14374 unsigned OpBits =
14375 Inputs[i].getOperand(0).getValueSizeInBits();
14376 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14377
14378 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14379 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14380 APInt::getHighBitsSet(OpBits,
14381 OpBits-PromBits))) ||
14382 (N->getOpcode() == ISD::SIGN_EXTEND &&
14383 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14384 (OpBits-(PromBits-1)))) {
14385 ReallyNeedsExt = true;
14386 break;
14387 }
14388 }
14389 }
14390
14391 // Replace all inputs, either with the truncation operand, or a
14392 // truncation or extension to the final output type.
14393 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14394 // Constant inputs need to be replaced with the to-be-promoted nodes that
14395 // use them because they might have users outside of the cluster of
14396 // promoted nodes.
14397 if (isa<ConstantSDNode>(Inputs[i]))
14398 continue;
14399
14400 SDValue InSrc = Inputs[i].getOperand(0);
14401 if (Inputs[i].getValueType() == N->getValueType(0))
14402 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14403 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14404 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14405 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14406 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14407 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14408 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14409 else
14410 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14411 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14412 }
14413
14414 std::list<HandleSDNode> PromOpHandles;
14415 for (auto &PromOp : PromOps)
14416 PromOpHandles.emplace_back(PromOp);
14417
14418 // Replace all operations (these are all the same, but have a different
14419 // (promoted) return type). DAG.getNode will validate that the types of
14420 // a binary operator match, so go through the list in reverse so that
14421 // we've likely promoted both operands first.
14422 while (!PromOpHandles.empty()) {
14423 SDValue PromOp = PromOpHandles.back().getValue();
14424 PromOpHandles.pop_back();
14425
14426 unsigned C;
14427 switch (PromOp.getOpcode()) {
14428 default: C = 0; break;
14429 case ISD::SELECT: C = 1; break;
14430 case ISD::SELECT_CC: C = 2; break;
14431 }
14432
14433 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14434 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14435 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14436 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14437 // The to-be-promoted operands of this node have not yet been
14438 // promoted (this should be rare because we're going through the
14439 // list backward, but if one of the operands has several users in
14440 // this cluster of to-be-promoted nodes, it is possible).
14441 PromOpHandles.emplace_front(PromOp);
14442 continue;
14443 }
14444
14445 // For SELECT and SELECT_CC nodes, we do a similar check for any
14446 // to-be-promoted comparison inputs.
14447 if (PromOp.getOpcode() == ISD::SELECT ||
14448 PromOp.getOpcode() == ISD::SELECT_CC) {
14449 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14450 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14451 (SelectTruncOp[1].count(PromOp.getNode()) &&
14452 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14453 PromOpHandles.emplace_front(PromOp);
14454 continue;
14455 }
14456 }
14457
14458 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14459 PromOp.getNode()->op_end());
14460
14461 // If this node has constant inputs, then they'll need to be promoted here.
14462 for (unsigned i = 0; i < 2; ++i) {
14463 if (!isa<ConstantSDNode>(Ops[C+i]))
14464 continue;
14465 if (Ops[C+i].getValueType() == N->getValueType(0))
14466 continue;
14467
14468 if (N->getOpcode() == ISD::SIGN_EXTEND)
14469 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14470 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14471 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14472 else
14473 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14474 }
14475
14476 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14477 // truncate them again to the original value type.
14478 if (PromOp.getOpcode() == ISD::SELECT ||
14479 PromOp.getOpcode() == ISD::SELECT_CC) {
14480 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14481 if (SI0 != SelectTruncOp[0].end())
14482 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14483 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14484 if (SI1 != SelectTruncOp[1].end())
14485 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14486 }
14487
14488 DAG.ReplaceAllUsesOfValueWith(PromOp,
14489 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14490 }
14491
14492 // Now we're left with the initial extension itself.
14493 if (!ReallyNeedsExt)
14494 return N->getOperand(0);
14495
14496 // To zero extend, just mask off everything except for the first bit (in the
14497 // i1 case).
14498 if (N->getOpcode() == ISD::ZERO_EXTEND)
14499 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14500 DAG.getConstant(APInt::getLowBitsSet(
14501 N->getValueSizeInBits(0), PromBits),
14502 dl, N->getValueType(0)));
14503
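 // To sign extend, shift left so the top meaningful bit of the promoted
 // value lands in the sign-bit position, then arithmetic-shift right by the
 // same amount to replicate it.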
14504 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14505 "Invalid extension type");
14506 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14507 SDValue ShiftCst =
14508 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14509 return DAG.getNode(
14510 ISD::SRA, dl, N->getValueType(0),
14511 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14512 ShiftCst);
14513}
14514
14515SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14516 DAGCombinerInfo &DCI) const {
14517 assert(N->getOpcode() == ISD::SETCC &&
14518 "Should be called with a SETCC node");
14519
14520 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14521 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14522 SDValue LHS = N->getOperand(0);
14523 SDValue RHS = N->getOperand(1);
14524
14525 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14526 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14527 LHS.hasOneUse())
14528 std::swap(LHS, RHS);
14529
14530 // x == 0-y --> x+y == 0
14531 // x != 0-y --> x+y != 0
14532 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14533 RHS.hasOneUse()) {
14534 SDLoc DL(N);
14535 SelectionDAG &DAG = DCI.DAG;
14536 EVT VT = N->getValueType(0);
14537 EVT OpVT = LHS.getValueType();
14538 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14539 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14540 }
14541 }
14542
14543 return DAGCombineTruncBoolExt(N, DCI);
14544}
14545
14546// Is this an extending load from an f32 to an f64?
14547static bool isFPExtLoad(SDValue Op) {
14548 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14549 return LD->getExtensionType() == ISD::EXTLOAD &&
14550 Op.getValueType() == MVT::f64;
14551 return false;
14552}
14553
14554/// Reduces the number of fp-to-int conversion when building a vector.
14555///
14556/// If this vector is built out of floating to integer conversions,
14557/// transform it to a vector built out of floating point values followed by a
14558/// single floating to integer conversion of the vector.
14559/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14560/// becomes (fptosi (build_vector ($A, $B, ...)))
14561SDValue PPCTargetLowering::
14562combineElementTruncationToVectorTruncation(SDNode *N,
14563 DAGCombinerInfo &DCI) const {
14564 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14565 "Should be called with a BUILD_VECTOR node");
14566
14567 SelectionDAG &DAG = DCI.DAG;
14568 SDLoc dl(N);
14569
14570 SDValue FirstInput = N->getOperand(0);
14571 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14572 "The input operand must be an fp-to-int conversion.");
14573
14574 // This combine happens after legalization so the fp_to_[su]i nodes are
14575 // already converted to PPCISD nodes.
14576 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14577 if (FirstConversion == PPCISD::FCTIDZ ||
14578 FirstConversion == PPCISD::FCTIDUZ ||
14579 FirstConversion == PPCISD::FCTIWZ ||
14580 FirstConversion == PPCISD::FCTIWUZ) {
14581 bool IsSplat = true;
14582 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14583 FirstConversion == PPCISD::FCTIWUZ;
14584 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14585 SmallVector<SDValue, 4> Ops;
14586 EVT TargetVT = N->getValueType(0);
14587 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14588 SDValue NextOp = N->getOperand(i);
14589 if (NextOp.getOpcode() != PPCISD::MFVSR)
14590 return SDValue();
14591 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14592 if (NextConversion != FirstConversion)
14593 return SDValue();
14594 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14595 // This is not valid if the input was originally double precision. It is
14596 // also not profitable to do unless this is an extending load, in which
14597 // case doing this combine will allow us to combine consecutive loads.
14598 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14599 return SDValue();
14600 if (N->getOperand(i) != FirstInput)
14601 IsSplat = false;
14602 }
14603
14604 // If this is a splat, we leave it as-is since there will be only a single
14605 // fp-to-int conversion followed by a splat of the integer. This is better
14606 // for 32-bit and smaller ints and neutral for 64-bit ints.
14607 if (IsSplat)
14608 return SDValue();
14609
14610 // Now that we know we have the right type of node, get its operands
14611 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14612 SDValue In = N->getOperand(i).getOperand(0);
14613 if (Is32Bit) {
14614 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14615 // here, we know that all inputs are extending loads so this is safe).
14616 if (In.isUndef())
14617 Ops.push_back(DAG.getUNDEF(SrcVT));
14618 else {
14619 SDValue Trunc =
14620 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14621 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14622 Ops.push_back(Trunc);
14623 }
14624 } else
14625 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14626 }
14627
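 // FCTIDZ/FCTIWZ are the signed conversions and FCTIDUZ/FCTIWUZ the
 // unsigned ones, so pick the matching generic fp-to-int opcode for the
 // whole vector.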
14628 unsigned Opcode;
14629 if (FirstConversion == PPCISD::FCTIDZ ||
14630 FirstConversion == PPCISD::FCTIWZ)
14631 Opcode = ISD::FP_TO_SINT;
14632 else
14633 Opcode = ISD::FP_TO_UINT;
14634
14635 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14636 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14637 return DAG.getNode(Opcode, dl, TargetVT, BV);
14638 }
14639 return SDValue();
14640}
14641
14642/// Reduce the number of loads when building a vector.
14643///
14644/// Building a vector out of multiple loads can be converted to a load
14645/// of the vector type if the loads are consecutive. If the loads are
14646/// consecutive but in descending order, a shuffle is added at the end
14647/// to reorder the vector.
14648static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14649 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14650 "Should be called with a BUILD_VECTOR node");
14651
14652 SDLoc dl(N);
14653
14654 // Return early for non-byte-sized types, as they can't be consecutive.
14655 if (!N->getValueType(0).getVectorElementType().isByteSized())
14656 return SDValue();
14657
14658 bool InputsAreConsecutiveLoads = true;
14659 bool InputsAreReverseConsecutive = true;
14660 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14661 SDValue FirstInput = N->getOperand(0);
14662 bool IsRoundOfExtLoad = false;
14663 LoadSDNode *FirstLoad = nullptr;
14664
14665 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14666 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14667 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14668 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14669 }
14670 // Not a build vector of (possibly fp_rounded) loads.
14671 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14672 N->getNumOperands() == 1)
14673 return SDValue();
14674
14675 if (!IsRoundOfExtLoad)
14676 FirstLoad = cast<LoadSDNode>(FirstInput);
14677
14678 SmallVector<LoadSDNode *, 4> InputLoads;
14679 InputLoads.push_back(FirstLoad);
14680 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14681 // If any inputs are fp_round(extload), they all must be.
14682 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14683 return SDValue();
14684
14685 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14686 N->getOperand(i);
14687 if (NextInput.getOpcode() != ISD::LOAD)
14688 return SDValue();
14689
14690 SDValue PreviousInput =
14691 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14692 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14693 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14694
14695 // If any inputs are fp_round(extload), they all must be.
14696 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14697 return SDValue();
14698
14699 // We only care about regular loads. The PPC-specific load intrinsics
14700 // will not lead to a merge opportunity.
14701 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14702 InputsAreConsecutiveLoads = false;
14703 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14704 InputsAreReverseConsecutive = false;
14705
14706 // Exit early if the loads are neither consecutive nor reverse consecutive.
14707 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14708 return SDValue();
14709 InputLoads.push_back(LD2);
14710 }
14711
14712 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14713 "The loads cannot be both consecutive and reverse consecutive.");
14714
14715 SDValue WideLoad;
14716 SDValue ReturnSDVal;
14717 if (InputsAreConsecutiveLoads) {
14718 assert(FirstLoad && "Input needs to be a LoadSDNode.");
14719 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14720 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14721 FirstLoad->getAlign());
14722 ReturnSDVal = WideLoad;
14723 } else if (InputsAreReverseConsecutive) {
14724 LoadSDNode *LastLoad = InputLoads.back();
14725 assert(LastLoad && "Input needs to be a LoadSDNode.");
14726 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14727 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14728 LastLoad->getAlign());
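 // The wide load starts at the last input (the lowest address), so build a
 // reversing shuffle mask <N-1, ..., 1, 0> to restore the original
 // build_vector element order.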
14729 SmallVector<int, 16> Ops;
14730 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14731 Ops.push_back(i);
14732
14733 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14734 DAG.getUNDEF(N->getValueType(0)), Ops);
14735 } else
14736 return SDValue();
14737
14738 for (auto *LD : InputLoads)
14739 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14740 return ReturnSDVal;
14741}
14742
14743// This function adds the required vector_shuffle needed to get
14744// the elements of the vector extract in the correct position
14745// as specified by the CorrectElems encoding.
14746static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14747 SDValue Input, uint64_t Elems,
14748 uint64_t CorrectElems) {
14749 SDLoc dl(N);
14750
14751 unsigned NumElems = Input.getValueType().getVectorNumElements();
14752 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14753
14754 // Knowing the element indices being extracted from the original
14755 // vector and the order in which they're being inserted, just put
14756 // them at element indices required for the instruction.
14757 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14758 if (DAG.getDataLayout().isLittleEndian())
14759 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14760 else
14761 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14762 CorrectElems = CorrectElems >> 8;
14763 Elems = Elems >> 8;
14764 }
14765
14766 SDValue Shuffle =
14767 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14768 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14769
14770 EVT VT = N->getValueType(0);
14771 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14772
14773 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14774 Input.getValueType().getVectorElementType(),
14775 N->getNumOperands());
14776 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14777 DAG.getValueType(ExtVT));
14778}
14779
14780// Look for build vector patterns where input operands come from sign
14781// extended vector_extract elements of specific indices. If the correct indices
14782// aren't used, add a vector shuffle to fix up the indices and create
14783// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14784// during instruction selection.
14785static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14786 // This array encodes the indices that the vector sign extend instructions
14787 // extract from when extending from one type to another for both BE and LE.
14788 // The right nibble of each byte corresponds to the LE indices,
14789 // and the left nibble of each byte corresponds to the BE indices.
14790 // For example: 0x3074B8FC byte->word
14791 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14792 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14793 // For example: 0x000070F8 byte->double word
14794 // For LE: the allowed indices are: 0x0,0x8
14795 // For BE: the allowed indices are: 0x7,0xF
14796 uint64_t TargetElems[] = {
14797 0x3074B8FC, // b->w
14798 0x000070F8, // b->d
14799 0x10325476, // h->w
14800 0x00003074, // h->d
14801 0x00001032, // w->d
14802 };
14803
14804 uint64_t Elems = 0;
14805 int Index;
14806 SDValue Input;
14807
14808 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14809 if (!Op)
14810 return false;
14811 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14812 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14813 return false;
14814
14815 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14816 // of the right width.
14817 SDValue Extract = Op.getOperand(0);
14818 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14819 Extract = Extract.getOperand(0);
14820 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14821 return false;
14822
14823 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14824 if (!ExtOp)
14825 return false;
14826
14827 Index = ExtOp->getZExtValue();
14828 if (Input && Input != Extract.getOperand(0))
14829 return false;
14830
14831 if (!Input)
14832 Input = Extract.getOperand(0);
14833
14834 Elems = Elems << 8;
14835 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14836 Elems |= Index;
14837
14838 return true;
14839 };
14840
14841 // If the build vector operands aren't sign extended vector extracts
14842 // of the same input vector, then return.
14843 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14844 if (!isSExtOfVecExtract(N->getOperand(i))) {
14845 return SDValue();
14846 }
14847 }
14848
14849 // If the vector extract indices are not correct, add the appropriate
14850 // vector_shuffle.
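 // The sum of the input and output element widths uniquely identifies the
 // extension kind: 8+32=40 (b->w), 8+64=72 (b->d), 16+32=48 (h->w),
 // 16+64=80 (h->d) and 32+64=96 (w->d).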
14851 int TgtElemArrayIdx;
14852 int InputSize = Input.getValueType().getScalarSizeInBits();
14853 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14854 if (InputSize + OutputSize == 40)
14855 TgtElemArrayIdx = 0;
14856 else if (InputSize + OutputSize == 72)
14857 TgtElemArrayIdx = 1;
14858 else if (InputSize + OutputSize == 48)
14859 TgtElemArrayIdx = 2;
14860 else if (InputSize + OutputSize == 80)
14861 TgtElemArrayIdx = 3;
14862 else if (InputSize + OutputSize == 96)
14863 TgtElemArrayIdx = 4;
14864 else
14865 return SDValue();
14866
14867 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14868 CorrectElems = DAG.getDataLayout().isLittleEndian()
14869 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14870 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14871 if (Elems != CorrectElems) {
14872 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14873 }
14874
14875 // Regular lowering will catch cases where a shuffle is not needed.
14876 return SDValue();
14877}
14878
14879// Look for the pattern of a load from a narrow width to i128, feeding
14880// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14881// (LXVRZX). This node represents a zero extending load that will be matched
14882// to the Load VSX Vector Rightmost instructions.
14883static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14884 SDLoc DL(N);
14885
14886 // This combine is only eligible for a BUILD_VECTOR of v1i128.
14887 if (N->getValueType(0) != MVT::v1i128)
14888 return SDValue();
14889
14890 SDValue Operand = N->getOperand(0);
14891 // Proceed with the transformation if the operand to the BUILD_VECTOR
14892 // is a load instruction.
14893 if (Operand.getOpcode() != ISD::LOAD)
14894 return SDValue();
14895
14896 auto *LD = cast<LoadSDNode>(Operand);
14897 EVT MemoryType = LD->getMemoryVT();
14898
14899 // This transformation is only valid if we are loading either a byte,
14900 // halfword, word, or doubleword.
14901 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14902 MemoryType == MVT::i32 || MemoryType == MVT::i64;
14903
14904 // Ensure that the load from the narrow width is being zero extended to i128.
14905 if (!ValidLDType ||
14906 (LD->getExtensionType() != ISD::ZEXTLOAD &&
14907 LD->getExtensionType() != ISD::EXTLOAD))
14908 return SDValue();
14909
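 // The zero-extending load node takes the chain, the base pointer, and the
 // width of the memory access in bits.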
14910 SDValue LoadOps[] = {
14911 LD->getChain(), LD->getBasePtr(),
14912 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14913
14914 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14915 DAG.getVTList(MVT::v1i128, MVT::Other),
14916 LoadOps, MemoryType, LD->getMemOperand());
14917}
14918
14919SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14920 DAGCombinerInfo &DCI) const {
14921 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14922 "Should be called with a BUILD_VECTOR node");
14923
14924 SelectionDAG &DAG = DCI.DAG;
14925 SDLoc dl(N);
14926
14927 if (!Subtarget.hasVSX())
14928 return SDValue();
14929
14930 // The target independent DAG combiner will leave a build_vector of
14931 // float-to-int conversions intact. We can generate MUCH better code for
14932 // a float-to-int conversion of a vector of floats.
14933 SDValue FirstInput = N->getOperand(0);
14934 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14935 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14936 if (Reduced)
14937 return Reduced;
14938 }
14939
14940 // If we're building a vector out of consecutive loads, just load that
14941 // vector type.
14942 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14943 if (Reduced)
14944 return Reduced;
14945
14946 // If we're building a vector out of extended elements from another vector
14947 // we have P9 vector integer extend instructions. The code assumes legal
14948 // input types (i.e. it can't handle things like v4i16) so do not run before
14949 // legalization.
14950 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14951 Reduced = combineBVOfVecSExt(N, DAG);
14952 if (Reduced)
14953 return Reduced;
14954 }
14955
14956 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14957 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14958 // is a load from <valid narrow width> to i128.
14959 if (Subtarget.isISA3_1()) {
14960 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14961 if (BVOfZLoad)
14962 return BVOfZLoad;
14963 }
14964
14965 if (N->getValueType(0) != MVT::v2f64)
14966 return SDValue();
14967
14968 // Looking for:
14969 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14970 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14971 FirstInput.getOpcode() != ISD::UINT_TO_FP)
14972 return SDValue();
14973 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14974 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14975 return SDValue();
14976 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14977 return SDValue();
14978
14979 SDValue Ext1 = FirstInput.getOperand(0);
14980 SDValue Ext2 = N->getOperand(1).getOperand(0);
14981 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14982 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14983 return SDValue();
14984
14985 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14986 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14987 if (!Ext1Op || !Ext2Op)
14988 return SDValue();
14989 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14990 Ext1.getOperand(0) != Ext2.getOperand(0))
14991 return SDValue();
14992
14993 int FirstElem = Ext1Op->getZExtValue();
14994 int SecondElem = Ext2Op->getZExtValue();
14995 int SubvecIdx;
14996 if (FirstElem == 0 && SecondElem == 1)
14997 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14998 else if (FirstElem == 2 && SecondElem == 3)
14999 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15000 else
15001 return SDValue();
15002
15003 SDValue SrcVec = Ext1.getOperand(0);
15004 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15005 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15006 return DAG.getNode(NodeType, dl, MVT::v2f64,
15007 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15008}
15009
15010SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15011 DAGCombinerInfo &DCI) const {
15012 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15013 N->getOpcode() == ISD::UINT_TO_FP) &&
15014 "Need an int -> FP conversion node here");
15015
15016 if (useSoftFloat() || !Subtarget.has64BitSupport())
15017 return SDValue();
15018
15019 SelectionDAG &DAG = DCI.DAG;
15020 SDLoc dl(N);
15021 SDValue Op(N, 0);
15022
15023 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15024 // from the hardware.
15025 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15026 return SDValue();
15027 if (!Op.getOperand(0).getValueType().isSimple())
15028 return SDValue();
15029 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15030 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15031 return SDValue();
15032
15033 SDValue FirstOperand(Op.getOperand(0));
15034 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15035 (FirstOperand.getValueType() == MVT::i8 ||
15036 FirstOperand.getValueType() == MVT::i16);
15037 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15038 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15039 bool DstDouble = Op.getValueType() == MVT::f64;
15040 unsigned ConvOp = Signed ?
15041 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15042 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
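 // WidthConst is the width of the memory access in bytes (1 for i8, 2 for
 // i16); it is used both by the load node and, for signed values, by the
 // VEXTS sign-extension node below.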
15043 SDValue WidthConst =
15044 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15045 dl, false);
15046 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15047 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15048 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15049 DAG.getVTList(MVT::f64, MVT::Other),
15050 Ops, MVT::i8, LDN->getMemOperand());
15051 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15052
15053 // For signed conversion, we need to sign-extend the value in the VSR
15054 if (Signed) {
15055 SDValue ExtOps[] = { Ld, WidthConst };
15056 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15057 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15058 } else
15059 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15060 }
15061
15062
15063 // For i32 intermediate values, unfortunately, the conversion functions
15064 // leave the upper 32 bits of the value undefined. Within the set of
15065 // scalar instructions, we have no method for zero- or sign-extending the
15066 // value. Thus, we cannot handle i32 intermediate values here.
15067 if (Op.getOperand(0).getValueType() == MVT::i32)
15068 return SDValue();
15069
15070 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15071 "UINT_TO_FP is supported only with FPCVT");
15072
15073 // If we have FCFIDS, then use it when converting to single-precision.
15074 // Otherwise, convert to double-precision and then round.
15075 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15076 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15077 : PPCISD::FCFIDS)
15078 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15079 : PPCISD::FCFID);
15080 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15081 ? MVT::f32
15082 : MVT::f64;
15083
15084 // If we're converting from a float, to an int, and back to a float again,
15085 // then we don't need the store/load pair at all.
15086 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15087 Subtarget.hasFPCVT()) ||
15088 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15089 SDValue Src = Op.getOperand(0).getOperand(0);
15090 if (Src.getValueType() == MVT::f32) {
15091 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15092 DCI.AddToWorklist(Src.getNode());
15093 } else if (Src.getValueType() != MVT::f64) {
15094 // Make sure that we don't pick up a ppc_fp128 source value.
15095 return SDValue();
15096 }
15097
15098 unsigned FCTOp =
15099 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15100 PPCISD::FCTIDUZ;
15101
15102 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15103 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15104
15105 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15106 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15107 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15108 DCI.AddToWorklist(FP.getNode());
15109 }
15110
15111 return FP;
15112 }
15113
15114 return SDValue();
15115}
15116
15117// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15118// builtins) into loads with swaps.
15119SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15120 DAGCombinerInfo &DCI) const {
15121 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15122 // load combines.
15123 if (DCI.isBeforeLegalizeOps())
15124 return SDValue();
15125
15126 SelectionDAG &DAG = DCI.DAG;
15127 SDLoc dl(N);
15128 SDValue Chain;
15129 SDValue Base;
15130 MachineMemOperand *MMO;
15131
15132 switch (N->getOpcode()) {
15133 default:
15134 llvm_unreachable("Unexpected opcode for little endian VSX load");
15135 case ISD::LOAD: {
15136 LoadSDNode *LD = cast<LoadSDNode>(N);
15137 Chain = LD->getChain();
15138 Base = LD->getBasePtr();
15139 MMO = LD->getMemOperand();
15140 // If the MMO suggests this isn't a load of a full vector, leave
15141 // things alone. For a built-in, we have to make the change for
15142 // correctness, so if there is a size problem, that will be a bug.
15143 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15144 return SDValue();
15145 break;
15146 }
15147 case ISD::INTRINSIC_W_CHAIN: {
15148 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15149 Chain = Intrin->getChain();
15150 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15151 // us what we want. Get operand 2 instead.
15152 Base = Intrin->getOperand(2);
15153 MMO = Intrin->getMemOperand();
15154 break;
15155 }
15156 }
15157
15158 MVT VecTy = N->getValueType(0).getSimpleVT();
15159
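 // Emit the load as a v2f64 LXVD2X followed by an XXSWAPD to put the
 // doublewords into little-endian order; a bitcast below restores the
 // original vector type if it was not v2f64.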
15160 SDValue LoadOps[] = { Chain, Base };
15161 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15162 DAG.getVTList(MVT::v2f64, MVT::Other),
15163 LoadOps, MVT::v2f64, MMO);
15164
15165 DCI.AddToWorklist(Load.getNode());
15166 Chain = Load.getValue(1);
15167 SDValue Swap = DAG.getNode(
15168 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15169 DCI.AddToWorklist(Swap.getNode());
15170
15171 // Add a bitcast if the resulting load type doesn't match v2f64.
15172 if (VecTy != MVT::v2f64) {
15173 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15174 DCI.AddToWorklist(N.getNode());
15175 // Package {bitcast value, swap's chain} to match Load's shape.
15176 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15177 N, Swap.getValue(1));
15178 }
15179
15180 return Swap;
15181}
15182
15183// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15184// builtins) into stores with swaps.
15185SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15186 DAGCombinerInfo &DCI) const {
15187 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15188 // store combines.
15189 if (DCI.isBeforeLegalizeOps())
15190 return SDValue();
15191
15192 SelectionDAG &DAG = DCI.DAG;
15193 SDLoc dl(N);
15194 SDValue Chain;
15195 SDValue Base;
15196 unsigned SrcOpnd;
15197 MachineMemOperand *MMO;
15198
15199 switch (N->getOpcode()) {
15200 default:
15201 llvm_unreachable("Unexpected opcode for little endian VSX store");
15202 case ISD::STORE: {
15203 StoreSDNode *ST = cast<StoreSDNode>(N);
15204 Chain = ST->getChain();
15205 Base = ST->getBasePtr();
15206 MMO = ST->getMemOperand();
15207 SrcOpnd = 1;
15208 // If the MMO suggests this isn't a store of a full vector, leave
15209 // things alone. For a built-in, we have to make the change for
15210 // correctness, so if there is a size problem, that will be a bug.
15211 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15212 return SDValue();
15213 break;
15214 }
15215 case ISD::INTRINSIC_VOID: {
15216 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15217 Chain = Intrin->getChain();
15218 // Intrin->getBasePtr() oddly does not get what we want.
15219 Base = Intrin->getOperand(3);
15220 MMO = Intrin->getMemOperand();
15221 SrcOpnd = 2;
15222 break;
15223 }
15224 }
15225
15226 SDValue Src = N->getOperand(SrcOpnd);
15227 MVT VecTy = Src.getValueType().getSimpleVT();
15228
15229 // All stores are done as v2f64 and possible bit cast.
15230 if (VecTy != MVT::v2f64) {
15231 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15232 DCI.AddToWorklist(Src.getNode());
15233 }
15234
15235 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15236 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15237 DCI.AddToWorklist(Swap.getNode());
15238 Chain = Swap.getValue(1);
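 // Emit the store as an STXVD2X of the swapped value, reusing the original
 // memory operand so the memory access information is preserved.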
15239 SDValue StoreOps[] = { Chain, Swap, Base };
15240 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15241 DAG.getVTList(MVT::Other),
15242 StoreOps, VecTy, MMO);
15243 DCI.AddToWorklist(Store.getNode());
15244 return Store;
15245}
15246
15247// Handle DAG combine for STORE (FP_TO_INT F).
15248SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15249 DAGCombinerInfo &DCI) const {
15250 SelectionDAG &DAG = DCI.DAG;
15251 SDLoc dl(N);
15252 unsigned Opcode = N->getOperand(1).getOpcode();
15253 (void)Opcode;
15254 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15255
15256 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15257 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15258 && "Not a FP_TO_INT Instruction!");
15259
15260 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15261 EVT Op1VT = N->getOperand(1).getValueType();
15262 EVT ResVT = Val.getValueType();
15263
15264 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15265 return SDValue();
15266
15267 // Only perform the combine for conversions to i64/i32, or to i16/i8 on Power9.
15268 bool ValidTypeForStoreFltAsInt =
15269 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15270 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15271
15272 // TODO: Lower conversion from f128 on all VSX targets
15273 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15274 return SDValue();
15275
15276 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15277 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15278 return SDValue();
15279
15280 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15281
15282 // Set number of bytes being converted.
15283 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
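 // The target store node takes the chain, the converted value, the store
 // address, the number of bytes being stored, and the original integer type.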
15284 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15285 DAG.getIntPtrConstant(ByteSize, dl, false),
15286 DAG.getValueType(Op1VT)};
15287
15288 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15289 DAG.getVTList(MVT::Other), Ops,
15290 cast<StoreSDNode>(N)->getMemoryVT(),
15291 cast<StoreSDNode>(N)->getMemOperand());
15292
15293 return Val;
15294}
15295
15296static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15297 // Check that the source of the element keeps flipping
15298 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
15299 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15300 for (int i = 1, e = Mask.size(); i < e; i++) {
15301 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15302 return false;
15303 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15304 return false;
15305 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15306 }
15307 return true;
15308}
15309
15310static bool isSplatBV(SDValue Op) {
15311 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15312 return false;
15313 SDValue FirstOp;
15314
15315 // Find first non-undef input.
15316 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15317 FirstOp = Op.getOperand(i);
15318 if (!FirstOp.isUndef())
15319 break;
15320 }
15321
15322 // All inputs are undef or the same as the first non-undef input.
15323 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15324 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15325 return false;
15326 return true;
15327}
15328
15329static SDValue isScalarToVec(SDValue Op) {
15330 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15331 return Op;
15332 if (Op.getOpcode() != ISD::BITCAST)
15333 return SDValue();
15334 Op = Op.getOperand(0);
15335 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15336 return Op;
15337 return SDValue();
15338}
15339
15340// Fix up the shuffle mask to account for the fact that the result of
15341// scalar_to_vector is not in lane zero. This just takes all values in
15342// the ranges specified by the min/max indices and adds the number of
15343// elements required to ensure each element comes from the respective
15344// position in the valid lane.
15345// On little endian, that's just the corresponding element in the other
15346// half of the vector. On big endian, it is in the same half but right
15347// justified rather than left justified in that half.
15348static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15349 int LHSMaxIdx, int RHSMinIdx,
15350 int RHSMaxIdx, int HalfVec,
15351 unsigned ValidLaneWidth,
15352 const PPCSubtarget &Subtarget) {
15353 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15354 int Idx = ShuffV[i];
15355 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15356 ShuffV[i] +=
15357 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15358 }
15359}
15360
15361// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15362// the original is:
15363// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15364// In such a case, just change the shuffle mask to extract the element
15365// from the permuted index.
15366static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15367 const PPCSubtarget &Subtarget) {
15368 SDLoc dl(OrigSToV);
15369 EVT VT = OrigSToV.getValueType();
15370 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15371 "Expecting a SCALAR_TO_VECTOR here");
15372 SDValue Input = OrigSToV.getOperand(0);
15373
15374 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15375 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15376 SDValue OrigVector = Input.getOperand(0);
15377
15378 // Can't handle non-const element indices or different vector types
15379 // for the input to the extract and the output of the scalar_to_vector.
15380 if (Idx && VT == OrigVector.getValueType()) {
15381 unsigned NumElts = VT.getVectorNumElements();
15382 assert(
15383 NumElts > 1 &&
15384 "Cannot produce a permuted scalar_to_vector for one element vector");
15385 SmallVector<int, 16> NewMask(NumElts, -1);
15386 unsigned ResultInElt = NumElts / 2;
15387 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15388 NewMask[ResultInElt] = Idx->getZExtValue();
15389 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15390 }
15391 }
15392 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15393 OrigSToV.getOperand(0));
15394}
15395
15396// On little endian subtargets, combine shuffles such as:
15397// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15398// into:
15399// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15400// because the latter can be matched to a single instruction merge.
15401// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15402// to put the value into element zero. Adjust the shuffle mask so that the
15403// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15404// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15405// nodes with elements smaller than doubleword because all the ways
15406// of getting scalar data into a vector register put the value in the
15407// rightmost element of the left half of the vector.
15408SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15409 SelectionDAG &DAG) const {
15410 SDValue LHS = SVN->getOperand(0);
15411 SDValue RHS = SVN->getOperand(1);
15412 auto Mask = SVN->getMask();
15413 int NumElts = LHS.getValueType().getVectorNumElements();
15414 SDValue Res(SVN, 0);
15415 SDLoc dl(SVN);
15416 bool IsLittleEndian = Subtarget.isLittleEndian();
15417
15418 // On big endian targets this is only useful for subtargets with direct moves.
15419 // On little endian targets it would be useful for all subtargets with VSX.
15420 // However adding special handling for LE subtargets without direct moves
15421 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15422 // which includes direct moves.
15423 if (!Subtarget.hasDirectMove())
15424 return Res;
15425
15426 // If this is not a shuffle of a shuffle and the first element comes from
15427 // the second vector, canonicalize to the commuted form. This will make it
15428 // more likely to match one of the single instruction patterns.
15429 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15430 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15431 std::swap(LHS, RHS);
15432 Res = DAG.getCommutedVectorShuffle(*SVN);
15433 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15434 }
15435
15436 // Adjust the shuffle mask if either input vector comes from a
15437 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15438 // form (to prevent the need for a swap).
15439 SmallVector<int, 16> ShuffV(Mask);
15440 SDValue SToVLHS = isScalarToVec(LHS);
15441 SDValue SToVRHS = isScalarToVec(RHS);
15442 if (SToVLHS || SToVRHS) {
15443 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15444 // same type and have differing element sizes, then do not perform
15445 // the following transformation. The current transformation for
15446 // SCALAR_TO_VECTOR assumes that both input vectors have the same
15447 // element size. This will be updated in the future to account for
15448 // differing sizes of the LHS and RHS.
15449 if (SToVLHS && SToVRHS &&
15450 (SToVLHS.getValueType().getScalarSizeInBits() !=
15451 SToVRHS.getValueType().getScalarSizeInBits()))
15452 return Res;
15453
15454 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15455 : SToVRHS.getValueType().getVectorNumElements();
15456 int NumEltsOut = ShuffV.size();
15457 // The width of the "valid lane" (i.e. the lane that contains the value that
15458 // is vectorized) needs to be expressed in terms of the number of elements
15459 // of the shuffle. It is thereby the ratio of the values before and after
15460 // any bitcast.
15461 unsigned ValidLaneWidth =
15462 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15463 LHS.getValueType().getScalarSizeInBits()
15464 : SToVRHS.getValueType().getScalarSizeInBits() /
15465 RHS.getValueType().getScalarSizeInBits();
15466
15467 // Initially assume that neither input is permuted. These will be adjusted
15468 // accordingly if either input is.
15469 int LHSMaxIdx = -1;
15470 int RHSMinIdx = -1;
15471 int RHSMaxIdx = -1;
15472 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15473
15474 // Get the permuted scalar to vector nodes for the source(s) that come from
15475 // ISD::SCALAR_TO_VECTOR.
15476 // On big endian systems, this only makes sense for element sizes smaller
15477 // than 64 bits since for 64-bit elements, all instructions already put
15478 // the value into element zero. Since scalar size of LHS and RHS may differ
15479 // after isScalarToVec, this should be checked using their own sizes.
15480 if (SToVLHS) {
15481 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15482 return Res;
15483 // Set up the values for the shuffle vector fixup.
15484 LHSMaxIdx = NumEltsOut / NumEltsIn;
15485 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15486 if (SToVLHS.getValueType() != LHS.getValueType())
15487 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15488 LHS = SToVLHS;
15489 }
15490 if (SToVRHS) {
15491 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15492 return Res;
15493 RHSMinIdx = NumEltsOut;
15494 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15495 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15496 if (SToVRHS.getValueType() != RHS.getValueType())
15497 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15498 RHS = SToVRHS;
15499 }
15500
15501 // Fix up the shuffle mask to reflect where the desired element actually is.
15502 // The minimum and maximum indices that correspond to element zero for both
15503 // the LHS and RHS are computed and will control which shuffle mask entries
15504 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15505 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15506 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15507 HalfVec, ValidLaneWidth, Subtarget);
15508 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15509
15510 // We may have simplified away the shuffle. We won't be able to do anything
15511 // further with it here.
15512 if (!isa<ShuffleVectorSDNode>(Res))
15513 return Res;
15514 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15515 }
15516
15517 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15518 // The common case after we commuted the shuffle is that the RHS is a splat
15519 // and we have elements coming in from the splat at indices that are not
15520 // conducive to using a merge.
15521 // Example:
15522 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15523 if (!isSplatBV(TheSplat))
15524 return Res;
15525
15526 // We are looking for a mask such that all even elements are from
15527 // one vector and all odd elements from the other.
15528 if (!isAlternatingShuffMask(Mask, NumElts))
15529 return Res;
15530
15531 // Adjust the mask so we are pulling in the same index from the splat
15532 // as the index from the interesting vector in consecutive elements.
15533 if (IsLittleEndian) {
15534 // Example (even elements from first vector):
15535 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15536 if (Mask[0] < NumElts)
15537 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15538 if (ShuffV[i] < 0)
15539 continue;
15540 // If element from non-splat is undef, pick first element from splat.
15541 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15542 }
15543 // Example (odd elements from first vector):
15544 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15545 else
15546 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15547 if (ShuffV[i] < 0)
15548 continue;
15549 // If element from non-splat is undef, pick first element from splat.
15550 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15551 }
15552 } else {
15553 // Example (even elements from first vector):
15554 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15555 if (Mask[0] < NumElts)
15556 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15557 if (ShuffV[i] < 0)
15558 continue;
15559 // If element from non-splat is undef, pick first element from splat.
15560 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15561 }
15562 // Example (odd elements from first vector):
15563 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15564 else
15565 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15566 if (ShuffV[i] < 0)
15567 continue;
15568 // If element from non-splat is undef, pick first element from splat.
15569 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15570 }
15571 }
15572
15573 // If the RHS has undefs, we need to remove them since we may have created
15574 // a shuffle that adds those instead of the splat value.
15575 SDValue SplatVal =
15576 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15577 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15578
15579 if (IsLittleEndian)
15580 RHS = TheSplat;
15581 else
15582 LHS = TheSplat;
15583 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15584}
15585
15586SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15587 LSBaseSDNode *LSBase,
15588 DAGCombinerInfo &DCI) const {
15589 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15590 "Not a reverse memop pattern!");
15591
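 // Returns true if the shuffle mask is a strict reversal of the vector
 // elements, i.e. <N-1, N-2, ..., 1, 0> for an N-element shuffle.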
15592 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15593 auto Mask = SVN->getMask();
15594 int i = 0;
15595 auto I = Mask.rbegin();
15596 auto E = Mask.rend();
15597
15598 for (; I != E; ++I) {
15599 if (*I != i)
15600 return false;
15601 i++;
15602 }
15603 return true;
15604 };
15605
15606 SelectionDAG &DAG = DCI.DAG;
15607 EVT VT = SVN->getValueType(0);
15608
15609 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15610 return SDValue();
15611
15612 // Before Power9, the PPCVSXSwapRemoval pass adjusts the element order
15613 // (see the comment in PPCVSXSwapRemoval.cpp). This combine conflicts with
15614 // that optimization, so we skip it on pre-Power9 subtargets.
15615 if (!Subtarget.hasP9Vector())
15616 return SDValue();
15617
15618 if (!IsElementReverse(SVN))
15619 return SDValue();
15620
15621 if (LSBase->getOpcode() == ISD::LOAD) {
15622 // If result 0 of the load has any user other than the shufflevector
15623 // instruction, it is not profitable to replace the shufflevector with a
15624 // reverse load.
15625 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15626 UI != UE; ++UI)
15627 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15628 return SDValue();
15629
15630 SDLoc dl(LSBase);
15631 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15632 return DAG.getMemIntrinsicNode(
15633 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15634 LSBase->getMemoryVT(), LSBase->getMemOperand());
15635 }
15636
15637 if (LSBase->getOpcode() == ISD::STORE) {
15638 // If there are other uses of the shuffle, the swap cannot be avoided.
15639 // Forcing the use of an X-Form (since swapped stores only have
15640 // X-Forms) without removing the swap is unprofitable.
15641 if (!SVN->hasOneUse())
15642 return SDValue();
15643
15644 SDLoc dl(LSBase);
15645 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15646 LSBase->getBasePtr()};
15647 return DAG.getMemIntrinsicNode(
15648 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15649 LSBase->getMemoryVT(), LSBase->getMemOperand());
15650 }
15651
15652 llvm_unreachable("Expected a load or store node here");
15653}
15654
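// Identify PowerPC store-conditional intrinsics (stbcx./sthcx./stwcx./stdcx.)
// and report the width, in bytes, of the conditionally stored value. Returns
// false for any other intrinsic.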
15655static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15656 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15657 if (IntrinsicID == Intrinsic::ppc_stdcx)
15658 StoreWidth = 8;
15659 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15660 StoreWidth = 4;
15661 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15662 StoreWidth = 2;
15663 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15664 StoreWidth = 1;
15665 else
15666 return false;
15667 return true;
15668}
15669
15670SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15671 DAGCombinerInfo &DCI) const {
15672 SelectionDAG &DAG = DCI.DAG;
15673 SDLoc dl(N);
15674 switch (N->getOpcode()) {
15675 default: break;
15676 case ISD::ADD:
15677 return combineADD(N, DCI);
15678 case ISD::AND: {
15679 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15680 // original input as that will prevent us from selecting optimal rotates.
15681 // This only matters if the input to the extend is i32 widened to i64.
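 // For example (illustrative): (and (zext i64 (srl i32 %x, 4)), 255) is
 // rewritten below as (zext i64 (and i32 (srl i32 %x, 4), 255)) so that the
 // 32-bit shift/and pair can still be matched as a single rotate-and-mask.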
15682 SDValue Op1 = N->getOperand(0);
15683 SDValue Op2 = N->getOperand(1);
15684 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15685 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15686 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15687 Op1.getOperand(0).getValueType() != MVT::i32)
15688 break;
15689 SDValue NarrowOp = Op1.getOperand(0);
15690 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15691 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15692 break;
15693
15694 uint64_t Imm = Op2->getAsZExtVal();
15695 // Make sure that the constant is narrow enough to fit in the narrow type.
15696 if (!isUInt<32>(Imm))
15697 break;
15698 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15699 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15700 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15701 }
15702 case ISD::SHL:
15703 return combineSHL(N, DCI);
15704 case ISD::SRA:
15705 return combineSRA(N, DCI);
15706 case ISD::SRL:
15707 return combineSRL(N, DCI);
15708 case ISD::MUL:
15709 return combineMUL(N, DCI);
15710 case ISD::FMA:
15711 case PPCISD::FNMSUB:
15712 return combineFMALike(N, DCI);
15713 case PPCISD::SHL:
15714 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15715 return N->getOperand(0);
15716 break;
15717 case PPCISD::SRL:
15718 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15719 return N->getOperand(0);
15720 break;
15721 case PPCISD::SRA:
15722 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15723 if (C->isZero() || // 0 >>s V -> 0.
15724 C->isAllOnes()) // -1 >>s V -> -1.
15725 return N->getOperand(0);
15726 }
15727 break;
15728 case ISD::SIGN_EXTEND:
15729 case ISD::ZERO_EXTEND:
15730 case ISD::ANY_EXTEND:
15731 return DAGCombineExtBoolTrunc(N, DCI);
15732 case ISD::TRUNCATE:
15733 return combineTRUNCATE(N, DCI);
15734 case ISD::SETCC:
15735 if (SDValue CSCC = combineSetCC(N, DCI))
15736 return CSCC;
15737 [[fallthrough]];
15738 case ISD::SELECT_CC:
15739 return DAGCombineTruncBoolExt(N, DCI);
15740 case ISD::SINT_TO_FP:
15741 case ISD::UINT_TO_FP:
15742 return combineFPToIntToFP(N, DCI);
15743 case ISD::VECTOR_SHUFFLE:
15744 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15745 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15746 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15747 }
15748 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15749 case ISD::STORE: {
15750
15751 EVT Op1VT = N->getOperand(1).getValueType();
15752 unsigned Opcode = N->getOperand(1).getOpcode();
15753
15754 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15755 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15756 SDValue Val = combineStoreFPToInt(N, DCI);
15757 if (Val)
15758 return Val;
15759 }
15760
15761 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15762 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15763 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15764 if (Val)
15765 return Val;
15766 }
15767
15768 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15769 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15770 N->getOperand(1).getNode()->hasOneUse() &&
15771 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15772 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15773
15774 // STBRX can only handle simple types, and it makes no sense to store fewer
15775 // than two bytes in byte-reversed order.
15776 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15777 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15778 break;
15779
15780 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15781 // Do an any-extend to 32-bits if this is a half-word input.
15782 if (BSwapOp.getValueType() == MVT::i16)
15783 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15784
15785 // If the type of the BSWAP operand is wider than the stored memory width,
15786 // it needs to be shifted right before the STBRX.
15787 if (Op1VT.bitsGT(mVT)) {
15788 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15789 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15790 DAG.getConstant(Shift, dl, MVT::i32));
15791 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15792 if (Op1VT == MVT::i64)
15793 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15794 }
15795
15796 SDValue Ops[] = {
15797 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15798 };
15799 return
15800 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15801 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15802 cast<StoreSDNode>(N)->getMemOperand());
15803 }
15804
15805 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15806 // This increases the chance that the constant materialization can be CSE'd.
15807 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15808 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15809 // Need to sign-extend to 64 bits to handle negative values.
15810 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15811 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15812 MemVT.getSizeInBits());
15813 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15814
15815 // DAG.getTruncStore() can't be used here because it doesn't accept
15816 // the general (base + offset) addressing mode.
15817 // So we use UpdateNodeOperands and setTruncatingStore instead.
15818 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15819 N->getOperand(3));
15820 cast<StoreSDNode>(N)->setTruncatingStore(true);
15821 return SDValue(N, 0);
15822 }
15823
15824 // For little endian, VSX stores require generating xxswapd/stxvd2x.
15825 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15826 if (Op1VT.isSimple()) {
15827 MVT StoreVT = Op1VT.getSimpleVT();
15828 if (Subtarget.needsSwapsForVSXMemOps() &&
15829 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15830 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15831 return expandVSXStoreForLE(N, DCI);
15832 }
15833 break;
15834 }
15835 case ISD::LOAD: {
15836 LoadSDNode *LD = cast<LoadSDNode>(N);
15837 EVT VT = LD->getValueType(0);
15838
15839 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15840 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15841 if (VT.isSimple()) {
15842 MVT LoadVT = VT.getSimpleVT();
15843 if (Subtarget.needsSwapsForVSXMemOps() &&
15844 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15845 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15846 return expandVSXLoadForLE(N, DCI);
15847 }
15848
15849 // We sometimes end up with a 64-bit integer load, from which we extract
15850 // two single-precision floating-point numbers. This happens with
15851 // std::complex<float>, and other similar structures, because of the way we
15852 // canonicalize structure copies. However, if we lack direct moves,
15853 // then the final bitcasts from the extracted integer values to the
15854 // floating-point numbers turn into store/load pairs. Even with direct moves,
15855 // just loading the two floating-point numbers is likely better.
15856 auto ReplaceTwoFloatLoad = [&]() {
15857 if (VT != MVT::i64)
15858 return false;
15859
15860 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15861 LD->isVolatile())
15862 return false;
15863
15864 // We're looking for a sequence like this:
15865 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15866 // t16: i64 = srl t13, Constant:i32<32>
15867 // t17: i32 = truncate t16
15868 // t18: f32 = bitcast t17
15869 // t19: i32 = truncate t13
15870 // t20: f32 = bitcast t19
15871
15872 if (!LD->hasNUsesOfValue(2, 0))
15873 return false;
15874
15875 auto UI = LD->use_begin();
15876 while (UI.getUse().getResNo() != 0) ++UI;
15877 SDNode *Trunc = *UI++;
15878 while (UI.getUse().getResNo() != 0) ++UI;
15879 SDNode *RightShift = *UI;
15880 if (Trunc->getOpcode() != ISD::TRUNCATE)
15881 std::swap(Trunc, RightShift);
15882
15883 if (Trunc->getOpcode() != ISD::TRUNCATE ||
15884 Trunc->getValueType(0) != MVT::i32 ||
15885 !Trunc->hasOneUse())
15886 return false;
15887 if (RightShift->getOpcode() != ISD::SRL ||
15888 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15889 RightShift->getConstantOperandVal(1) != 32 ||
15890 !RightShift->hasOneUse())
15891 return false;
15892
15893 SDNode *Trunc2 = *RightShift->use_begin();
15894 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15895 Trunc2->getValueType(0) != MVT::i32 ||
15896 !Trunc2->hasOneUse())
15897 return false;
15898
15899 SDNode *Bitcast = *Trunc->use_begin();
15900 SDNode *Bitcast2 = *Trunc2->use_begin();
15901
15902 if (Bitcast->getOpcode() != ISD::BITCAST ||
15903 Bitcast->getValueType(0) != MVT::f32)
15904 return false;
15905 if (Bitcast2->getOpcode() != ISD::BITCAST ||
15906 Bitcast2->getValueType(0) != MVT::f32)
15907 return false;
15908
15909 if (Subtarget.isLittleEndian())
15910 std::swap(Bitcast, Bitcast2);
15911
15912 // Bitcast has the second float (in memory-layout order) and Bitcast2
15913 // has the first one.
15914
15915 SDValue BasePtr = LD->getBasePtr();
15916 if (LD->isIndexed()) {
15917 assert(LD->getAddressingMode() == ISD::PRE_INC &&
15918 "Non-pre-inc AM on PPC?");
15919 BasePtr =
15920 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15921 LD->getOffset());
15922 }
15923
15924 auto MMOFlags =
15925 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15926 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15927 LD->getPointerInfo(), LD->getAlign(),
15928 MMOFlags, LD->getAAInfo());
15929 SDValue AddPtr =
15930 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15931 BasePtr, DAG.getIntPtrConstant(4, dl));
15932 SDValue FloatLoad2 = DAG.getLoad(
15933 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15934 LD->getPointerInfo().getWithOffset(4),
15935 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
15936
15937 if (LD->isIndexed()) {
15938 // Note that DAGCombine should re-form any pre-increment load(s) from
15939 // what is produced here if that makes sense.
15940 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15941 }
15942
15943 DCI.CombineTo(Bitcast2, FloatLoad);
15944 DCI.CombineTo(Bitcast, FloatLoad2);
15945
15946 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15947 SDValue(FloatLoad2.getNode(), 1));
15948 return true;
15949 };
15950
15951 if (ReplaceTwoFloatLoad())
15952 return SDValue(N, 0);
15953
15954 EVT MemVT = LD->getMemoryVT();
15955 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15956 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15957 if (LD->isUnindexed() && VT.isVector() &&
15958 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
15959 // P8 and later hardware should just use LOAD.
15960 !Subtarget.hasP8Vector() &&
15961 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15962 VT == MVT::v4f32))) &&
15963 LD->getAlign() < ABIAlignment) {
15964 // This is a type-legal unaligned Altivec load.
15965 SDValue Chain = LD->getChain();
15966 SDValue Ptr = LD->getBasePtr();
15967 bool isLittleEndian = Subtarget.isLittleEndian();
15968
15969 // This implements the loading of unaligned vectors as described in
15970 // the venerable Apple Velocity Engine overview. Specifically:
15971 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15972 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15973 //
15974 // The general idea is to expand a sequence of one or more unaligned
15975 // loads into an alignment-based permutation-control instruction (lvsl
15976 // or lvsr), a series of regular vector loads (which always truncate
15977 // their input address to an aligned address), and a series of
15978 // permutations. The results of these permutations are the requested
15979 // loaded values. The trick is that the last "extra" load is not taken
15980 // from the address you might suspect (sizeof(vector) bytes after the
15981 // last requested load), but rather sizeof(vector) - 1 bytes after the
15982 // last requested vector. The point of this is to avoid a page fault if
15983 // the base address happened to be aligned. This works because if the
15984 // base address is aligned, then adding less than a full vector length
15985 // will cause the last vector in the sequence to be (re)loaded.
15986 // Otherwise, the next vector is fetched from the following aligned address,
15987 // as one would expect.
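 // Illustrative example (addresses made up for exposition, big-endian
 // formulation; the little-endian path uses lvsr and swaps the inputs, as
 // noted further below): for a 16-byte load from a pointer P with
 // P % 16 == 12, the base lvx reads the quadword at P - 12, the extra lvx
 // reads the quadword at P + 4 (P + 15 truncated to 16-byte alignment), and
 // the vperm selects the 16 bytes starting at byte offset 12 of the
 // concatenated pair.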
15988
15989 // We might be able to reuse the permutation generation from
15990 // a different base address offset from this one by an aligned amount.
15991 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15992 // optimization later.
15993 Intrinsic::ID Intr, IntrLD, IntrPerm;
15994 MVT PermCntlTy, PermTy, LDTy;
15995 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15996 : Intrinsic::ppc_altivec_lvsl;
15997 IntrLD = Intrinsic::ppc_altivec_lvx;
15998 IntrPerm = Intrinsic::ppc_altivec_vperm;
15999 PermCntlTy = MVT::v16i8;
16000 PermTy = MVT::v4i32;
16001 LDTy = MVT::v4i32;
16002
16003 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16004
16005 // Create the new MMO for the new base load. It is like the original MMO,
16006 // but represents an area in memory almost twice the vector size centered
16007 // on the original address. If the address is unaligned, we might start
16008 // reading up to (sizeof(vector)-1) bytes below the address of the
16009 // original unaligned load.
16010 MachineFunction &MF = DAG.getMachineFunction();
16011 MachineMemOperand *BaseMMO =
16012 MF.getMachineMemOperand(LD->getMemOperand(),
16013 -(int64_t)MemVT.getStoreSize()+1,
16014 2*MemVT.getStoreSize()-1);
16015
16016 // Create the new base load.
16017 SDValue LDXIntID =
16018 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16019 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16020 SDValue BaseLoad =
16021 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16022 DAG.getVTList(PermTy, MVT::Other),
16023 BaseLoadOps, LDTy, BaseMMO);
16024
16025 // Note that the value of IncOffset (which is provided to the next
16026 // load's pointer info offset value, and thus used to calculate the
16027 // alignment), and the value of IncValue (which is actually used to
16028 // increment the pointer value) are different! This is because we
16029 // require the next load to appear to be aligned, even though it
16030 // is actually offset from the base pointer by a lesser amount.
16031 int IncOffset = VT.getSizeInBits() / 8;
16032 int IncValue = IncOffset;
16033
16034 // Walk (both up and down) the chain looking for another load at the real
16035 // (aligned) offset (the alignment of the other load does not matter in
16036 // this case). If found, then do not use the offset reduction trick, as
16037 // that will prevent the loads from being later combined (as they would
16038 // otherwise be duplicates).
16039 if (!findConsecutiveLoad(LD, DAG))
16040 --IncValue;
16041
16042 SDValue Increment =
16043 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16044 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16045
16046 MachineMemOperand *ExtraMMO =
16047 MF.getMachineMemOperand(LD->getMemOperand(),
16048 1, 2*MemVT.getStoreSize()-1);
16049 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16050 SDValue ExtraLoad =
16051 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16052 DAG.getVTList(PermTy, MVT::Other),
16053 ExtraLoadOps, LDTy, ExtraMMO);
16054
16055 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16056 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16057
16058 // Because vperm has a big-endian bias, we must reverse the order
16059 // of the input vectors and complement the permute control vector
16060 // when generating little endian code. We have already handled the
16061 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16062 // and ExtraLoad here.
16063 SDValue Perm;
16064 if (isLittleEndian)
16065 Perm = BuildIntrinsicOp(IntrPerm,
16066 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16067 else
16068 Perm = BuildIntrinsicOp(IntrPerm,
16069 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16070
16071 if (VT != PermTy)
16072 Perm = Subtarget.hasAltivec()
16073 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16074 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16075 DAG.getTargetConstant(1, dl, MVT::i64));
16076 // second argument is 1 because this rounding
16077 // is always exact.
16078
16079 // The output of the permutation is our loaded result, the TokenFactor is
16080 // our new chain.
16081 DCI.CombineTo(N, Perm, TF);
16082 return SDValue(N, 0);
16083 }
16084 }
16085 break;
16086 case ISD::INTRINSIC_WO_CHAIN: {
16087 bool isLittleEndian = Subtarget.isLittleEndian();
16088 unsigned IID = N->getConstantOperandVal(0);
16089 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16090 : Intrinsic::ppc_altivec_lvsl);
16091 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16092 SDValue Add = N->getOperand(1);
16093
16094 int Bits = 4 /* 16 byte alignment */;
16095
16096 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16097 APInt::getAllOnes(Bits /* alignment */)
16098 .zext(Add.getScalarValueSizeInBits()))) {
16099 SDNode *BasePtr = Add->getOperand(0).getNode();
16100 for (SDNode *U : BasePtr->uses()) {
16101 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16102 U->getConstantOperandVal(0) == IID) {
16103 // We've found another LVSL/LVSR, and this address is an aligned
16104 // multiple of that one. The results will be the same, so use the
16105 // one we've just found instead.
16106
16107 return SDValue(U, 0);
16108 }
16109 }
16110 }
16111
16112 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16113 SDNode *BasePtr = Add->getOperand(0).getNode();
16114 for (SDNode *U : BasePtr->uses()) {
16115 if (U->getOpcode() == ISD::ADD &&
16116 isa<ConstantSDNode>(U->getOperand(1)) &&
16117 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16118 (1ULL << Bits) ==
16119 0) {
16120 SDNode *OtherAdd = U;
16121 for (SDNode *V : OtherAdd->uses()) {
16122 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16123 V->getConstantOperandVal(0) == IID) {
16124 return SDValue(V, 0);
16125 }
16126 }
16127 }
16128 }
16129 }
16130 }
16131
16132 // Combine vmaxsw/h/b(a, negation of a) into abs(a).
16133 // This exposes the vabsduw/h/b opportunity downstream.
16134 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16135 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16136 IID == Intrinsic::ppc_altivec_vmaxsh ||
16137 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16138 SDValue V1 = N->getOperand(1);
16139 SDValue V2 = N->getOperand(2);
16140 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16141 V1.getSimpleValueType() == MVT::v8i16 ||
16142 V1.getSimpleValueType() == MVT::v16i8) &&
16143 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16144 // (0-a, a)
16145 if (V1.getOpcode() == ISD::SUB &&
16146 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16147 V1.getOperand(1) == V2) {
16148 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16149 }
16150 // (a, 0-a)
16151 if (V2.getOpcode() == ISD::SUB &&
16152 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16153 V2.getOperand(1) == V1) {
16154 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16155 }
16156 // (x-y, y-x)
16157 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16158 V1.getOperand(0) == V2.getOperand(1) &&
16159 V1.getOperand(1) == V2.getOperand(0)) {
16160 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16161 }
16162 }
16163 }
16164 }
16165
16166 break;
16167 case ISD::INTRINSIC_W_CHAIN:
16168 switch (N->getConstantOperandVal(1)) {
16169 default:
16170 break;
16171 case Intrinsic::ppc_altivec_vsum4sbs:
16172 case Intrinsic::ppc_altivec_vsum4shs:
16173 case Intrinsic::ppc_altivec_vsum4ubs: {
16174 // These sum-across intrinsics only have a chain due to the side effect
16175 // that they may set the SAT bit. If we know the SAT bit will not be set
16176 // for some inputs, we can replace any uses of their chain with the
16177 // input chain.
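 // (With a zero accumulator vector, the partial sums of a few byte or
 // halfword elements cannot come close to overflowing a 32-bit lane, so
 // saturation is impossible.)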
16178 if (BuildVectorSDNode *BVN =
16179 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16180 APInt APSplatBits, APSplatUndef;
16181 unsigned SplatBitSize;
16182 bool HasAnyUndefs;
16183 bool BVNIsConstantSplat = BVN->isConstantSplat(
16184 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16185 !Subtarget.isLittleEndian());
16186 // If the constant splat vector is 0, the SAT bit will not be set.
16187 if (BVNIsConstantSplat && APSplatBits == 0)
16188 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16189 }
16190 return SDValue();
16191 }
16192 case Intrinsic::ppc_vsx_lxvw4x:
16193 case Intrinsic::ppc_vsx_lxvd2x:
16194 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16195 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16196 if (Subtarget.needsSwapsForVSXMemOps())
16197 return expandVSXLoadForLE(N, DCI);
16198 break;
16199 }
16200 break;
16201 case ISD::INTRINSIC_VOID:
16202 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16203 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16204 if (Subtarget.needsSwapsForVSXMemOps()) {
16205 switch (N->getConstantOperandVal(1)) {
16206 default:
16207 break;
16208 case Intrinsic::ppc_vsx_stxvw4x:
16209 case Intrinsic::ppc_vsx_stxvd2x:
16210 return expandVSXStoreForLE(N, DCI);
16211 }
16212 }
16213 break;
16214 case ISD::BSWAP: {
16215 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16216 // For subtargets without LDBRX, we can still do better than the default
16217 // expansion even for 64-bit BSWAP (LOAD).
16218 bool Is64BitBswapOn64BitTgt =
16219 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16220 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16221 N->getOperand(0).hasOneUse();
16222 if (IsSingleUseNormalLd &&
16223 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16224 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16225 SDValue Load = N->getOperand(0);
16226 LoadSDNode *LD = cast<LoadSDNode>(Load);
16227 // Create the byte-swapping load.
16228 SDValue Ops[] = {
16229 LD->getChain(), // Chain
16230 LD->getBasePtr(), // Ptr
16231 DAG.getValueType(N->getValueType(0)) // VT
16232 };
16233 SDValue BSLoad =
16234 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16235 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16236 MVT::i64 : MVT::i32, MVT::Other),
16237 Ops, LD->getMemoryVT(), LD->getMemOperand());
16238
16239 // If this is an i16 load, insert the truncate.
16240 SDValue ResVal = BSLoad;
16241 if (N->getValueType(0) == MVT::i16)
16242 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16243
16244 // First, combine the bswap away. This makes the value produced by the
16245 // load dead.
16246 DCI.CombineTo(N, ResVal);
16247
16248 // Next, combine the load away; we give it a bogus result value but a real
16249 // chain result. The result value is dead because the bswap is dead.
16250 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16251
16252 // Return N so it doesn't get rechecked!
16253 return SDValue(N, 0);
16254 }
16255 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16256 // before legalization so that the BUILD_PAIR is handled correctly.
16257 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16258 !IsSingleUseNormalLd)
16259 return SDValue();
16260 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16261
16262 // Can't split volatile or atomic loads.
16263 if (!LD->isSimple())
16264 return SDValue();
16265 SDValue BasePtr = LD->getBasePtr();
16266 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16267 LD->getPointerInfo(), LD->getAlign());
16268 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16269 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16270 DAG.getIntPtrConstant(4, dl));
16271 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16272 LD->getMemOperand(), 4, 4);
16273 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16274 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16275 SDValue Res;
16276 if (Subtarget.isLittleEndian())
16277 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16278 else
16279 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16280 SDValue TF =
16281 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16282 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16283 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16284 return Res;
16285 }
16286 case PPCISD::VCMP:
16287 // If a VCMP_rec node already exists with exactly the same operands as this
16288 // node, use its result instead of this node (VCMP_rec computes both a CR6
16289 // and a normal output).
16290 //
16291 if (!N->getOperand(0).hasOneUse() &&
16292 !N->getOperand(1).hasOneUse() &&
16293 !N->getOperand(2).hasOneUse()) {
16294
16295 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16296 SDNode *VCMPrecNode = nullptr;
16297
16298 SDNode *LHSN = N->getOperand(0).getNode();
16299 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16300 UI != E; ++UI)
16301 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16302 UI->getOperand(1) == N->getOperand(1) &&
16303 UI->getOperand(2) == N->getOperand(2) &&
16304 UI->getOperand(0) == N->getOperand(0)) {
16305 VCMPrecNode = *UI;
16306 break;
16307 }
16308
16309 // If there is no VCMP_rec node, or if the flag value has a single use,
16310 // don't transform this.
16311 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16312 break;
16313
16314 // Look at the (necessarily single) use of the flag value. If it has a
16315 // chain, this transformation is more complex. Note that multiple things
16316 // could use the value result, which we should ignore.
16317 SDNode *FlagUser = nullptr;
16318 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16319 FlagUser == nullptr; ++UI) {
16320 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16321 SDNode *User = *UI;
16322 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16323 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16324 FlagUser = User;
16325 break;
16326 }
16327 }
16328 }
16329
16330 // If the user is a MFOCRF instruction, we know this is safe.
16331 // Otherwise we give up for right now.
16332 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16333 return SDValue(VCMPrecNode, 0);
16334 }
16335 break;
16336 case ISD::BR_CC: {
16337 // If this is a branch on an altivec predicate comparison, lower this so
16338 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16339 // lowering is done pre-legalize, because the legalizer lowers the predicate
16340 // compare down to code that is difficult to reassemble.
16341 // This code also handles branches that depend on the result of a store
16342 // conditional.
16343 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16344 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16345
16346 int CompareOpc;
16347 bool isDot;
16348
16349 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16350 break;
16351
16352 // Since we are doing this pre-legalize, the RHS can be a constant of
16353 // arbitrary bitwidth which may cause issues when trying to get the value
16354 // from the underlying APInt.
16355 auto RHSAPInt = RHS->getAsAPIntVal();
16356 if (!RHSAPInt.isIntN(64))
16357 break;
16358
16359 unsigned Val = RHSAPInt.getZExtValue();
16360 auto isImpossibleCompare = [&]() {
16361 // If this is a comparison against something other than 0/1, then we know
16362 // that the condition is never/always true.
16363 if (Val != 0 && Val != 1) {
16364 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16365 return N->getOperand(0);
16366 // Always !=, turn it into an unconditional branch.
16367 return DAG.getNode(ISD::BR, dl, MVT::Other,
16368 N->getOperand(0), N->getOperand(4));
16369 }
16370 return SDValue();
16371 };
16372 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16373 unsigned StoreWidth = 0;
16374 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16375 isStoreConditional(LHS, StoreWidth)) {
16376 if (SDValue Impossible = isImpossibleCompare())
16377 return Impossible;
16378 PPC::Predicate CompOpc;
16379 // eq 0 => ne
16380 // ne 0 => eq
16381 // eq 1 => eq
16382 // ne 1 => ne
16383 if (Val == 0)
16384 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16385 else
16386 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16387
16388 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16389 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16390 auto *MemNode = cast<MemSDNode>(LHS);
16391 SDValue ConstSt = DAG.getMemIntrinsicNode(
16392 PPCISD::STORE_COND, dl,
16393 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16394 MemNode->getMemoryVT(), MemNode->getMemOperand());
16395
16396 SDValue InChain;
16397 // Unchain the branch from the original store conditional.
16398 if (N->getOperand(0) == LHS.getValue(1))
16399 InChain = LHS.getOperand(0);
16400 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16401 SmallVector<SDValue, 4> InChains;
16402 SDValue InTF = N->getOperand(0);
16403 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16404 if (InTF.getOperand(i) != LHS.getValue(1))
16405 InChains.push_back(InTF.getOperand(i));
16406 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16407 }
16408
16409 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16410 DAG.getConstant(CompOpc, dl, MVT::i32),
16411 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16412 ConstSt.getValue(2));
16413 }
16414
16415 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16416 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16417 assert(isDot && "Can't compare against a vector result!");
16418
16419 if (SDValue Impossible = isImpossibleCompare())
16420 return Impossible;
16421
16422 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16423 // Create the PPCISD altivec 'dot' comparison node.
16424 SDValue Ops[] = {
16425 LHS.getOperand(2), // LHS of compare
16426 LHS.getOperand(3), // RHS of compare
16427 DAG.getConstant(CompareOpc, dl, MVT::i32)
16428 };
16429 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16430 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16431
16432 // Unpack the result based on how the target uses it.
16433 PPC::Predicate CompOpc;
16434 switch (LHS.getConstantOperandVal(1)) {
16435 default: // Can't happen, don't crash on invalid number though.
16436 case 0: // Branch on the value of the EQ bit of CR6.
16437 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16438 break;
16439 case 1: // Branch on the inverted value of the EQ bit of CR6.
16440 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16441 break;
16442 case 2: // Branch on the value of the LT bit of CR6.
16443 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16444 break;
16445 case 3: // Branch on the inverted value of the LT bit of CR6.
16446 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16447 break;
16448 }
16449
16450 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16451 DAG.getConstant(CompOpc, dl, MVT::i32),
16452 DAG.getRegister(PPC::CR6, MVT::i32),
16453 N->getOperand(4), CompNode.getValue(1));
16454 }
16455 break;
16456 }
16457 case ISD::BUILD_VECTOR:
16458 return DAGCombineBuildVector(N, DCI);
16459 }
16460
16461 return SDValue();
16462}
16463
16464SDValue
16465PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16466 SelectionDAG &DAG,
16467 SmallVectorImpl<SDNode *> &Created) const {
16468 // fold (sdiv X, pow2)
16469 EVT VT = N->getValueType(0);
16470 if (VT == MVT::i64 && !Subtarget.isPPC64())
16471 return SDValue();
16472 if ((VT != MVT::i32 && VT != MVT::i64) ||
16473 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16474 return SDValue();
16475
16476 SDLoc DL(N);
16477 SDValue N0 = N->getOperand(0);
16478
16479 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16480 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16481 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16482
16483 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16484 Created.push_back(Op.getNode());
16485
16486 if (IsNegPow2) {
16487 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16488 Created.push_back(Op.getNode());
16489 }
16490
16491 return Op;
16492}
16493
16494//===----------------------------------------------------------------------===//
16495// Inline Assembly Support
16496//===----------------------------------------------------------------------===//
16497
16498void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16499 KnownBits &Known,
16500 const APInt &DemandedElts,
16501 const SelectionDAG &DAG,
16502 unsigned Depth) const {
16503 Known.resetAll();
16504 switch (Op.getOpcode()) {
16505 default: break;
16506 case PPCISD::LBRX: {
16507 // lhbrx is known to have the top bits cleared out.
16508 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16509 Known.Zero = 0xFFFF0000;
16510 break;
16511 }
16512 case ISD::INTRINSIC_WO_CHAIN: {
16513 switch (Op.getConstantOperandVal(0)) {
16514 default: break;
16515 case Intrinsic::ppc_altivec_vcmpbfp_p:
16516 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16517 case Intrinsic::ppc_altivec_vcmpequb_p:
16518 case Intrinsic::ppc_altivec_vcmpequh_p:
16519 case Intrinsic::ppc_altivec_vcmpequw_p:
16520 case Intrinsic::ppc_altivec_vcmpequd_p:
16521 case Intrinsic::ppc_altivec_vcmpequq_p:
16522 case Intrinsic::ppc_altivec_vcmpgefp_p:
16523 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16524 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16525 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16526 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16527 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16528 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16529 case Intrinsic::ppc_altivec_vcmpgtub_p:
16530 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16531 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16532 case Intrinsic::ppc_altivec_vcmpgtud_p:
16533 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16534 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16535 break;
16536 }
16537 break;
16538 }
16539 case ISD::INTRINSIC_W_CHAIN: {
16540 switch (Op.getConstantOperandVal(1)) {
16541 default:
16542 break;
16543 case Intrinsic::ppc_load2r:
16544 // Top bits are cleared for load2r (which is the same as lhbrx).
16545 Known.Zero = 0xFFFF0000;
16546 break;
16547 }
16548 break;
16549 }
16550 }
16551}
16552
16553Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16554 switch (Subtarget.getCPUDirective()) {
16555 default: break;
16556 case PPC::DIR_970:
16557 case PPC::DIR_PWR4:
16558 case PPC::DIR_PWR5:
16559 case PPC::DIR_PWR5X:
16560 case PPC::DIR_PWR6:
16561 case PPC::DIR_PWR6X:
16562 case PPC::DIR_PWR7:
16563 case PPC::DIR_PWR8:
16564 case PPC::DIR_PWR9:
16565 case PPC::DIR_PWR10:
16566 case PPC::DIR_PWR_FUTURE: {
16567 if (!ML)
16568 break;
16569
16570 if (!DisableInnermostLoopAlign32) {
16571 // If the nested loop is an innermost loop, prefer a 32-byte alignment
16572 // so that we can reduce cache misses and branch-prediction misses.
16573 // Actual alignment of the loop will depend on the hotness check and other
16574 // logic in alignBlocks.
16575 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16576 return Align(32);
16577 }
16578
16579 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16580
16581 // For small loops (between 5 and 8 instructions), align to a 32-byte
16582 // boundary so that the entire loop fits in one instruction-cache line.
16583 uint64_t LoopSize = 0;
16584 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16585 for (const MachineInstr &J : **I) {
16586 LoopSize += TII->getInstSizeInBytes(J);
16587 if (LoopSize > 32)
16588 break;
16589 }
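 // Note that the early break above only stops scanning the current block;
 // once LoopSize exceeds 32, the range check below fails regardless, so the
 // result is unaffected.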
16590
16591 if (LoopSize > 16 && LoopSize <= 32)
16592 return Align(32);
16593
16594 break;
16595 }
16596 }
16597
16598 return TargetLowering::getPrefLoopAlignment(ML);
16599}
16600
16601/// getConstraintType - Given a constraint, return the type of
16602/// constraint it is for this target.
16603PPCTargetLowering::ConstraintType
16604PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16605 if (Constraint.size() == 1) {
16606 switch (Constraint[0]) {
16607 default: break;
16608 case 'b':
16609 case 'r':
16610 case 'f':
16611 case 'd':
16612 case 'v':
16613 case 'y':
16614 return C_RegisterClass;
16615 case 'Z':
16616 // FIXME: While Z does indicate a memory constraint, it specifically
16617 // indicates an r+r address (used in conjunction with the 'y' modifier
16618 // in the replacement string). Currently, we're forcing the base
16619 // register to be r0 in the asm printer (which is interpreted as zero)
16620 // and forming the complete address in the second register. This is
16621 // suboptimal.
16622 return C_Memory;
16623 }
16624 } else if (Constraint == "wc") { // individual CR bits.
16625 return C_RegisterClass;
16626 } else if (Constraint == "wa" || Constraint == "wd" ||
16627 Constraint == "wf" || Constraint == "ws" ||
16628 Constraint == "wi" || Constraint == "ww") {
16629 return C_RegisterClass; // VSX registers.
16630 }
16631 return TargetLowering::getConstraintType(Constraint);
16632}
16633
16634/// Examine constraint type and operand type and determine a weight value.
16635/// This object must already have been set up with the operand type
16636/// and the current alternative constraint selected.
16637TargetLowering::ConstraintWeight
16638PPCTargetLowering::getSingleConstraintMatchWeight(
16639 AsmOperandInfo &info, const char *constraint) const {
16640 ConstraintWeight weight = CW_Invalid;
16641 Value *CallOperandVal = info.CallOperandVal;
16642 // If we don't have a value, we can't do a match,
16643 // but allow it at the lowest weight.
16644 if (!CallOperandVal)
16645 return CW_Default;
16646 Type *type = CallOperandVal->getType();
16647
16648 // Look at the constraint type.
16649 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16650 return CW_Register; // an individual CR bit.
16651 else if ((StringRef(constraint) == "wa" ||
16652 StringRef(constraint) == "wd" ||
16653 StringRef(constraint) == "wf") &&
16654 type->isVectorTy())
16655 return CW_Register;
16656 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16657 return CW_Register; // only holds 64-bit integer data.
16658 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16659 return CW_Register;
16660 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16661 return CW_Register;
16662
16663 switch (*constraint) {
16664 default:
16665 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16666 break;
16667 case 'b':
16668 if (type->isIntegerTy())
16669 weight = CW_Register;
16670 break;
16671 case 'f':
16672 if (type->isFloatTy())
16673 weight = CW_Register;
16674 break;
16675 case 'd':
16676 if (type->isDoubleTy())
16677 weight = CW_Register;
16678 break;
16679 case 'v':
16680 if (type->isVectorTy())
16681 weight = CW_Register;
16682 break;
16683 case 'y':
16684 weight = CW_Register;
16685 break;
16686 case 'Z':
16687 weight = CW_Memory;
16688 break;
16689 }
16690 return weight;
16691}
16692
16693std::pair<unsigned, const TargetRegisterClass *>
16694PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16695 StringRef Constraint,
16696 MVT VT) const {
16697 if (Constraint.size() == 1) {
16698 // GCC RS6000 Constraint Letters
16699 switch (Constraint[0]) {
16700 case 'b': // R1-R31
16701 if (VT == MVT::i64 && Subtarget.isPPC64())
16702 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16703 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16704 case 'r': // R0-R31
16705 if (VT == MVT::i64 && Subtarget.isPPC64())
16706 return std::make_pair(0U, &PPC::G8RCRegClass);
16707 return std::make_pair(0U, &PPC::GPRCRegClass);
16708 // 'd' and 'f' constraints are both defined to be "the floating point
16709 // registers", where one is for 32-bit and the other for 64-bit. We don't
16710 // really care much here, so just give them all the same reg classes.
16711 case 'd':
16712 case 'f':
16713 if (Subtarget.hasSPE()) {
16714 if (VT == MVT::f32 || VT == MVT::i32)
16715 return std::make_pair(0U, &PPC::GPRCRegClass);
16716 if (VT == MVT::f64 || VT == MVT::i64)
16717 return std::make_pair(0U, &PPC::SPERCRegClass);
16718 } else {
16719 if (VT == MVT::f32 || VT == MVT::i32)
16720 return std::make_pair(0U, &PPC::F4RCRegClass);
16721 if (VT == MVT::f64 || VT == MVT::i64)
16722 return std::make_pair(0U, &PPC::F8RCRegClass);
16723 }
16724 break;
16725 case 'v':
16726 if (Subtarget.hasAltivec() && VT.isVector())
16727 return std::make_pair(0U, &PPC::VRRCRegClass);
16728 else if (Subtarget.hasVSX())
16729 // Scalars in Altivec registers only make sense with VSX.
16730 return std::make_pair(0U, &PPC::VFRCRegClass);
16731 break;
16732 case 'y': // crrc
16733 return std::make_pair(0U, &PPC::CRRCRegClass);
16734 }
16735 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16736 // An individual CR bit.
16737 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16738 } else if ((Constraint == "wa" || Constraint == "wd" ||
16739 Constraint == "wf" || Constraint == "wi") &&
16740 Subtarget.hasVSX()) {
16741 // A VSX register for either a scalar (FP) or vector. There is no
16742 // support for single precision scalars on subtargets prior to Power8.
16743 if (VT.isVector())
16744 return std::make_pair(0U, &PPC::VSRCRegClass);
16745 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16746 return std::make_pair(0U, &PPC::VSSRCRegClass);
16747 return std::make_pair(0U, &PPC::VSFRCRegClass);
16748 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16749 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16750 return std::make_pair(0U, &PPC::VSSRCRegClass);
16751 else
16752 return std::make_pair(0U, &PPC::VSFRCRegClass);
16753 } else if (Constraint == "lr") {
16754 if (VT == MVT::i64)
16755 return std::make_pair(0U, &PPC::LR8RCRegClass);
16756 else
16757 return std::make_pair(0U, &PPC::LRRCRegClass);
16758 }
16759
16760 // Handle special cases of physical registers that are not properly handled
16761 // by the base class.
16762 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16763 // If we name a VSX register, we can't defer to the base class because it
16764 // will not recognize the correct register (their names will be VSL{0-31}
16765 // and V{0-31} so they won't match). So we match them here.
16766 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16767 int VSNum = atoi(Constraint.data() + 3);
16768 assert(VSNum >= 0 && VSNum <= 63 &&
16769 "Attempted to access a vsr out of range");
16770 if (VSNum < 32)
16771 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16772 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16773 }
16774
16775 // For float registers, we can't defer to the base class as it will match
16776 // the SPILLTOVSRRC class.
16777 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16778 int RegNum = atoi(Constraint.data() + 2);
16779 if (RegNum > 31 || RegNum < 0)
16780 report_fatal_error("Invalid floating point register number");
16781 if (VT == MVT::f32 || VT == MVT::i32)
16782 return Subtarget.hasSPE()
16783 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16784 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16785 if (VT == MVT::f64 || VT == MVT::i64)
16786 return Subtarget.hasSPE()
16787 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16788 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16789 }
16790 }
16791
16792 std::pair<unsigned, const TargetRegisterClass *> R =
16793 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16794
16795 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16796 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16797 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16798 // register.
16799 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16800 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16801 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16802 PPC::GPRCRegClass.contains(R.first))
16803 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16804 PPC::sub_32, &PPC::G8RCRegClass),
16805 &PPC::G8RCRegClass);
16806
16807 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16808 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16809 R.first = PPC::CR0;
16810 R.second = &PPC::CRRCRegClass;
16811 }
16812 // FIXME: This warning should ideally be emitted in the front end.
16813 const auto &TM = getTargetMachine();
16814 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16815 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16816 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16817 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16818 errs() << "warning: vector registers 20 to 32 are reserved in the "
16819 "default AIX AltiVec ABI and cannot be used\n";
16820 }
16821
16822 return R;
16823}
16824
16825/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16826/// vector. If it is invalid, don't add anything to Ops.
16827void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16828 StringRef Constraint,
16829 std::vector<SDValue> &Ops,
16830 SelectionDAG &DAG) const {
16831 SDValue Result;
16832
16833 // Only support length 1 constraints.
16834 if (Constraint.size() > 1)
16835 return;
16836
16837 char Letter = Constraint[0];
16838 switch (Letter) {
16839 default: break;
16840 case 'I':
16841 case 'J':
16842 case 'K':
16843 case 'L':
16844 case 'M':
16845 case 'N':
16846 case 'O':
16847 case 'P': {
16848 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16849 if (!CST) return; // Must be an immediate to match.
16850 SDLoc dl(Op);
16851 int64_t Value = CST->getSExtValue();
16852 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16853 // numbers are printed as such.
16854 switch (Letter) {
16855 default: llvm_unreachable("Unknown constraint letter!");
16856 case 'I': // "I" is a signed 16-bit constant.
16857 if (isInt<16>(Value))
16858 Result = DAG.getTargetConstant(Value, dl, TCVT);
16859 break;
16860 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16861 if (isShiftedUInt<16, 16>(Value))
16862 Result = DAG.getTargetConstant(Value, dl, TCVT);
16863 break;
16864 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16865 if (isShiftedInt<16, 16>(Value))
16866 Result = DAG.getTargetConstant(Value, dl, TCVT);
16867 break;
16868 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16869 if (isUInt<16>(Value))
16870 Result = DAG.getTargetConstant(Value, dl, TCVT);
16871 break;
16872 case 'M': // "M" is a constant that is greater than 31.
16873 if (Value > 31)
16874 Result = DAG.getTargetConstant(Value, dl, TCVT);
16875 break;
16876 case 'N': // "N" is a positive constant that is an exact power of two.
16877 if (Value > 0 && isPowerOf2_64(Value))
16878 Result = DAG.getTargetConstant(Value, dl, TCVT);
16879 break;
16880 case 'O': // "O" is the constant zero.
16881 if (Value == 0)
16882 Result = DAG.getTargetConstant(Value, dl, TCVT);
16883 break;
16884 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16885 if (isInt<16>(-Value))
16886 Result = DAG.getTargetConstant(Value, dl, TCVT);
16887 break;
16888 }
16889 break;
16890 }
16891 }
16892
16893 if (Result.getNode()) {
16894 Ops.push_back(Result);
16895 return;
16896 }
16897
16898 // Handle standard constraint letters.
16899 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16900}
16901
16902void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
16903 SmallVectorImpl<SDValue> &Ops,
16904 SelectionDAG &DAG) const {
16905 if (I.getNumOperands() <= 1)
16906 return;
16907 if (!isa<ConstantSDNode>(Ops[1].getNode()))
16908 return;
16909 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
16910 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
16911 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
16912 return;
16913
16914 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
16915 Ops.push_back(DAG.getMDNode(MDN));
16916}
16917
16918// isLegalAddressingMode - Return true if the addressing mode represented
16919// by AM is legal for this target, for a load/store of the specified type.
16920bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
16921 const AddrMode &AM, Type *Ty,
16922 unsigned AS,
16923 Instruction *I) const {
16924 // Vector-type r+i forms are supported since Power9 as the DQ form. We don't
16925 // check that the offset meets the DQ-form requirement (off % 16 == 0) because,
16926 // on PowerPC, the immediate form is preferred and the offset can be adjusted
16927 // to use it later in the PPCLoopInstrFormPrep pass. Also, LSR uses the min and
16928 // max offsets of an LSRUse to check for a legal addressing mode, so we should
16929 // be a little aggressive and accept the other offsets for that LSRUse.
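 // For example (illustrative): for scalar types, 'reg + 32760' and
 // 'reg1 + reg2' are accepted below, while 'reg + 65536', '2*reg + reg', or
 // any mode with a global symbol as the base are rejected.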
16930 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
16931 return false;
16932
16933 // PPC allows a sign-extended 16-bit immediate field.
16934 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
16935 return false;
16936
16937 // No global is ever allowed as a base.
16938 if (AM.BaseGV)
16939 return false;
16940
16941 // PPC only supports r+r,
16942 switch (AM.Scale) {
16943 case 0: // "r+i" or just "i", depending on HasBaseReg.
16944 break;
16945 case 1:
16946 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
16947 return false;
16948 // Otherwise we have r+r or r+i.
16949 break;
16950 case 2:
16951 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
16952 return false;
16953 // Allow 2*r as r+r.
16954 break;
16955 default:
16956 // No other scales are supported.
16957 return false;
16958 }
16959
16960 return true;
16961}
16962
16963SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
16964 SelectionDAG &DAG) const {
16965 MachineFunction &MF = DAG.getMachineFunction();
16966 MachineFrameInfo &MFI = MF.getFrameInfo();
16967 MFI.setReturnAddressIsTaken(true);
16968
16969 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
16970 return SDValue();
16971
16972 SDLoc dl(Op);
16973 unsigned Depth = Op.getConstantOperandVal(0);
16974
16975 // Make sure the function does not optimize away the store of the RA to
16976 // the stack.
16977 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
16978 FuncInfo->setLRStoreRequired();
16979 bool isPPC64 = Subtarget.isPPC64();
16980 auto PtrVT = getPointerTy(MF.getDataLayout());
16981
16982 if (Depth > 0) {
16983 // The link register (return address) is saved in the caller's frame
16984 // not the callee's stack frame. So we must get the caller's frame
16985 // address and load the return address at the LR offset from there.
16986 SDValue FrameAddr =
16987 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16988 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
16989 SDValue Offset =
16990 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
16991 isPPC64 ? MVT::i64 : MVT::i32);
16992 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
16993 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
16994 MachinePointerInfo());
16995 }
16996
16997 // Just load the return address off the stack.
16998 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
16999 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17000 MachinePointerInfo());
17001}
17002
17003SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17004 SelectionDAG &DAG) const {
17005 SDLoc dl(Op);
17006 unsigned Depth = Op.getConstantOperandVal(0);
17007
17008 MachineFunction &MF = DAG.getMachineFunction();
17009 MachineFrameInfo &MFI = MF.getFrameInfo();
17010 MFI.setFrameAddressIsTaken(true);
17011
17012 EVT PtrVT = getPointerTy(MF.getDataLayout());
17013 bool isPPC64 = PtrVT == MVT::i64;
17014
17015 // Naked functions never have a frame pointer, and so we use r1. For all
17016 // other functions, this decision must be delayed until during PEI.
17017 unsigned FrameReg;
17018 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17019 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17020 else
17021 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17022
17023 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17024 PtrVT);
17025 while (Depth--)
17026 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17027 FrameAddr, MachinePointerInfo());
17028 return FrameAddr;
17029}
17030
17031// FIXME? Maybe this could be a TableGen attribute on some registers and
17032// this table could be generated automatically from RegInfo.
17033Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
17034 const MachineFunction &MF) const {
17035 bool isPPC64 = Subtarget.isPPC64();
17036
17037 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17038 if (!is64Bit && VT != LLT::scalar(32))
17039 report_fatal_error("Invalid register global variable type");
17040
17041 Register Reg = StringSwitch<Register>(RegName)
17042 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17043 .Case("r2", isPPC64 ? Register() : PPC::R2)
17044 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17045 .Default(Register());
17046
17047 if (Reg)
17048 return Reg;
17049 report_fatal_error("Invalid register name global variable");
17050}
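// This hook is reached through the named-register intrinsics
// (llvm.read_register / llvm.write_register), where the register is named by a
// metadata string. Only "r1", "r2" and "r13" are recognized here: for example,
// reading "r1" yields X1 on a 64-bit target and R1 on a 32-bit target, and any
// other name hits the fatal error above.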
17051
17052bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17053 // The 32-bit SVR4 ABI accesses everything as got-indirect.
17054 if (Subtarget.is32BitELFABI())
17055 return true;
17056
17057 // AIX accesses everything indirectly through the TOC, which is similar to
17058 // the GOT.
17059 if (Subtarget.isAIXABI())
17060 return true;
17061
17062 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17063 // Under the small or large code model, module locals are accessed
17064 // indirectly by loading their address from .toc/.got.
17065 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17066 return true;
17067
17068 // JumpTable and BlockAddress are accessed as got-indirect.
17069 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17070 return true;
17071
17072 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17073 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17074
17075 return false;
17076}
17077
17078bool
17079PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17080 // The PowerPC target isn't yet aware of offsets.
17081 return false;
17082}
17083
17084bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17085 const CallInst &I,
17086 MachineFunction &MF,
17087 unsigned Intrinsic) const {
17088 switch (Intrinsic) {
17089 case Intrinsic::ppc_atomicrmw_xchg_i128:
17090 case Intrinsic::ppc_atomicrmw_add_i128:
17091 case Intrinsic::ppc_atomicrmw_sub_i128:
17092 case Intrinsic::ppc_atomicrmw_nand_i128:
17093 case Intrinsic::ppc_atomicrmw_and_i128:
17094 case Intrinsic::ppc_atomicrmw_or_i128:
17095 case Intrinsic::ppc_atomicrmw_xor_i128:
17096 case Intrinsic::ppc_cmpxchg_i128:
17098 Info.memVT = MVT::i128;
17099 Info.ptrVal = I.getArgOperand(0);
17100 Info.offset = 0;
17101 Info.align = Align(16);
17104 return true;
17105 case Intrinsic::ppc_atomic_load_i128:
17107 Info.memVT = MVT::i128;
17108 Info.ptrVal = I.getArgOperand(0);
17109 Info.offset = 0;
17110 Info.align = Align(16);
17112 return true;
17113 case Intrinsic::ppc_atomic_store_i128:
17115 Info.memVT = MVT::i128;
17116 Info.ptrVal = I.getArgOperand(2);
17117 Info.offset = 0;
17118 Info.align = Align(16);
17120 return true;
17121 case Intrinsic::ppc_altivec_lvx:
17122 case Intrinsic::ppc_altivec_lvxl:
17123 case Intrinsic::ppc_altivec_lvebx:
17124 case Intrinsic::ppc_altivec_lvehx:
17125 case Intrinsic::ppc_altivec_lvewx:
17126 case Intrinsic::ppc_vsx_lxvd2x:
17127 case Intrinsic::ppc_vsx_lxvw4x:
17128 case Intrinsic::ppc_vsx_lxvd2x_be:
17129 case Intrinsic::ppc_vsx_lxvw4x_be:
17130 case Intrinsic::ppc_vsx_lxvl:
17131 case Intrinsic::ppc_vsx_lxvll: {
17132 EVT VT;
17133 switch (Intrinsic) {
17134 case Intrinsic::ppc_altivec_lvebx:
17135 VT = MVT::i8;
17136 break;
17137 case Intrinsic::ppc_altivec_lvehx:
17138 VT = MVT::i16;
17139 break;
17140 case Intrinsic::ppc_altivec_lvewx:
17141 VT = MVT::i32;
17142 break;
17143 case Intrinsic::ppc_vsx_lxvd2x:
17144 case Intrinsic::ppc_vsx_lxvd2x_be:
17145 VT = MVT::v2f64;
17146 break;
17147 default:
17148 VT = MVT::v4i32;
17149 break;
17150 }
17151
17153 Info.memVT = VT;
17154 Info.ptrVal = I.getArgOperand(0);
17155 Info.offset = -VT.getStoreSize()+1;
17156 Info.size = 2*VT.getStoreSize()-1;
17157 Info.align = Align(1);
17159 return true;
17160 }
17161 case Intrinsic::ppc_altivec_stvx:
17162 case Intrinsic::ppc_altivec_stvxl:
17163 case Intrinsic::ppc_altivec_stvebx:
17164 case Intrinsic::ppc_altivec_stvehx:
17165 case Intrinsic::ppc_altivec_stvewx:
17166 case Intrinsic::ppc_vsx_stxvd2x:
17167 case Intrinsic::ppc_vsx_stxvw4x:
17168 case Intrinsic::ppc_vsx_stxvd2x_be:
17169 case Intrinsic::ppc_vsx_stxvw4x_be:
17170 case Intrinsic::ppc_vsx_stxvl:
17171 case Intrinsic::ppc_vsx_stxvll: {
17172 EVT VT;
17173 switch (Intrinsic) {
17174 case Intrinsic::ppc_altivec_stvebx:
17175 VT = MVT::i8;
17176 break;
17177 case Intrinsic::ppc_altivec_stvehx:
17178 VT = MVT::i16;
17179 break;
17180 case Intrinsic::ppc_altivec_stvewx:
17181 VT = MVT::i32;
17182 break;
17183 case Intrinsic::ppc_vsx_stxvd2x:
17184 case Intrinsic::ppc_vsx_stxvd2x_be:
17185 VT = MVT::v2f64;
17186 break;
17187 default:
17188 VT = MVT::v4i32;
17189 break;
17190 }
17191
17193 Info.memVT = VT;
17194 Info.ptrVal = I.getArgOperand(1);
17195 Info.offset = -VT.getStoreSize()+1;
17196 Info.size = 2*VT.getStoreSize()-1;
17197 Info.align = Align(1);
17199 return true;
17200 }
17201 case Intrinsic::ppc_stdcx:
17202 case Intrinsic::ppc_stwcx:
17203 case Intrinsic::ppc_sthcx:
17204 case Intrinsic::ppc_stbcx: {
17205 EVT VT;
17206 auto Alignment = Align(8);
17207 switch (Intrinsic) {
17208 case Intrinsic::ppc_stdcx:
17209 VT = MVT::i64;
17210 break;
17211 case Intrinsic::ppc_stwcx:
17212 VT = MVT::i32;
17213 Alignment = Align(4);
17214 break;
17215 case Intrinsic::ppc_sthcx:
17216 VT = MVT::i16;
17217 Alignment = Align(2);
17218 break;
17219 case Intrinsic::ppc_stbcx:
17220 VT = MVT::i8;
17221 Alignment = Align(1);
17222 break;
17223 }
17225 Info.memVT = VT;
17226 Info.ptrVal = I.getArgOperand(0);
17227 Info.offset = 0;
17228 Info.align = Alignment;
17230 return true;
17231 }
17232 default:
17233 break;
17234 }
17235
17236 return false;
17237}
17238
17239/// It returns EVT::Other if the type should be determined using generic
17240/// target-independent logic.
17241EVT PPCTargetLowering::getOptimalMemOpType(
17242 const MemOp &Op, const AttributeList &FuncAttributes) const {
17243 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17244 // We should use Altivec/VSX loads and stores when available. For unaligned
17245 // addresses, unaligned VSX loads are only fast starting with the P8.
17246 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17247 if (Op.isMemset() && Subtarget.hasVSX()) {
17248 uint64_t TailSize = Op.size() % 16;
17249 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
17250 // element if the vector element type matches the tail store. For a tail
17251 // size of 3 or 4, the tail store is i32, so v4i32 cannot be used and a different legal type is needed.
17252 if (TailSize > 2 && TailSize <= 4) {
17253 return MVT::v8i16;
17254 }
17255 return MVT::v4i32;
17256 }
17257 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17258 return MVT::v4i32;
17259 }
17260 }
17261
17262 if (Subtarget.isPPC64()) {
17263 return MVT::i64;
17264 }
17265
17266 return MVT::i32;
17267}
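// Examples of the selection above when optimizing (sizes are illustrative):
// a 32-byte memcpy with 16-byte-aligned operands (or any alignment once P8
// vector is available) is expanded with v4i32 chunks; a 35-byte memset on VSX
// has a 3-byte tail (35 % 16 == 3), so v8i16 is returned to keep the tail
// store legal; without Altivec the expansion falls back to i64 or i32 chunks.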
17268
17269/// Returns true if it is beneficial to convert a load of a constant
17270/// to just the constant itself.
17271bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17272 Type *Ty) const {
17273 assert(Ty->isIntegerTy());
17274
17275 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17276 return !(BitSize == 0 || BitSize > 64);
17277}
17278
17280 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17281 return false;
17282 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17283 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17284 return NumBits1 == 64 && NumBits2 == 32;
17285}
17286
17288 if (!VT1.isInteger() || !VT2.isInteger())
17289 return false;
17290 unsigned NumBits1 = VT1.getSizeInBits();
17291 unsigned NumBits2 = VT2.getSizeInBits();
17292 return NumBits1 == 64 && NumBits2 == 32;
17293}
17294
17295bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17296 // Generally speaking, zexts are not free, but they are free when they can be
17297 // folded with other operations.
17298 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17299 EVT MemVT = LD->getMemoryVT();
17300 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17301 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17302 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17303 LD->getExtensionType() == ISD::ZEXTLOAD))
17304 return true;
17305 }
17306
17307 // FIXME: Add other cases...
17308 // - 32-bit shifts with a zext to i64
17309 // - zext after ctlz, bswap, etc.
17310 // - zext after and by a constant mask
17311
17312 return TargetLowering::isZExtFree(Val, VT2);
17313}
17314
17315bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17316 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17317 "invalid fpext types");
17318 // Extending to float128 is not free.
17319 if (DestVT == MVT::f128)
17320 return false;
17321 return true;
17322}
17323
17325 return isInt<16>(Imm) || isUInt<16>(Imm);
17326}
17327
17329 return isInt<16>(Imm) || isUInt<16>(Imm);
17330}
17331
17332bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17333 MachineMemOperand::Flags,
17334 unsigned *Fast) const {
17335 if (DisablePPCUnaligned)
17336 return false;
17337
17338 // PowerPC supports unaligned memory access for simple non-vector types.
17339 // Although accessing unaligned addresses is not as efficient as accessing
17340 // aligned addresses, it is generally more efficient than manual expansion,
17341 // and generally only traps for software emulation when crossing page
17342 // boundaries.
17343
17344 if (!VT.isSimple())
17345 return false;
17346
17347 if (VT.isFloatingPoint() && !VT.isVector() &&
17348 !Subtarget.allowsUnalignedFPAccess())
17349 return false;
17350
17351 if (VT.getSimpleVT().isVector()) {
17352 if (Subtarget.hasVSX()) {
17353 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17354 VT != MVT::v4f32 && VT != MVT::v4i32)
17355 return false;
17356 } else {
17357 return false;
17358 }
17359 }
17360
17361 if (VT == MVT::ppcf128)
17362 return false;
17363
17364 if (Fast)
17365 *Fast = 1;
17366
17367 return true;
17368}
17369
17370bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17371 SDValue C) const {
17372 // Check integral scalar types.
17373 if (!VT.isScalarInteger())
17374 return false;
17375 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17376 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17377 return false;
17378 // This transformation will generate >= 2 operations. But the following
17379 // cases will generate <= 2 instructions during ISel, so exclude them:
17380 // 1. If the constant multiplier fits in 16 bits, it can be handled by a
17381 // single HW instruction, i.e. MULLI.
17382 // 2. If the multiplier fits in 16 bits after shifting out its trailing
17383 // zeros, only one extra shift is needed, i.e. MULLI and RLDICR.
17384 int64_t Imm = ConstNode->getSExtValue();
17385 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17386 Imm >>= Shift;
17387 if (isInt<16>(Imm))
17388 return false;
17389 uint64_t UImm = static_cast<uint64_t>(Imm);
17390 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17391 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17392 return true;
17393 }
17394 return false;
17395}
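// Worked examples of the check above (constants are illustrative):
//   mul x, 6     -> 6 >> 1 == 3 fits in 16 bits, so MULLI (plus a shift) wins:
//                   return false.
//   mul x, 65537 -> odd, does not fit in 16 bits, and 65537 - 1 is a power of
//                   two, so a shift+add decomposition is profitable: return true.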
17396
17398 EVT VT) const {
17401}
17402
17404 Type *Ty) const {
17405 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17406 return false;
17407 switch (Ty->getScalarType()->getTypeID()) {
17408 case Type::FloatTyID:
17409 case Type::DoubleTyID:
17410 return true;
17411 case Type::FP128TyID:
17412 return Subtarget.hasP9Vector();
17413 default:
17414 return false;
17415 }
17416}
17417
17418// FIXME: add more patterns which are not profitable to hoist.
17419bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17420 if (!I->hasOneUse())
17421 return true;
17422
17423 Instruction *User = I->user_back();
17424 assert(User && "A single use instruction with no uses.");
17425
17426 switch (I->getOpcode()) {
17427 case Instruction::FMul: {
17428 // Don't break FMA, PowerPC prefers FMA.
17429 if (User->getOpcode() != Instruction::FSub &&
17430 User->getOpcode() != Instruction::FAdd)
17431 return true;
17432
17434 const Function *F = I->getFunction();
17435 const DataLayout &DL = F->getParent()->getDataLayout();
17436 Type *Ty = User->getOperand(0)->getType();
17437
17438 return !(
17441 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17442 }
17443 case Instruction::Load: {
17444 // Don't break the "store (load float*)" pattern; it will be combined
17445 // to "store (load int32)" in a later InstCombine pass. See function
17446 // combineLoadToOperationType. On PowerPC, loading a floating-point value
17447 // takes more cycles than loading a 32-bit integer.
17448 LoadInst *LI = cast<LoadInst>(I);
17449 // For loads that combineLoadToOperationType leaves alone, such as
17450 // ordered loads, it should be profitable to hoist them.
17451 // A swifterror load can only be of pointer-to-pointer type, so the
17452 // later type check gets rid of that case.
17453 if (!LI->isUnordered())
17454 return true;
17455
17456 if (User->getOpcode() != Instruction::Store)
17457 return true;
17458
17459 if (I->getType()->getTypeID() != Type::FloatTyID)
17460 return true;
17461
17462 return false;
17463 }
17464 default:
17465 return true;
17466 }
17467 return true;
17468}
17469
17470const MCPhysReg *
17471PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17472 // LR is a callee-save register, but we must treat it as clobbered by any call
17473 // site. Hence we include LR in the scratch registers, which are in turn added
17474 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17475 // to CTR, which is used by any indirect call.
17476 static const MCPhysReg ScratchRegs[] = {
17477 PPC::X12, PPC::LR8, PPC::CTR8, 0
17478 };
17479
17480 return ScratchRegs;
17481}
17482
17484 const Constant *PersonalityFn) const {
17485 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17486}
17487
17489 const Constant *PersonalityFn) const {
17490 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17491}
17492
17493bool
17495 EVT VT , unsigned DefinedValues) const {
17496 if (VT == MVT::v2i64)
17497 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17498
17499 if (Subtarget.hasVSX())
17500 return true;
17501
17503}
17504
17506 if (DisableILPPref || Subtarget.enableMachineScheduler())
17508
17509 return Sched::ILP;
17510}
17511
17512// Create a fast isel object.
17513FastISel *
17514PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17515 const TargetLibraryInfo *LibInfo) const {
17516 return PPC::createFastISel(FuncInfo, LibInfo);
17517}
17518
17519// 'Inverted' means the FMA opcode after negating one multiplicand.
17520// For example, (fma -a b c) = (fnmsub a b c)
17521static unsigned invertFMAOpcode(unsigned Opc) {
17522 switch (Opc) {
17523 default:
17524 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17525 case ISD::FMA:
17526 return PPCISD::FNMSUB;
17527 case PPCISD::FNMSUB:
17528 return ISD::FMA;
17529 }
17530}
17531
17532SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17533 bool LegalOps, bool OptForSize,
17534 NegatibleCost &Cost,
17535 unsigned Depth) const {
17536 if (Depth > SelectionDAG::MaxRecursionDepth)
17537 return SDValue();
17538
17539 unsigned Opc = Op.getOpcode();
17540 EVT VT = Op.getValueType();
17541 SDNodeFlags Flags = Op.getNode()->getFlags();
17542
17543 switch (Opc) {
17544 case PPCISD::FNMSUB:
17545 if (!Op.hasOneUse() || !isTypeLegal(VT))
17546 break;
17547
17549 SDValue N0 = Op.getOperand(0);
17550 SDValue N1 = Op.getOperand(1);
17551 SDValue N2 = Op.getOperand(2);
17552 SDLoc Loc(Op);
17553
17555 SDValue NegN2 =
17556 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17557
17558 if (!NegN2)
17559 return SDValue();
17560
17561 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17562 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17563 // These transformations may change sign of zeroes. For example,
17564 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17565 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17566 // Try and choose the cheaper one to negate.
17568 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17569 N0Cost, Depth + 1);
17570
17572 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17573 N1Cost, Depth + 1);
17574
17575 if (NegN0 && N0Cost <= N1Cost) {
17576 Cost = std::min(N0Cost, N2Cost);
17577 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17578 } else if (NegN1) {
17579 Cost = std::min(N1Cost, N2Cost);
17580 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17581 }
17582 }
17583
17584 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17585 if (isOperationLegal(ISD::FMA, VT)) {
17586 Cost = N2Cost;
17587 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17588 }
17589
17590 break;
17591 }
17592
17593 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17594 Cost, Depth);
17595}
17596
17597// Override to enable LOAD_STACK_GUARD lowering on Linux.
17598bool PPCTargetLowering::useLoadStackGuardNode() const {
17599 if (!Subtarget.isTargetLinux())
17600 return TargetLowering::useLoadStackGuardNode();
17601 return true;
17602}
17603
17604// Override to disable global variable loading on Linux and insert AIX canary
17605// word declaration.
17606void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17607 if (Subtarget.isAIXABI()) {
17608 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17609 PointerType::getUnqual(M.getContext()));
17610 return;
17611 }
17612 if (!Subtarget.isTargetLinux())
17613 return TargetLowering::insertSSPDeclarations(M);
17614}
17615
17616Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17617 if (Subtarget.isAIXABI())
17618 return M.getGlobalVariable(AIXSSPCanaryWordName);
17619 return TargetLowering::getSDagStackGuard(M);
17620}
17621
17622bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17623 bool ForCodeSize) const {
17624 if (!VT.isSimple() || !Subtarget.hasVSX())
17625 return false;
17626
17627 switch(VT.getSimpleVT().SimpleTy) {
17628 default:
17629 // For FP types that are currently not supported by PPC backend, return
17630 // false. Examples: f16, f80.
17631 return false;
17632 case MVT::f32:
17633 case MVT::f64: {
17634 if (Subtarget.hasPrefixInstrs()) {
17635 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17636 return true;
17637 }
17638 bool IsExact;
17639 APSInt IntResult(16, false);
17640 // The rounding mode doesn't really matter because we only care about floats
17641 // that can be converted to integers exactly.
17642 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17643 // For exact values in the range [-16, 15] we can materialize the float.
17644 if (IsExact && IntResult <= 15 && IntResult >= -16)
17645 return true;
17646 return Imm.isZero();
17647 }
17648 case MVT::ppcf128:
17649 return Imm.isPosZero();
17650 }
17651}
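// For example, with VSX but without prefixed instructions, +3.0 and -16.0
// convert exactly to integers in [-16, 15] and are treated as legal immediates,
// while 0.1 is neither exact nor zero and is rejected; with prefixed
// instructions (ISA 3.1) every f32/f64 immediate is accepted.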
17652
17653// For vector shift operation op, fold
17654// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17655static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17656 SelectionDAG &DAG) {
17657 SDValue N0 = N->getOperand(0);
17658 SDValue N1 = N->getOperand(1);
17659 EVT VT = N0.getValueType();
17660 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17661 unsigned Opcode = N->getOpcode();
17662 unsigned TargetOpcode;
17663
17664 switch (Opcode) {
17665 default:
17666 llvm_unreachable("Unexpected shift operation");
17667 case ISD::SHL:
17668 TargetOpcode = PPCISD::SHL;
17669 break;
17670 case ISD::SRL:
17671 TargetOpcode = PPCISD::SRL;
17672 break;
17673 case ISD::SRA:
17674 TargetOpcode = PPCISD::SRA;
17675 break;
17676 }
17677
17678 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17679 N1->getOpcode() == ISD::AND)
17680 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17681 if (Mask->getZExtValue() == OpSizeInBits - 1)
17682 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17683
17684 return SDValue();
17685}
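// For example, for a v4i32 shift (32-bit elements) this turns
//   (srl x, (and y, <31,31,31,31>))  into  (PPCISD::SRL x, y)
// on the assumption that the target shift node only uses the low bits of the
// shift amount, which makes the explicit modulo mask redundant.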
17686
17687SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17688 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17689 return Value;
17690
17691 SDValue N0 = N->getOperand(0);
17692 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17693 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17694 N0.getOpcode() != ISD::SIGN_EXTEND ||
17695 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17696 N->getValueType(0) != MVT::i64)
17697 return SDValue();
17698
17699 // We can't save an operation here if the value is already extended, and
17700 // the existing shift is easier to combine.
17701 SDValue ExtsSrc = N0.getOperand(0);
17702 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17703 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17704 return SDValue();
17705
17706 SDLoc DL(N0);
17707 SDValue ShiftBy = SDValue(CN1, 0);
17708 // We want the shift amount to be i32 on the extswli, but the shift could
17709 // have an i64.
17710 if (ShiftBy.getValueType() == MVT::i64)
17711 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17712
17713 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17714 ShiftBy);
17715}
17716
17717SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17718 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17719 return Value;
17720
17721 return SDValue();
17722}
17723
17724SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17725 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17726 return Value;
17727
17728 return SDValue();
17729}
17730
17731// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17732// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17733// When C is zero, the equation (addi Z, -C) can be simplified to Z
17734// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17735static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17736 const PPCSubtarget &Subtarget) {
17737 if (!Subtarget.isPPC64())
17738 return SDValue();
17739
17740 SDValue LHS = N->getOperand(0);
17741 SDValue RHS = N->getOperand(1);
17742
17743 auto isZextOfCompareWithConstant = [](SDValue Op) {
17744 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17745 Op.getValueType() != MVT::i64)
17746 return false;
17747
17748 SDValue Cmp = Op.getOperand(0);
17749 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17750 Cmp.getOperand(0).getValueType() != MVT::i64)
17751 return false;
17752
17753 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17754 int64_t NegConstant = 0 - Constant->getSExtValue();
17755 // Due to the limitations of the addi instruction,
17756 // -C is required to be [-32768, 32767].
17757 return isInt<16>(NegConstant);
17758 }
17759
17760 return false;
17761 };
17762
17763 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17764 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17765
17766 // If there is a pattern, canonicalize a zext operand to the RHS.
17767 if (LHSHasPattern && !RHSHasPattern)
17768 std::swap(LHS, RHS);
17769 else if (!LHSHasPattern && !RHSHasPattern)
17770 return SDValue();
17771
17772 SDLoc DL(N);
17773 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17774 SDValue Cmp = RHS.getOperand(0);
17775 SDValue Z = Cmp.getOperand(0);
17776 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17777 int64_t NegConstant = 0 - Constant->getSExtValue();
17778
17779 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17780 default: break;
17781 case ISD::SETNE: {
17782 // when C == 0
17783 // --> addze X, (addic Z, -1).carry
17784 // /
17785 // add X, (zext(setne Z, C))--
17786 // \ when -32768 <= -C <= 32767 && C != 0
17787 // --> addze X, (addic (addi Z, -C), -1).carry
17788 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17789 DAG.getConstant(NegConstant, DL, MVT::i64));
17790 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17791 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17792 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17793 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17794 SDValue(Addc.getNode(), 1));
17795 }
17796 case ISD::SETEQ: {
17797 // when C == 0
17798 // --> addze X, (subfic Z, 0).carry
17799 // /
17800 // add X, (zext(sete Z, C))--
17801 // \ when -32768 <= -C <= 32767 && C != 0
17802 // --> addze X, (subfic (addi Z, -C), 0).carry
17803 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17804 DAG.getConstant(NegConstant, DL, MVT::i64));
17805 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17806 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17807 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17808 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17809 SDValue(Subc.getNode(), 1));
17810 }
17811 }
17812
17813 return SDValue();
17814}
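// For example, with X and Z of type i64 (following the diagrams above):
//   (add X, (zext (setne Z, 7))) -> (addze X, (addic (addi Z, -7), -1).carry)
//   (add X, (zext (seteq Z, 0))) -> (addze X, (subfic Z, 0).carry)
// so the compare + zero-extend never has to materialize a 0/1 GPR value.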
17815
17816// Transform
17817// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17818// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17819// In this case both C1 and C2 must be known constants.
17820// C1+C2 must fit into a 34-bit signed integer.
17821static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17822 const PPCSubtarget &Subtarget) {
17823 if (!Subtarget.isUsingPCRelativeCalls())
17824 return SDValue();
17825
17826 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17827 // If we find that node try to cast the Global Address and the Constant.
17828 SDValue LHS = N->getOperand(0);
17829 SDValue RHS = N->getOperand(1);
17830
17831 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17832 std::swap(LHS, RHS);
17833
17834 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17835 return SDValue();
17836
17837 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17838 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17839 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17840
17841 // Check that both casts succeeded.
17842 if (!GSDN || !ConstNode)
17843 return SDValue();
17844
17845 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17846 SDLoc DL(GSDN);
17847
17848 // The signed int offset needs to fit in 34 bits.
17849 if (!isInt<34>(NewOffset))
17850 return SDValue();
17851
17852 // The new global address is a copy of the old global address except
17853 // that it has the updated Offset.
17854 SDValue GA =
17855 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17856 NewOffset, GSDN->getTargetFlags());
17857 SDValue MatPCRel =
17858 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17859 return MatPCRel;
17860}
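// For example, (add (MAT_PCREL_ADDR @g + 8), 16) is folded to
// (MAT_PCREL_ADDR @g + 24), provided the combined offset of 24 still fits in
// the 34-bit signed displacement of the prefixed PC-relative form.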
17861
17862SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17863 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17864 return Value;
17865
17866 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17867 return Value;
17868
17869 return SDValue();
17870}
17871
17872// Detect TRUNCATE operations on bitcasts of float128 values.
17873// What we are looking for here is the situation where we extract a subset
17874// of bits from a 128 bit float.
17875// This can be of two forms:
17876// 1) BITCAST of f128 feeding TRUNCATE
17877// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17878// The reason this is required is because we do not have a legal i128 type
17879// and so we want to prevent having to store the f128 and then reload part
17880// of it.
17881SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17882 DAGCombinerInfo &DCI) const {
17883 // If we are using CRBits then try that first.
17884 if (Subtarget.useCRBits()) {
17885 // Check if CRBits did anything and return that if it did.
17886 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17887 return CRTruncValue;
17888 }
17889
17890 SDLoc dl(N);
17891 SDValue Op0 = N->getOperand(0);
17892
17893 // Looking for a truncate of i128 to i64.
17894 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17895 return SDValue();
17896
17897 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17898
17899 // SRL feeding TRUNCATE.
17900 if (Op0.getOpcode() == ISD::SRL) {
17901 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
17902 // The right shift has to be by 64 bits.
17903 if (!ConstNode || ConstNode->getZExtValue() != 64)
17904 return SDValue();
17905
17906 // Switch the element number to extract.
17907 EltToExtract = EltToExtract ? 0 : 1;
17908 // Update Op0 past the SRL.
17909 Op0 = Op0.getOperand(0);
17910 }
17911
17912 // BITCAST feeding a TRUNCATE possibly via SRL.
17913 if (Op0.getOpcode() == ISD::BITCAST &&
17914 Op0.getValueType() == MVT::i128 &&
17915 Op0.getOperand(0).getValueType() == MVT::f128) {
17916 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
17917 return DCI.DAG.getNode(
17918 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
17919 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
17920 }
17921 return SDValue();
17922}
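// For example, on a little-endian subtarget
//   (trunc (bitcast f128 %x to i128) to i64)
// becomes (extract_vector_elt (bitcast %x to v2i64), 0), and with an
// intervening (srl ..., 64) the other doubleword (element 1) is extracted
// instead, so the f128 value never has to be stored and partially reloaded.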
17923
17924SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
17925 SelectionDAG &DAG = DCI.DAG;
17926
17927 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17928 if (!ConstOpOrElement)
17929 return SDValue();
17930
17931 // An imul is usually smaller than the alternative sequence for legal type.
17933 isOperationLegal(ISD::MUL, N->getValueType(0)))
17934 return SDValue();
17935
17936 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
17937 switch (this->Subtarget.getCPUDirective()) {
17938 default:
17939 // TODO: enhance the condition for subtarget before pwr8
17940 return false;
17941 case PPC::DIR_PWR8:
17942 // type mul add shl
17943 // scalar 4 1 1
17944 // vector 7 2 2
17945 return true;
17946 case PPC::DIR_PWR9:
17947 case PPC::DIR_PWR10:
17949 // type mul add shl
17950 // scalar 5 2 2
17951 // vector 7 2 2
17952
17953 // The cycle ratios of the related operations are shown in the table above.
17954 // Because mul is 5 (scalar)/7 (vector) and add/sub/shl are all 2 for both
17955 // scalar and vector types, the 2-instruction patterns (add/sub + shl)
17956 // cost 4 cycles and are always profitable; but the 3-instruction pattern
17957 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), i.e. sub + add + shl, costs 6.
17958 // So we should only do it for vector types.
17959 return IsAddOne && IsNeg ? VT.isVector() : true;
17960 }
17961 };
17962
17963 EVT VT = N->getValueType(0);
17964 SDLoc DL(N);
17965
17966 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
17967 bool IsNeg = MulAmt.isNegative();
17968 APInt MulAmtAbs = MulAmt.abs();
17969
17970 if ((MulAmtAbs - 1).isPowerOf2()) {
17971 // (mul x, 2^N + 1) => (add (shl x, N), x)
17972 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
17973
17974 if (!IsProfitable(IsNeg, true, VT))
17975 return SDValue();
17976
17977 SDValue Op0 = N->getOperand(0);
17978 SDValue Op1 =
17979 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17980 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
17981 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
17982
17983 if (!IsNeg)
17984 return Res;
17985
17986 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
17987 } else if ((MulAmtAbs + 1).isPowerOf2()) {
17988 // (mul x, 2^N - 1) => (sub (shl x, N), x)
17989 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
17990
17991 if (!IsProfitable(IsNeg, false, VT))
17992 return SDValue();
17993
17994 SDValue Op0 = N->getOperand(0);
17995 SDValue Op1 =
17996 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17997 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
17998
17999 if (!IsNeg)
18000 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18001 else
18002 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18003
18004 } else {
18005 return SDValue();
18006 }
18007}
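// Worked examples, subject to the IsProfitable gating above (on Power9/Power10
// the negated add-one form is only done for vector types):
//   mul x, 9   -> (add (shl x, 3), x)
//   mul x, -9  -> (sub 0, (add (shl x, 3), x))
//   mul x, 7   -> (sub (shl x, 3), x)
//   mul x, -7  -> (sub x, (shl x, 3))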
18008
18009// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18010// in combiner since we need to check SD flags and other subtarget features.
18011SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18012 DAGCombinerInfo &DCI) const {
18013 SDValue N0 = N->getOperand(0);
18014 SDValue N1 = N->getOperand(1);
18015 SDValue N2 = N->getOperand(2);
18016 SDNodeFlags Flags = N->getFlags();
18017 EVT VT = N->getValueType(0);
18018 SelectionDAG &DAG = DCI.DAG;
18019 const TargetOptions &Options = getTargetMachine().Options;
18020 unsigned Opc = N->getOpcode();
18021 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18022 bool LegalOps = !DCI.isBeforeLegalizeOps();
18023 SDLoc Loc(N);
18024
18025 if (!isOperationLegal(ISD::FMA, VT))
18026 return SDValue();
18027
18028 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18029 // since (fnmsub a b c)=-0 while c-ab=+0.
18030 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18031 return SDValue();
18032
18033 // (fma (fneg a) b c) => (fnmsub a b c)
18034 // (fnmsub (fneg a) b c) => (fma a b c)
18035 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18036 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18037
18038 // (fma a (fneg b) c) => (fnmsub a b c)
18039 // (fnmsub a (fneg b) c) => (fma a b c)
18040 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18041 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18042
18043 return SDValue();
18044}
18045
18046bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18047 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
18048 if (!Subtarget.is64BitELFABI())
18049 return false;
18050
18051 // If not a tail call then no need to proceed.
18052 if (!CI->isTailCall())
18053 return false;
18054
18055 // If sibling calls have been disabled and tail-calls aren't guaranteed
18056 // there is no reason to duplicate.
18057 auto &TM = getTargetMachine();
18058 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18059 return false;
18060
18061 // Can't tail call a function called indirectly, or if it has variadic args.
18062 const Function *Callee = CI->getCalledFunction();
18063 if (!Callee || Callee->isVarArg())
18064 return false;
18065
18066 // Make sure the callee and caller calling conventions are eligible for tco.
18067 const Function *Caller = CI->getParent()->getParent();
18068 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18069 CI->getCallingConv()))
18070 return false;
18071
18072 // If the function is local then we have a good chance at tail-calling it
18073 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18074}
18075
18076bool PPCTargetLowering::
18077isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18078 const Value *Mask = AndI.getOperand(1);
18079 // If the mask is suitable for andi. or andis. we should sink the and.
18080 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18081 // Can't handle constants wider than 64-bits.
18082 if (CI->getBitWidth() > 64)
18083 return false;
18084 int64_t ConstVal = CI->getZExtValue();
18085 return isUInt<16>(ConstVal) ||
18086 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18087 }
18088
18089 // For non-constant masks, we can always use the record-form and.
18090 return true;
18091}
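// For example, masks such as 0xFF00 (fits andi.) or 0xFF0000 (fits andis.)
// keep the 'and' next to the compare so it can fold into a record-form
// instruction, whereas a mask like 0x1FFFF already needs extra instructions to
// materialize, so sinking buys nothing and false is returned.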
18092
18093/// getAddrModeForFlags - Based on the set of address flags, select the most
18094/// optimal instruction format to match by.
18095PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18096 // This is not a node we should be handling here.
18097 if (Flags == PPC::MOF_None)
18098 return PPC::AM_None;
18099 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18100 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18101 if ((Flags & FlagSet) == FlagSet)
18102 return PPC::AM_DForm;
18103 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18104 if ((Flags & FlagSet) == FlagSet)
18105 return PPC::AM_DSForm;
18106 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18107 if ((Flags & FlagSet) == FlagSet)
18108 return PPC::AM_DQForm;
18109 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18110 if ((Flags & FlagSet) == FlagSet)
18111 return PPC::AM_PrefixDForm;
18112 // If no other forms are selected, return an X-Form as it is the most
18113 // general addressing mode.
18114 return PPC::AM_XForm;
18115}
18116
18117/// Set alignment flags based on whether or not the Frame Index is aligned.
18118/// Utilized when computing flags for address computation when selecting
18119/// load and store instructions.
18120static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18121 SelectionDAG &DAG) {
18122 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18123 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18124 if (!FI)
18125 return;
18126 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18127 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18128 // If this is (add $FI, $S16Imm), the alignment flags are already set
18129 // based on the immediate. We just need to clear the alignment flags
18130 // if the FI alignment is weaker.
18131 if ((FrameIndexAlign % 4) != 0)
18132 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18133 if ((FrameIndexAlign % 16) != 0)
18134 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18135 // If the address is a plain FrameIndex, set alignment flags based on
18136 // FI alignment.
18137 if (!IsAdd) {
18138 if ((FrameIndexAlign % 4) == 0)
18139 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18140 if ((FrameIndexAlign % 16) == 0)
18141 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18142 }
18143}
18144
18145/// Given a node, compute flags that are used for address computation when
18146/// selecting load and store instructions. The flags computed are stored in
18147/// FlagSet. This function takes into account whether the node is a constant,
18148/// an ADD, or an OR, and computes the address flags accordingly.
18149static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18150 SelectionDAG &DAG) {
18151 // Set the alignment flags for the node depending on if the node is
18152 // 4-byte or 16-byte aligned.
18153 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18154 if ((Imm & 0x3) == 0)
18155 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18156 if ((Imm & 0xf) == 0)
18157 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18158 };
18159
18160 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18161 // All 32-bit constants can be computed as LIS + Disp.
18162 const APInt &ConstImm = CN->getAPIntValue();
18163 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18164 FlagSet |= PPC::MOF_AddrIsSImm32;
18165 SetAlignFlagsForImm(ConstImm.getZExtValue());
18166 setAlignFlagsForFI(N, FlagSet, DAG);
18167 }
18168 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18169 FlagSet |= PPC::MOF_RPlusSImm34;
18170 else // Let constant materialization handle large constants.
18171 FlagSet |= PPC::MOF_NotAddNorCst;
18172 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18173 // This address can be represented as an addition of:
18174 // - Register + Imm16 (possibly a multiple of 4/16)
18175 // - Register + Imm34
18176 // - Register + PPCISD::Lo
18177 // - Register + Register
18178 // In any case, we won't have to match this as Base + Zero.
18179 SDValue RHS = N.getOperand(1);
18180 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18181 const APInt &ConstImm = CN->getAPIntValue();
18182 if (ConstImm.isSignedIntN(16)) {
18183 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18184 SetAlignFlagsForImm(ConstImm.getZExtValue());
18185 setAlignFlagsForFI(N, FlagSet, DAG);
18186 }
18187 if (ConstImm.isSignedIntN(34))
18188 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18189 else
18190 FlagSet |= PPC::MOF_RPlusR; // Register.
18191 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18192 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18193 else
18194 FlagSet |= PPC::MOF_RPlusR;
18195 } else { // The address computation is not a constant or an addition.
18196 setAlignFlagsForFI(N, FlagSet, DAG);
18197 FlagSet |= PPC::MOF_NotAddNorCst;
18198 }
18199}
18200
18201static bool isPCRelNode(SDValue N) {
18202 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18203 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18204 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18205 isValidPCRelNode<JumpTableSDNode>(N) ||
18206 isValidPCRelNode<BlockAddressSDNode>(N));
18207}
18208
18209/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18210/// the address flags of the load/store instruction that is to be matched.
18211unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18212 SelectionDAG &DAG) const {
18213 unsigned FlagSet = PPC::MOF_None;
18214
18215 // Compute subtarget flags.
18216 if (!Subtarget.hasP9Vector())
18217 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18218 else {
18219 FlagSet |= PPC::MOF_SubtargetP9;
18220 if (Subtarget.hasPrefixInstrs())
18221 FlagSet |= PPC::MOF_SubtargetP10;
18222 }
18223 if (Subtarget.hasSPE())
18224 FlagSet |= PPC::MOF_SubtargetSPE;
18225
18226 // Check if we have a PCRel node and return early.
18227 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18228 return FlagSet;
18229
18230 // If the node is the paired load/store intrinsics, compute flags for
18231 // address computation and return early.
18232 unsigned ParentOp = Parent->getOpcode();
18233 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18234 (ParentOp == ISD::INTRINSIC_VOID))) {
18235 unsigned ID = Parent->getConstantOperandVal(1);
18236 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18237 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18238 ? Parent->getOperand(2)
18239 : Parent->getOperand(3);
18240 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18241 FlagSet |= PPC::MOF_Vector;
18242 return FlagSet;
18243 }
18244 }
18245
18246 // Mark this as something we don't want to handle here if it is atomic
18247 // or pre-increment instruction.
18248 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18249 if (LSB->isIndexed())
18250 return PPC::MOF_None;
18251
18252 // Compute in-memory type flags. This is based on if there are scalars,
18253 // floats or vectors.
18254 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18255 assert(MN && "Parent should be a MemSDNode!");
18256 EVT MemVT = MN->getMemoryVT();
18257 unsigned Size = MemVT.getSizeInBits();
18258 if (MemVT.isScalarInteger()) {
18259 assert(Size <= 128 &&
18260 "Not expecting scalar integers larger than 16 bytes!");
18261 if (Size < 32)
18262 FlagSet |= PPC::MOF_SubWordInt;
18263 else if (Size == 32)
18264 FlagSet |= PPC::MOF_WordInt;
18265 else
18266 FlagSet |= PPC::MOF_DoubleWordInt;
18267 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18268 if (Size == 128)
18269 FlagSet |= PPC::MOF_Vector;
18270 else if (Size == 256) {
18271 assert(Subtarget.pairedVectorMemops() &&
18272 "256-bit vectors are only available when paired vector memops is "
18273 "enabled!");
18274 FlagSet |= PPC::MOF_Vector;
18275 } else
18276 llvm_unreachable("Not expecting illegal vectors!");
18277 } else { // Floating point type: can be scalar, f128 or vector types.
18278 if (Size == 32 || Size == 64)
18279 FlagSet |= PPC::MOF_ScalarFloat;
18280 else if (MemVT == MVT::f128 || MemVT.isVector())
18281 FlagSet |= PPC::MOF_Vector;
18282 else
18283 llvm_unreachable("Not expecting illegal scalar floats!");
18284 }
18285
18286 // Compute flags for address computation.
18287 computeFlagsForAddressComputation(N, FlagSet, DAG);
18288
18289 // Compute type extension flags.
18290 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18291 switch (LN->getExtensionType()) {
18292 case ISD::SEXTLOAD:
18293 FlagSet |= PPC::MOF_SExt;
18294 break;
18295 case ISD::EXTLOAD:
18296 case ISD::ZEXTLOAD:
18297 FlagSet |= PPC::MOF_ZExt;
18298 break;
18299 case ISD::NON_EXTLOAD:
18300 FlagSet |= PPC::MOF_NoExt;
18301 break;
18302 }
18303 } else
18304 FlagSet |= PPC::MOF_NoExt;
18305
18306 // For integers, no extension is the same as zero extension.
18307 // We set the extension mode to zero extension so we don't have
18308 // to add separate entries in AddrModesMap for loads and stores.
18309 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18310 FlagSet |= PPC::MOF_ZExt;
18311 FlagSet &= ~PPC::MOF_NoExt;
18312 }
18313
18314 // If we don't have prefixed instructions, 34-bit constants should be
18315 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18316 bool IsNonP1034BitConst =
18317 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18318 FlagSet) == PPC::MOF_RPlusSImm34;
18319 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18320 IsNonP1034BitConst)
18321 FlagSet |= PPC::MOF_NotAddNorCst;
18322
18323 return FlagSet;
18324}
18325
18326/// SelectForceXFormMode - Given the specified address, force it to be
18327/// represented as an indexed [r+r] operation (an XForm instruction).
18328PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18329 SDValue &Base,
18330 SelectionDAG &DAG) const {
18331
18332 PPC::AddrMode Mode = PPC::AM_XForm;
18333 int16_t ForceXFormImm = 0;
18334 if (provablyDisjointOr(DAG, N) &&
18335 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18336 Disp = N.getOperand(0);
18337 Base = N.getOperand(1);
18338 return Mode;
18339 }
18340
18341 // If the address is the result of an add, we will utilize the fact that the
18342 // address calculation includes an implicit add. However, we can reduce
18343 // register pressure if we do not materialize a constant just for use as the
18344 // index register. We only get rid of the add if it is not an add of a
18345 // value and a 16-bit signed constant and both have a single use.
18346 if (N.getOpcode() == ISD::ADD &&
18347 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18348 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18349 Disp = N.getOperand(0);
18350 Base = N.getOperand(1);
18351 return Mode;
18352 }
18353
18354 // Otherwise, use R0 as the base register.
18355 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18356 N.getValueType());
18357 Base = N;
18358
18359 return Mode;
18360}
18361
18362bool PPCTargetLowering::splitValueIntoRegisterParts(
18363 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18364 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18365 EVT ValVT = Val.getValueType();
18366 // If we are splitting a scalar integer into f64 parts (i.e. so they
18367 // can be placed into VFRC registers), we need to zero extend and
18368 // bitcast the values. This will ensure the value is placed into a
18369 // VSR using direct moves or stack operations as needed.
18370 if (PartVT == MVT::f64 &&
18371 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18372 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18373 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18374 Parts[0] = Val;
18375 return true;
18376 }
18377 return false;
18378}
18379
18380SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18381 SelectionDAG &DAG) const {
18382 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18384 EVT RetVT = Op.getValueType();
18385 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18386 SDValue Callee =
18387 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18388 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18391 for (const SDValue &N : Op->op_values()) {
18392 EVT ArgVT = N.getValueType();
18393 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18394 Entry.Node = N;
18395 Entry.Ty = ArgTy;
18396 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18397 Entry.IsZExt = !Entry.IsSExt;
18398 Args.push_back(Entry);
18399 }
18400
18401 SDValue InChain = DAG.getEntryNode();
18402 SDValue TCChain = InChain;
18403 const Function &F = DAG.getMachineFunction().getFunction();
18404 bool isTailCall =
18405 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18406 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18407 if (isTailCall)
18408 InChain = TCChain;
18409 CLI.setDebugLoc(SDLoc(Op))
18410 .setChain(InChain)
18411 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18412 .setTailCall(isTailCall)
18413 .setSExtResult(SignExtend)
18414 .setZExtResult(!SignExtend)
18416 return TLI.LowerCallTo(CLI).first;
18417}
18418
18419SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18420 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18421 SelectionDAG &DAG) const {
18422 if (Op.getValueType() == MVT::f32)
18423 return lowerToLibCall(LibCallFloatName, Op, DAG);
18424
18425 if (Op.getValueType() == MVT::f64)
18426 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18427
18428 return SDValue();
18429}
18430
18431bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18432 SDNodeFlags Flags = Op.getNode()->getFlags();
18433 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18434 Flags.hasNoNaNs() && Flags.hasNoInfs();
18435}
18436
18437bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18438 return Op.getNode()->getFlags().hasApproximateFuncs();
18439}
18440
18441bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18443}
18444
18445SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18446 const char *LibCallFloatName,
18447 const char *LibCallDoubleNameFinite,
18448 const char *LibCallFloatNameFinite,
18449 SDValue Op,
18450 SelectionDAG &DAG) const {
18451 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18452 return SDValue();
18453
18454 if (!isLowringToMASSFiniteSafe(Op))
18455 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18456 DAG);
18457
18458 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18459 LibCallDoubleNameFinite, Op, DAG);
18460}
18461
18462SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18463 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18464 "__xl_powf_finite", Op, DAG);
18465}
18466
18467SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18468 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18469 "__xl_sinf_finite", Op, DAG);
18470}
18471
18472SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18473 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18474 "__xl_cosf_finite", Op, DAG);
18475}
18476
18477SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18478 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18479 "__xl_logf_finite", Op, DAG);
18480}
18481
18482SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18483 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18484 "__xl_log10f_finite", Op, DAG);
18485}
18486
18487SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18488 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18489 "__xl_expf_finite", Op, DAG);
18490}
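// These lowerings only fire when scalar MASS conversion is enabled (see
// isScalarMASSConversionEnabled above) and the call carries the afn
// (approximate-funcs) flag; if it additionally has nnan, ninf and nsz, the
// "*_finite" MASS entry points are chosen instead of the plain ones.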
18491
18492// If we happen to match to an aligned D-Form, check if the Frame Index is
18493// adequately aligned. If it is not, reset the mode to match to X-Form.
18494static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18495 PPC::AddrMode &Mode) {
18496 if (!isa<FrameIndexSDNode>(N))
18497 return;
18498 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18499 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18500 Mode = PPC::AM_XForm;
18501}
18502
18503/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18504/// compute the address flags of the node, get the optimal address mode based
18505/// on the flags, and set the Base and Disp based on the address mode.
18507 SDValue N, SDValue &Disp,
18508 SDValue &Base,
18509 SelectionDAG &DAG,
18510 MaybeAlign Align) const {
18511 SDLoc DL(Parent);
18512
18513 // Compute the address flags.
18514 unsigned Flags = computeMOFlags(Parent, N, DAG);
18515
18516 // Get the optimal address mode based on the Flags.
18517 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18518
18519 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18520 // Select an X-Form load if it is not.
18521 setXFormForUnalignedFI(N, Flags, Mode);
18522
18523 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18524 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18525 assert(Subtarget.isUsingPCRelativeCalls() &&
18526 "Must be using PC-Relative calls when a valid PC-Relative node is "
18527 "present!");
18528 Mode = PPC::AM_PCRel;
18529 }
18530
18531 // Set Base and Disp accordingly depending on the address mode.
18532 switch (Mode) {
18533 case PPC::AM_DForm:
18534 case PPC::AM_DSForm:
18535 case PPC::AM_DQForm: {
18536 // This is a register plus a 16-bit immediate. The base will be the
18537 // register and the displacement will be the immediate unless it
18538 // isn't sufficiently aligned.
18539 if (Flags & PPC::MOF_RPlusSImm16) {
18540 SDValue Op0 = N.getOperand(0);
18541 SDValue Op1 = N.getOperand(1);
18542 int16_t Imm = Op1->getAsZExtVal();
18543 if (!Align || isAligned(*Align, Imm)) {
18544 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18545 Base = Op0;
18546 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18547 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18548 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18549 }
18550 break;
18551 }
18552 }
18553 // This is a register plus the @lo relocation. The base is the register
18554 // and the displacement is the global address.
18555 else if (Flags & PPC::MOF_RPlusLo) {
18556 Disp = N.getOperand(1).getOperand(0); // The global address.
18561 Base = N.getOperand(0);
18562 break;
18563 }
18564 // This is a constant address at most 32 bits. The base will be
18565 // zero or load-immediate-shifted and the displacement will be
18566 // the low 16 bits of the address.
18567 else if (Flags & PPC::MOF_AddrIsSImm32) {
18568 auto *CN = cast<ConstantSDNode>(N);
18569 EVT CNType = CN->getValueType(0);
18570 uint64_t CNImm = CN->getZExtValue();
18571 // If this address fits entirely in a 16-bit sext immediate field, codegen
18572 // this as "d, 0".
18573 int16_t Imm;
18574 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18575 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18576 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18577 CNType);
18578 break;
18579 }
18580 // Handle 32-bit sext immediate with LIS + Addr mode.
18581 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18582 (!Align || isAligned(*Align, CNImm))) {
18583 int32_t Addr = (int32_t)CNImm;
18584 // Otherwise, break this down into LIS + Disp.
18585 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18586 Base =
18587 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18588 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18589 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18590 break;
18591 }
18592 }
18593 // Otherwise, the PPC::MOF_NotAddNorCst flag is set. The load/store is non-foldable.
18594 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18595 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18596 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18597 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18598 } else
18599 Base = N;
18600 break;
18601 }
18602 case PPC::AM_PrefixDForm: {
18603 int64_t Imm34 = 0;
18604 unsigned Opcode = N.getOpcode();
18605 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18606 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18607 // N is an ADD/OR node, and its second operand is a 34-bit signed immediate.
18608 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18609 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18610 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18611 else
18612 Base = N.getOperand(0);
18613 } else if (isIntS34Immediate(N, Imm34)) {
18614 // The address is a 34-bit signed immediate.
18615 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18616 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18617 }
18618 break;
18619 }
18620 case PPC::AM_PCRel: {
18621 // When selecting PC-Relative instructions, "Base" is not utilized as
18622 // we select the address as [PC+imm].
18623 Disp = N;
18624 break;
18625 }
18626 case PPC::AM_None:
18627 break;
18628 default: { // By default, X-Form is always available to be selected.
18629 // When a frame index is not aligned, we also match by XForm.
18630 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18631 Base = FI ? N : N.getOperand(1);
18632 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18633 N.getValueType())
18634 : N.getOperand(0);
18635 break;
18636 }
18637 }
18638 return Mode;
18639}
18640
18641CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18642 bool Return,
18643 bool IsVarArg) const {
18644 switch (CC) {
18645 case CallingConv::Cold:
18646 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18647 default:
18648 return CC_PPC64_ELF;
18649 }
18650}
18651
18652bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18653 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18654}
18655
18656TargetLowering::AtomicExpansionKind
18657PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18658 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18659 if (shouldInlineQuadwordAtomics() && Size == 128)
18661
18662 switch (AI->getOperation()) {
18666 default:
18668 }
18669
18670 llvm_unreachable("unreachable atomicrmw operation");
18671}
18672
18673TargetLowering::AtomicExpansionKind
18674PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18675 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18676 if (shouldInlineQuadwordAtomics() && Size == 128)
18679}
18680
18681static Intrinsic::ID
18682getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18683 switch (BinOp) {
18684 default:
18685 llvm_unreachable("Unexpected AtomicRMW BinOp");
18686 case AtomicRMWInst::Xchg:
18687 return Intrinsic::ppc_atomicrmw_xchg_i128;
18688 case AtomicRMWInst::Add:
18689 return Intrinsic::ppc_atomicrmw_add_i128;
18690 case AtomicRMWInst::Sub:
18691 return Intrinsic::ppc_atomicrmw_sub_i128;
18692 case AtomicRMWInst::And:
18693 return Intrinsic::ppc_atomicrmw_and_i128;
18694 case AtomicRMWInst::Or:
18695 return Intrinsic::ppc_atomicrmw_or_i128;
18696 case AtomicRMWInst::Xor:
18697 return Intrinsic::ppc_atomicrmw_xor_i128;
18698 case AtomicRMWInst::Nand:
18699 return Intrinsic::ppc_atomicrmw_nand_i128;
18700 }
18701}
18702
18703Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18704 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18705 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18706 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18707 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18708 Type *ValTy = Incr->getType();
18709 assert(ValTy->getPrimitiveSizeInBits() == 128);
18710 Function *RMW = Intrinsic::getDeclaration(
18711 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18712 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18713 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18714 Value *IncrHi =
18715 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18716 Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
18717 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18718 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18719 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18720 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18721 return Builder.CreateOr(
18722 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18723}
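// Illustrative sketch, not part of PPCISelLowering.cpp: a source-level operation
// that reaches emitMaskedAtomicRMWIntrinsic above. Assuming clang targeting
// powerpc64le with -mcpu=pwr8 or newer (so 16-byte atomics are inlined rather
// than lowered to libatomic calls), the resulting i128 atomicrmw is expanded to
// ppc_atomicrmw_add_i128 with the operand split into 64-bit lo/hi halves. The
// function name is hypothetical.
//
//   unsigned __int128 add128(unsigned __int128 *P, unsigned __int128 V) {
//     return __atomic_fetch_add(P, V, __ATOMIC_SEQ_CST);
//   }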
18724
18725Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18726 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18727 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18728 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18729 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18730 Type *ValTy = CmpVal->getType();
18731 assert(ValTy->getPrimitiveSizeInBits() == 128);
18732 Function *IntCmpXchg =
18733 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18734 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18735 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18736 Value *CmpHi =
18737 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18738 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18739 Value *NewHi =
18740 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18741 emitLeadingFence(Builder, CI, Ord);
18742 Value *LoHi =
18743 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
18744 emitTrailingFence(Builder, CI, Ord);
18745 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18746 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18747 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18748 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18749 return Builder.CreateOr(
18750 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18751}
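// Illustrative sketch, not part of PPCISelLowering.cpp: the compare-exchange
// counterpart of the example above. Under the same assumptions (pwr8 or newer,
// quadword atomics inlined), a 16-byte compare-exchange is expanded through
// ppc_cmpxchg_i128 with the expected and new values split into 64-bit halves.
// The function name is hypothetical.
//
//   bool cas128(unsigned __int128 *P, unsigned __int128 *Expected,
//               unsigned __int128 Desired) {
//     return __atomic_compare_exchange_n(P, Expected, Desired, /*weak=*/false,
//                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
//   }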
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static const unsigned PerfectShuffleTable[6561+1]
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const char LLVMTargetMachineRef TM
pre isel intrinsic Pre ISel Intrinsic Lowering
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5196
bool isDenormal() const
Definition: APFloat.h:1296
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
APInt abs() const
Get the absolute value.
Definition: APInt.h:1737
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1671
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ Xor
*p = old ^ v
Definition: Instructions.h:774
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
BinOp getOperation() const
Definition: Instructions.h:845
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:889
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1461
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1709
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:2195
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1767
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1629
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1702
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1635
unsigned arg_size() const
Definition: InstrTypes.h:1652
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:900
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:865
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:680
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:715
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:677
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:338
arg_iterator arg_begin()
Definition: Function.h:813
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:350
size_t arg_size() const
Definition: Function.h:846
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:205
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:213
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:556
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:267
bool hasHiddenVisibility() const
Definition: GlobalValue.h:250
StringRef getSection() const
Definition: Globals.cpp:174
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:630
bool hasComdat() const
Definition: GlobalValue.h:241
Type * getValueType() const
Definition: GlobalValue.h:296
bool hasProtectedVisibility() const
Definition: GlobalValue.h:251
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2001
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2494
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1431
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1410
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2005
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2390
const BasicBlock * getParent() const
Definition: Instruction.h:152
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:184
bool isUnordered() const
Definition: Instructions.h:274
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:76
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:287
bool is32BitELFABI() const
Definition: PPCSubtarget.h:219
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:259
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool useSoftFloat() const
Definition: PPCSubtarget.h:174
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:202
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:253
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:271
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:206
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:277
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:289
bool is64BitELFABI() const
Definition: PPCSubtarget.h:218
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:295
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:152
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:265
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified addressed, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
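As a small sketch of how the creation helpers above compose (illustrative only, not code from this file; the helper name is an assumption), the snippet builds an equals-zero compare and feeds it into a select, asking the target for the setcc result type so the same code works for vector operands:
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
// Select TrueV when LHS == 0, otherwise FalseV.
static SDValue buildIsZeroSelect(SelectionDAG &DAG, const SDLoc &DL, SDValue LHS,
                                 SDValue TrueV, SDValue FalseV) {
  EVT VT = LHS.getValueType();
  EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Cond =
      DAG.getSetCC(DL, CCVT, LHS, DAG.getConstant(0, DL, VT), ISD::SETEQ);
  return DAG.getSelect(DL, TrueV.getValueType(), Cond, TrueV, FalseV);
}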
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:727
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
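A minimal sketch of the memory helpers above (not from this file; the helper name and the choice to chain off the entry node are assumptions of the sketch): spill a value to a fresh stack temporary and reload it, with the load chained on the store.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
static SDValue spillAndReload(SelectionDAG &DAG, const SDLoc &DL, SDValue Val) {
  EVT VT = Val.getValueType();
  MachineFunction &MF = DAG.getMachineFunction();
  SDValue Slot = DAG.CreateStackTemporary(VT);
  int FI = cast<FrameIndexSDNode>(Slot)->getIndex();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  // Store Val into the slot, then load it back on the store's chain.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, Val, Slot, PtrInfo);
  return DAG.getLoad(VT, DL, Store, Slot, PtrInfo);
}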
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
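A tiny sketch of the known-bits queries above (the helper name is illustrative, not from this file): prove that a value is 16-byte aligned by asking whether its low four bits are known to be zero.
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
static bool isKnown16ByteAligned(llvm::SDValue Ptr, llvm::SelectionDAG &DAG) {
  // Low 4 bits known zero <=> the value is a multiple of 16.
  unsigned Bits = Ptr.getValueSizeInBits().getFixedValue();
  return DAG.MaskedValueIsZero(Ptr, llvm::APInt::getLowBitsSet(Bits, 4));
}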
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
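For readers unfamiliar with SmallSet, a brief usage sketch (names are illustrative, not from this file): count distinct register numbers, staying off the heap for small inputs.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
static unsigned countUniqueRegs(llvm::ArrayRef<unsigned> Regs) {
  llvm::SmallSet<unsigned, 8> Seen; // no heap allocation for <= 8 elements
  unsigned Unique = 0;
  for (unsigned R : Regs)
    if (Seen.insert(R).second) // .second is true only on first insertion
      ++Unique;
  return Unique;
}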
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
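A hedged sketch of the StringSwitch pattern (the constraint letters and the free function are illustrative, not this file's getConstraintType implementation): map an inline-asm constraint string to a TargetLowering::ConstraintType.
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/TargetLowering.h"
static llvm::TargetLowering::ConstraintType
classifyConstraint(llvm::StringRef Constraint) {
  return llvm::StringSwitch<llvm::TargetLowering::ConstraintType>(Constraint)
      .Case("r", llvm::TargetLowering::C_RegisterClass) // GPR operand
      .Case("m", llvm::TargetLowering::C_Memory)        // memory operand
      .Default(llvm::TargetLowering::C_Unknown);
}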
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
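The legalization hooks above are normally wired up in a target's TargetLowering constructor. The sketch below only shows the shape of such a constructor; MyTargetLowering, MySubtarget, Subtarget and GPRRegClass are placeholder names, not symbols from this file.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                   const MySubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  addRegisterClass(MVT::i32, &MyTarget::GPRRegClass);   // i32 lives in GPRs
  setOperationAction(ISD::SDIV, MVT::i32, Expand);      // no hardware divide
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); // handled in LowerOperation
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);      // no f64->f32 truncating store
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setBooleanContents(ZeroOrOneBooleanContent);          // i1 results are 0 or 1
  computeRegisterProperties(STI.getRegisterInfo());     // derive legal types/classes
}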
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:246
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1126
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1122
@ TargetConstantPool
Definition: ISDOpcodes.h:168
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1155
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1241
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:979
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:913
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1199
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:939
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1047
@ TargetJumpTable
Definition: ISDOpcodes.h:167
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1221
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:988
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1056
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1237
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1151
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:978
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:424
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:425
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1248
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1041
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1097
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1182
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1208
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1094
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1146
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1070
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1140
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1320
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1205
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:165
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1556
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1472
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1503
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1562
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1459
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:96
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:200
@ MO_TPREL_HA
Definition: PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:160
@ MO_HA
Definition: PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting memory load instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY / G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
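As context for isSplatShuffleMask: a splat mask simply selects the same source element for every output lane. A minimal, element-level sketch follows (the LLVM routine works on byte-granular masks of a ShuffleVectorSDNode with an EltSize in bytes, which this hypothetical isSplatMask helper does not model):

#include <cassert>
#include <vector>

// Illustrative sketch only: every defined lane (>= 0) must pick the same
// source element for the shuffle to be a splat.
static bool isSplatMask(const std::vector<int> &Mask) {
  int Splat = -1;
  for (int M : Mask) {
    if (M < 0)
      continue;            // treat negative entries as "undef" lanes
    if (Splat < 0)
      Splat = M;
    else if (M != Splat)
      return false;
  }
  return Splat >= 0;
}

int main() {
  assert(isSplatMask({2, 2, 2, 2}));      // splat of element 2
  assert(isSplatMask({5, -1, 5, 5}));     // undef lanes are ignored
  assert(!isSplatMask({0, 1, 2, 3}));     // identity shuffle, not a splat
}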
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
@ Offset
Definition: DWP.cpp:456
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
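all_of (and its companion any_of, which appears further down this list) is a thin range wrapper around the standard algorithm. A minimal usage sketch, assuming an LLVM development install is available to compile against:

#include "llvm/ADT/STLExtras.h"   // llvm::all_of / llvm::any_of
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Vals = {2, 4, 6, 8};

  // Same predicates one would otherwise write with std::all_of/std::any_of
  // plus explicit begin()/end() iterators.
  assert(llvm::all_of(Vals, [](int V) { return V % 2 == 0; }));
  assert(llvm::any_of(Vals, [](int V) { return V > 6; }));
  assert(!llvm::any_of(Vals, [](int V) { return V < 0; }));
}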
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
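The essential test behind isIntS16Immediate (and the S34 variant further down) is whether a constant survives truncation to the narrow width and sign-extension back. An illustrative standalone version of that check (the real helpers take an SDNode; fitsInS16 is a hypothetical name):

#include <cassert>
#include <cstdint>

// Illustrative sketch: does Imm fit in a signed 16-bit immediate field?
static bool fitsInS16(int64_t Imm) {
  return Imm == static_cast<int64_t>(static_cast<int16_t>(Imm));
}

int main() {
  assert(fitsInS16(32767));    // INT16_MAX
  assert(fitsInS16(-32768));   // INT16_MIN
  assert(!fitsInS16(32768));   // one past the top of the range
  assert(!fitsInS16(-40000));
}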
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:269
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
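isPowerOf2_64 and countr_zero above are plain bit tricks. An illustrative standalone version of the same arithmetic (not the LLVM implementations; the zero-input case of the trailing-zero count is deliberately left out of this sketch):

#include <cassert>
#include <cstdint>

// A power of two > 0 has exactly one bit set, so clearing its lowest set bit
// yields zero.
static bool isPow2_64(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

// Count zero bits below the lowest set bit (V must be non-zero here).
static unsigned countTrailingZeros64(uint64_t V) {
  unsigned N = 0;
  while ((V & 1) == 0) {
    V >>= 1;
    ++N;
  }
  return N;
}

int main() {
  assert(isPow2_64(1) && isPow2_64(64) && !isPow2_64(0) && !isPow2_64(96));
  assert(countTrailingZeros64(0x50) == 4);   // 0b101'0000
  assert(countTrailingZeros64(1) == 0);
}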
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
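For a power-of-two alignment, the round-up performed by alignTo and the multiple-of check behind isAligned (documented earlier in this list) reduce to two familiar formulas. An illustrative sketch using plain integers rather than the llvm::Align wrapper; alignUp and isMultipleOf are hypothetical names:

#include <cassert>
#include <cstdint>

// Round Size up to the next multiple of the (power-of-two) alignment A.
static uint64_t alignUp(uint64_t Size, uint64_t A) {
  return (Size + A - 1) & ~(A - 1);
}

// Is Size already a multiple of A?
static bool isMultipleOf(uint64_t A, uint64_t Size) { return Size % A == 0; }

int main() {
  assert(alignUp(13, 8) == 16);
  assert(alignUp(16, 8) == 16);      // already-aligned values are unchanged
  assert(isMultipleOf(8, 16) && !isMultipleOf(8, 13));
}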
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1923
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:436
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
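SignExtend32 and SignExtend64 implement the usual shift-up/arithmetic-shift-down idiom for extending a B-bit two's-complement value. An illustrative standalone version (assumes 0 < B <= 64 and relies on arithmetic right shift of signed values; signExtend64 is a hypothetical name):

#include <cassert>
#include <cstdint>

// Sign-extend the low B bits of X to a full 64-bit signed integer.
static int64_t signExtend64(uint64_t X, unsigned B) {
  return static_cast<int64_t>(X << (64 - B)) >> (64 - B);
}

int main() {
  assert(signExtend64(0xFFFF, 16) == -1);     // all-ones 16-bit value
  assert(signExtend64(0x8000, 16) == -32768); // 16-bit minimum
  assert(signExtend64(0x7FFF, 16) == 32767);  // positive values unchanged
}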
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
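The boolean part of isRunOfOnes is the classic shifted-mask test; the real helper additionally reports mask-begin/mask-end bit positions through its MB and ME reference parameters. A standalone illustrative version of just the contiguous-run check (helper names here are hypothetical):

#include <cassert>
#include <cstdint>

// 0...01...1 pattern: adding one clears every set bit.
static bool isLowMask(uint32_t V) { return (V & (V + 1)) == 0; }

// One contiguous run of 1s anywhere in the word (and at least one 1):
// filling in the zeros below the run must produce a low mask.
static bool isContiguousRunOfOnes(uint32_t V) {
  return V != 0 && isLowMask(V | (V - 1));
}

int main() {
  assert(isContiguousRunOfOnes(0x0000FF00));  // run of ones in the middle
  assert(isContiguousRunOfOnes(0x00000001));
  assert(!isContiguousRunOfOnes(0));
  assert(!isContiguousRunOfOnes(0x00000101)); // two separate runs
}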
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
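bit_floor keeps only the highest set bit (and maps 0 to 0). An illustrative sketch, not the LLVM implementation, using repeated clearing of the lowest set bit:

#include <cassert>
#include <cstdint>

// Clear the lowest set bit until at most one bit remains; that bit is the
// largest power of two not greater than V (and 0 stays 0).
static uint64_t bitFloor64(uint64_t V) {
  while ((V & (V - 1)) != 0)
    V &= V - 1;
  return V;
}

int main() {
  assert(bitFloor64(1) == 1);
  assert(bitFloor64(96) == 64);   // 0b110'0000 -> 0b100'0000
  assert(bitFloor64(0) == 0);
}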
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:252
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:234
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
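A brief usage sketch for the KnownBits queries above, again assuming an LLVM development install to compile and link against; Zero and One used below are the public APInt members of KnownBits:

#include "llvm/Support/KnownBits.h"
#include <cassert>

int main() {
  llvm::KnownBits Known(8);          // 8-bit value, nothing known yet
  assert(!Known.isConstant());

  // Learn every bit: bit 0 is one, bits 1..7 are zero, so the value is 1.
  Known.One.setBit(0);
  Known.Zero.setBits(1, 8);
  assert(Known.isConstant());
  assert(Known.getConstant() == 1);

  // Forget everything again.
  Known.resetAll();
  assert(!Known.isConstant());
}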
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)