LLVM 23.0.0git
PPCISelLowering.cpp
Go to the documentation of this file.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
// Hidden boolean command-line options consulted by the PPC lowering code.
// Each option's purpose is taken from its cl::desc string below.

// "disable-ppc-preinc": turn off preincrement load/store generation.
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
// "disable-ppc-ilp-pref": do not set the node scheduling preference to ILP.
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
// "disable-ppc-unaligned": turn off unaligned load/store generation.
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
// "disable-ppc-sco": turn off sibling call optimization.
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
// "disable-ppc-innermost-loop-align32": do not always align innermost loops
// to 32 bytes.
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
// "ppc-use-absolute-jumptables": use absolute jump tables.
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
// "ppc-disable-perfect-shuffle": disable vector permute decomposition.
// Unlike the options above, this one is explicitly initialized to true via
// cl::init(true).
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
148 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
167 unsigned OpIdx, bool IsByte,
168 const PPCInstrInfo *TII);
169
170// A faster local-[exec|dynamic] TLS access sequence (enabled with the
171// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
172// variables; consistent with the IBM XL compiler, we apply a max size of
173// slightly under 32KB.
175
176// FIXME: Remove this once the bug has been fixed!
178
180 const PPCSubtarget &STI)
181 : TargetLowering(TM, STI), Subtarget(STI) {
182 // Initialize map that relates the PPC addressing modes to the computed flags
183 // of a load/store instruction. The map is used to determine the optimal
184 // addressing mode when selecting load and stores.
185 initializeAddrModeMap();
186 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
187 // arguments are at least 4/8 bytes aligned.
188 bool isPPC64 = Subtarget.isPPC64();
189 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
190 const MVT RegVT = Subtarget.getScalarIntVT();
191
192 // Set up the register classes.
193 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
194 if (!useSoftFloat()) {
195 if (hasSPE()) {
196 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
197 // EFPU2 APU only supports f32
198 if (!Subtarget.hasEFPU2())
199 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
200 } else {
201 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
202 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
203 }
204 }
205
208
209 // PowerPC uses addo_carry,subo_carry to propagate carry.
212
213 // On P10, the default lowering generates better code using the
214 // setbc instruction.
215 if (!Subtarget.hasP10Vector()) {
218 if (isPPC64) {
221 }
222 }
223
224 // Match BITREVERSE to customized fast code sequence in the td file.
227
228 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
230
231 // Custom lower inline assembly to check for special registers.
234
235 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
236 for (MVT VT : MVT::integer_valuetypes()) {
239 }
240
241 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
243
244 if (Subtarget.isISA3_0()) {
245 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
246 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
247 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
248 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
249 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
250 } else {
251 // No extending loads from f16 or HW conversions back and forth.
252 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
254 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
257 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
260 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
261 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
262 }
263
264 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
265
266 // PowerPC has pre-inc loads and stores.
277 if (!Subtarget.hasSPE()) {
282 }
283
284 if (Subtarget.useCRBits()) {
286
287 if (isPPC64 || Subtarget.hasFPCVT()) {
292
294 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
296 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
297
302
304 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
306 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
307 } else {
312 }
313
314 // PowerPC does not support direct load/store of condition registers.
317
318 // FIXME: Remove this once the ANDI glue bug is fixed:
319 if (ANDIGlueBug)
321
322 for (MVT VT : MVT::integer_valuetypes()) {
325 setTruncStoreAction(VT, MVT::i1, Expand);
326 }
327
328 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
329 }
330
331 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
332 // PPC (the libcall is not available).
337
338 // We do not currently implement these libm ops for PowerPC.
339 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
340 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
341 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
342 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
344 setOperationAction(ISD::FREM, MVT::ppcf128, LibCall);
345
346 // PowerPC has no SREM/UREM instructions unless we are on P9
347 // On P9 we may use a hardware instruction to compute the remainder.
348 // When the result of both the remainder and the division is required it is
349 // more efficient to compute the remainder from the result of the division
350 // rather than use the remainder instruction. The instructions are legalized
351 // directly because the DivRemPairsPass performs the transformation at the IR
352 // level.
353 if (Subtarget.isISA3_0()) {
358 } else {
363 }
364
365 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
374
375 // Handle constrained floating-point operations of scalar.
376 // TODO: Handle SPE specific operation.
382
387
388 if (!Subtarget.hasSPE()) {
391 }
392
393 if (Subtarget.hasVSX()) {
396 }
397
398 if (Subtarget.hasFSQRT()) {
401 }
402
403 if (Subtarget.hasFPRND()) {
408
413 }
414
415 // We don't support sin/cos/sqrt/fmod/pow
426
427 // MASS transformation for LLVM intrinsics with replicating fast-math flag
428 // to be consistent to PPCGenScalarMASSEntries pass
429 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
442 }
443
444 if (Subtarget.hasSPE()) {
447 } else {
448 setOperationAction(ISD::FMA , MVT::f64, Legal);
449 setOperationAction(ISD::FMA , MVT::f32, Legal);
452 }
453
454 if (Subtarget.hasSPE())
455 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
456
457 // If we're enabling GP optimizations, use hardware square root
458 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
460
461 if (!Subtarget.hasFSQRT() &&
462 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
464
465 if (Subtarget.hasFCPSGN()) {
468 } else {
471 }
472
473 if (Subtarget.hasFPRND()) {
478
483 }
484
485 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
486 // instruction xxbrd to speed up scalar BSWAP64.
487 if (Subtarget.isISA3_1()) {
490 } else {
493 ((Subtarget.hasP8Vector()) && isPPC64) ? Custom
494 : Expand);
495 }
496
497 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
498 if (Subtarget.isISA3_0()) {
499 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
500 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
501 } else {
502 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
503 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
504 }
505
506 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
509 } else {
512 }
513
514 // PowerPC does not have ROTR
517
518 if (!Subtarget.useCRBits()) {
519 // PowerPC does not have Select
524 }
525
526 // PowerPC wants to turn select_cc of FP into fsel when possible.
529
530 // PowerPC wants to optimize integer setcc a bit
531 if (!Subtarget.useCRBits())
533
534 if (Subtarget.hasFPU()) {
538
542 }
543
544 // PowerPC does not have BRCOND which requires SetCC
545 if (!Subtarget.useCRBits())
547
549
550 if (Subtarget.hasSPE()) {
551 // SPE has built-in conversions
558
559 // SPE supports signaling compare of f32/f64.
560 // But it doesn't comply with IEEE-754 rules for comparing
561 // special values like NaNs and Infs.
570 } else {
571 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
574
575 // PowerPC does not have [U|S]INT_TO_FP
580 }
581
582 if (Subtarget.hasDirectMove() && isPPC64) {
587
596 } else {
601 }
602
603 // We cannot sextinreg(i1). Expand to shifts.
605
606 // Custom handling for PowerPC ucmp instruction
607 if (isPPC64) {
608 // UCMP involves using carries, which only works in 64-bit
611 } else {
614 }
615
616 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
617 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
618 // support continuation, user-level threading, etc. As a result, no
619 // other SjLj exception interfaces are implemented and please don't build
620 // your own exception handling based on them.
621 // LLVM/Clang supports zero-cost DWARF exception handling.
624
625 // We want to legalize GlobalAddress and ConstantPool nodes into the
626 // appropriate instructions to materialize the address.
637
638 // TRAP is legal.
639 setOperationAction(ISD::TRAP, MVT::Other, Legal);
640
641 // TRAMPOLINE is custom lowered.
644
645 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
647
648 if (Subtarget.is64BitELFABI()) {
649 // VAARG always uses double-word chunks, so promote anything smaller.
651 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
653 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
655 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
657 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
659 } else if (Subtarget.is32BitELFABI()) {
660 // VAARG is custom lowered with the 32-bit SVR4 ABI.
663 } else
665
666 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
667 if (Subtarget.is32BitELFABI())
669 else
671
672 // Use the default implementation.
673 setOperationAction(ISD::VAEND , MVT::Other, Expand);
682
683 if (Subtarget.isISA3_0() && isPPC64) {
684 setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
685 setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
686 setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
687 setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
688 setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
689 setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
690 setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
691 setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
692 }
693
694 // We want to custom lower some of our intrinsics.
700
701 // To handle counter-based loop conditions.
704
709
710 // Comparisons that require checking two conditions.
711 if (Subtarget.hasSPE()) {
716 }
729
732
733 if (Subtarget.has64BitSupport()) {
734 // They also have instructions for converting between i64 and fp.
743 // This is just the low 32 bits of a (signed) fp->i64 conversion.
744 // We cannot do this with Promote because i64 is not a legal type.
747
748 if (Subtarget.hasLFIWAX() || isPPC64) {
751 }
752 } else {
753 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
754 if (Subtarget.hasSPE()) {
757 } else {
760 }
761 }
762
763 // With the instructions enabled under FPCVT, we can do everything.
764 if (Subtarget.hasFPCVT()) {
765 if (Subtarget.has64BitSupport()) {
774 }
775
784 }
785
786 if (Subtarget.use64BitRegs()) {
787 // 64-bit PowerPC implementations can support i64 types directly
788 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
789 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
791 // 64-bit PowerPC wants to expand i128 shifts itself.
795 } else {
796 // 32-bit PowerPC wants to expand i64 shifts itself.
800 }
801
802 // PowerPC has better expansions for funnel shifts than the generic
803 // TargetLowering::expandFunnelShift.
804 if (Subtarget.has64BitSupport()) {
807 }
810
811 if (Subtarget.hasVSX()) {
822 }
823
824 if (Subtarget.hasAltivec()) {
825 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
832 }
833 // First set operation action for all vector types to expand. Then we
834 // will selectively turn on ones that can be effectively codegen'd.
836 // add/sub are legal for all supported vector VT's.
839
840 // For v2i64, these are only valid with P8Vector. This is corrected after
841 // the loop.
842 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
847 }
848 else {
853 }
854
855 if (Subtarget.hasVSX()) {
861 }
862
863 // Vector instructions introduced in P8
864 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
867 }
868 else {
871 }
872
873 // Vector instructions introduced in P9
874 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
876 else
878
879 // We promote all shuffles to v16i8.
881 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
882
883 // We promote all non-typed operations to v4i32.
885 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
887 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
889 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
891 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
893 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
896 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
898 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
899
900 // No other operations are legal.
939
940 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
941 setTruncStoreAction(VT, InnerVT, Expand);
944 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
945 }
946 }
948 if (!Subtarget.hasP8Vector()) {
949 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
950 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
951 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
952 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
953 }
954
955 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
956 // with merges, splats, etc.
958
959 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
960 // are cheap, so handle them before they get expanded to scalar.
966
967 setOperationAction(ISD::AND , MVT::v4i32, Legal);
968 setOperationAction(ISD::OR , MVT::v4i32, Legal);
969 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
970 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
972 Subtarget.useCRBits() ? Legal : Expand);
973 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
983 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
986
987 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
988 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
989 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
990 if (Subtarget.hasAltivec())
991 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
993 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
994 if (Subtarget.hasP8Altivec())
995 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
996
997 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
998 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
999 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
1000 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
1001
1002 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
1003 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
1004
1005 if (Subtarget.hasVSX()) {
1006 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
1007 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
1009 }
1010
1011 if (Subtarget.hasP8Altivec())
1012 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1013 else
1014 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1015
1016 if (Subtarget.isISA3_1()) {
1017 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1018 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
1019 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
1020 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
1021 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
1022 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
1023 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
1024 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
1025 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
1026 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
1027 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
1028 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
1029 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
1030 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
1031 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
1032 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
1033 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
1034 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
1035 }
1036
1037 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1038 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1039
1042 // LE is P8+/64-bit so direct moves are supported and these operations
1043 // are legal. The custom transformation requires 64-bit since we need a
1044 // pair of stores that will cover a 128-bit load for P10.
1045 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1049 }
1050
1055
1056 // Altivec does not contain unordered floating-point compare instructions
1057 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1058 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1059 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1060 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1061
1062 if (Subtarget.hasVSX()) {
1065 if (Subtarget.hasP8Vector()) {
1068 }
1069 if (Subtarget.hasDirectMove() && isPPC64) {
1078 }
1080
1081 // The nearbyint variants are not allowed to raise the inexact exception
1082 // so we can only code-gen them with fpexcept.ignore.
1087
1088 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1089 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1090 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1091 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1092 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1095
1096 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1097 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1100
1101 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1102 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1103
1104 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1105 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1106
1107 // Share the Altivec comparison restrictions.
1108 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1109 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1110 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1111 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1112
1113 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1114 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1115
1117
1118 if (Subtarget.hasP8Vector())
1119 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1120
1121 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1122
1123 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1124 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1125 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1126
1127 if (Subtarget.hasP8Altivec()) {
1128 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1129 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1130 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1131
1132 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1133 // SRL, but not for SRA because of the instructions available:
1134 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1135 // doing
1136 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1137 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1138 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1139
1140 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1141 }
1142 else {
1143 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1144 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1145 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1146
1147 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1148
1149 // VSX v2i64 only supports non-arithmetic operations.
1150 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1151 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1152 }
1153
1154 if (Subtarget.isISA3_1())
1155 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1156 else
1157 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1158
1159 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1160 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1162 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1163
1165
1174
1175 // Custom handling for partial vectors of integers converted to
1176 // floating point. We already have optimal handling for v2i32 through
1177 // the DAG combine, so those aren't necessary.
1194
1195 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1196 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1197 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1198 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1201
1204
1205 // Handle constrained floating-point operations of vector.
1206 // The predicate is `hasVSX` because Altivec instructions raise
1207 // no exceptions but VSX vector instructions do.
1221
1235
1236 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1237 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1238
1239 for (MVT FPT : MVT::fp_valuetypes())
1240 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1241
1242 // Expand the SELECT to SELECT_CC
1244
1245 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1246 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1247
1248 // No implementation for these ops for PowerPC.
1250 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1251 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1252 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1255 }
1256
1257 if (Subtarget.hasP8Altivec()) {
1258 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1259 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1260 }
1261
1262 if (Subtarget.hasP9Vector()) {
1265
1266 // Test data class instructions store results in CR bits.
1267 if (Subtarget.useCRBits()) {
1272 }
1273
1274 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1275 // SRL, but not for SRA because of the instructions available:
1276 // VS{RL} and VS{RL}O.
1277 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1278 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1279 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1280
1281 setOperationAction(ISD::FADD, MVT::f128, Legal);
1282 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1283 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1284 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1286
1287 setOperationAction(ISD::FMA, MVT::f128, Legal);
1294
1296 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1298 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1301
1305
1306 // Handle constrained floating-point operations of fp128
1323 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1324 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1325 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1326 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1327 } else if (Subtarget.hasVSX()) {
1330
1331 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1332 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1333
1334 // Set FADD/FSUB as libcall to keep the legalizer from expanding the
1335 // fp_to_uint and int_to_fp.
1338
1339 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1340 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1341 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1342 setOperationAction(ISD::FABS, MVT::f128, Expand);
1344 setOperationAction(ISD::FMA, MVT::f128, Expand);
1346
1347 // Expand the fp_extend if the target type is fp128.
1350
1351 // Expand the fp_round if the source type is fp128.
1352 for (MVT VT : {MVT::f32, MVT::f64}) {
1355 }
1356
1361
1362 // Lower following f128 select_cc pattern:
1363 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1365
1366 // We need to handle f128 SELECT_CC with integer result type.
1368 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1369 }
1370
1371 if (Subtarget.hasP9Altivec()) {
1372 if (Subtarget.isISA3_1()) {
1377 } else {
1380 }
1388
1389 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1390 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1391 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1392 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1393 }
1394
1395 if (Subtarget.hasP10Vector()) {
1397 }
1398
1401 Legal);
1403 Legal);
1405 Legal);
1407 Legal);
1408 }
1409
1410 if (Subtarget.pairedVectorMemops()) {
1411 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1412 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1413 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1414 }
1415 if (Subtarget.hasMMA()) {
1416 if (Subtarget.isISAFuture()) {
1417 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1418 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1419 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1420 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1421 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1422 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1423 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1424 } else {
1425 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1426 }
1427 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1428 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1430 }
1431
1432 if (Subtarget.has64BitSupport())
1434
1435 if (Subtarget.isISA3_1())
1436 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1437
1438 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1439
1440 if (!isPPC64) {
1443 }
1444
1449 }
1450
1452
1453 if (Subtarget.hasAltivec()) {
1454 // Altivec instructions set fields to all zeros or all ones.
1456 }
1457
1460 else if (isPPC64)
1462 else
1464
1465 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1466
1467 // We have target-specific dag combine patterns for the following nodes:
1471 if (Subtarget.hasFPCVT())
1474 if (Subtarget.useCRBits())
1478
1480
1482
1483 if (Subtarget.useCRBits()) {
1485 }
1486
1487 if (Subtarget.hasP8Vector())
1489
1490 // With 32 condition bits, we don't need to sink (and duplicate) compares
1491 // aggressively in CodeGenPrep.
1492 if (Subtarget.useCRBits()) {
1494 }
1495
1496 // TODO: The default entry number is set to 64. This stops most jump table
1497 // generation on PPC. But it is good for current PPC HWs because the indirect
1498 // branch instruction mtctr to the jump table may lead to bad branch prediction.
1499 // Re-evaluate this value on future HWs that can do better with mtctr.
1501
1502 // The default minimum of largest number in a BitTest cluster is 3.
1504
1506 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1507
1508 auto CPUDirective = Subtarget.getCPUDirective();
1509 switch (CPUDirective) {
1510 default: break;
1511 case PPC::DIR_970:
1512 case PPC::DIR_A2:
1513 case PPC::DIR_E500:
1514 case PPC::DIR_E500mc:
1515 case PPC::DIR_E5500:
1516 case PPC::DIR_PWR4:
1517 case PPC::DIR_PWR5:
1518 case PPC::DIR_PWR5X:
1519 case PPC::DIR_PWR6:
1520 case PPC::DIR_PWR6X:
1521 case PPC::DIR_PWR7:
1522 case PPC::DIR_PWR8:
1523 case PPC::DIR_PWR9:
1524 case PPC::DIR_PWR10:
1525 case PPC::DIR_PWR11:
1529 break;
1530 }
1531
1532 if (Subtarget.enableMachineScheduler())
1534 else
1536
1538
1539 // The Freescale cores do better with aggressive inlining of memcpy and
1540 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1541 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1542 MaxStoresPerMemset = 32;
1544 MaxStoresPerMemcpy = 32;
1548 } else if (CPUDirective == PPC::DIR_A2) {
1549 // The A2 also benefits from (very) aggressive inlining of memcpy and
1550 // friends. The overhead of the function call, even when warm, can be
1551 // over one hundred cycles.
1552 MaxStoresPerMemset = 128;
1553 MaxStoresPerMemcpy = 128;
1554 MaxStoresPerMemmove = 128;
1555 MaxLoadsPerMemcmp = 128;
1556 } else {
1559 }
1560
1561 // Enable generation of STXVP instructions by default for mcpu=future.
1562 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1563 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1564 DisableAutoPairedVecSt = false;
1565
1566 IsStrictFPEnabled = true;
1567
1568 // Let the subtarget (CPU) decide if a predictable select is more expensive
1569 // than the corresponding branch. This information is used in CGP to decide
1570 // when to convert selects into branches.
1571 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1572
1574}
1575
1576// *********************************** NOTE ************************************
1577// For selecting load and store instructions, the addressing modes are defined
1578// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1579 // patterns to match the load and store instructions.
1580//
1581// The TD definitions for the addressing modes correspond to their respective
1582// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1583// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1584// address mode flags of a particular node. Afterwards, the computed address
1585// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1586// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1587// accordingly, based on the preferred addressing mode.
1588//
1589// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1590// MemOpFlags contains all the possible flags that can be used to compute the
1591// optimal addressing mode for load and store instructions.
1592// AddrMode contains all the possible load and store addressing modes available
1593// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1594//
1595// When adding new load and store instructions, it is possible that new address
1596// flags may need to be added into MemOpFlags, and a new addressing mode will
1597// need to be added to AddrMode. An entry of the new addressing mode (consisting
1598// of the minimal and main distinguishing address flags for the new load/store
1599// instructions) will need to be added into initializeAddrModeMap() below.
1600// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1601// need to be updated to account for selecting the optimal addressing mode.
1602// *****************************************************************************
1603/// Initialize the map that relates the different addressing modes of the load
1604/// and store instructions to a set of flags. This ensures the load/store
1605/// instruction is correctly matched during instruction selection.
1606void PPCTargetLowering::initializeAddrModeMap() {
1607 AddrModesMap[PPC::AM_DForm] = {
1608 // LWZ, STW
1613 // LBZ, LHZ, STB, STH
1618 // LHA
1623 // LFS, LFD, STFS, STFD
1628 };
1629 AddrModesMap[PPC::AM_DSForm] = {
1630 // LWA
1634 // LD, STD
1638 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1642 };
1643 AddrModesMap[PPC::AM_DQForm] = {
1644 // LXV, STXV
1648 };
1649 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1651 // TODO: Add mapping for quadword load/store.
1652}
1653
1654/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1655/// the desired ByVal argument alignment.
1656static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1657 if (MaxAlign == MaxMaxAlign)
1658 return;
1659 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1660 if (MaxMaxAlign >= 32 &&
1661 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1662 MaxAlign = Align(32);
1663 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1664 MaxAlign < 16)
1665 MaxAlign = Align(16);
1666 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1667 Align EltAlign;
1668 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1669 if (EltAlign > MaxAlign)
1670 MaxAlign = EltAlign;
1671 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1672 for (auto *EltTy : STy->elements()) {
1673 Align EltAlign;
1674 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1675 if (EltAlign > MaxAlign)
1676 MaxAlign = EltAlign;
1677 if (MaxAlign == MaxMaxAlign)
1678 break;
1679 }
1680 }
1681}
1682
1683/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1684/// function arguments in the caller parameter area.
1686 const DataLayout &DL) const {
1687 // 16byte and wider vectors are passed on 16byte boundary.
1688 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1689 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1690 if (Subtarget.hasAltivec())
1691 getMaxByValAlign(Ty, Alignment, Align(16));
1692 return Alignment;
1693}
1694
1696 return Subtarget.useSoftFloat();
1697}
1698
1700 return Subtarget.hasSPE();
1701}
1702
1704 return VT.isScalarInteger();
1705}
1706
1708 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1709 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1710 return false;
1711
1712 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1713 if (VTy->getScalarType()->isIntegerTy()) {
1714 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1715 if (ElemSizeInBits == 32) {
1716 Index = Subtarget.isLittleEndian() ? 2 : 1;
1717 return true;
1718 }
1719 if (ElemSizeInBits == 64) {
1720 Index = Subtarget.isLittleEndian() ? 1 : 0;
1721 return true;
1722 }
1723 }
1724 }
1725 return false;
1726}
1727
1729 EVT VT) const {
1730 if (!VT.isVector())
1731 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1732
1734}
1735
1737 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1738 return true;
1739}
1740
1741//===----------------------------------------------------------------------===//
1742// Node matching predicates, for use by the tblgen matching code.
1743//===----------------------------------------------------------------------===//
1744
1745/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1748 return CFP->getValueAPF().isZero();
1749 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1750 // Maybe this has already been legalized into the constant pool?
1751 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1752 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1753 return CFP->getValueAPF().isZero();
1754 }
1755 return false;
1756}
1757
/// isConstantOrUndef - Test a single shuffle-mask entry against an expected
/// index. A negative \p Op is treated as undef and always accepted; any other
/// \p Op is accepted only when it equals \p Val.
static bool isConstantOrUndef(int Op, int Val) {
  if (Op < 0)
    return true;
  return Op == Val;
}
1763
1764/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1765/// VPKUHUM instruction.
1766/// The ShuffleKind distinguishes between big-endian operations with
1767/// two different inputs (0), either-endian operations with two identical
1768/// inputs (1), and little-endian operations with two different inputs (2).
1769/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1771 SelectionDAG &DAG) {
1772 bool IsLE = DAG.getDataLayout().isLittleEndian();
1773 if (ShuffleKind == 0) {
1774 if (IsLE)
1775 return false;
1776 for (unsigned i = 0; i != 16; ++i)
1777 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1778 return false;
1779 } else if (ShuffleKind == 2) {
1780 if (!IsLE)
1781 return false;
1782 for (unsigned i = 0; i != 16; ++i)
1783 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1784 return false;
1785 } else if (ShuffleKind == 1) {
1786 unsigned j = IsLE ? 0 : 1;
1787 for (unsigned i = 0; i != 8; ++i)
1788 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1789 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1790 return false;
1791 }
1792 return true;
1793}
1794
1795/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1796/// VPKUWUM instruction.
1797/// The ShuffleKind distinguishes between big-endian operations with
1798/// two different inputs (0), either-endian operations with two identical
1799/// inputs (1), and little-endian operations with two different inputs (2).
1800/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1802 SelectionDAG &DAG) {
1803 bool IsLE = DAG.getDataLayout().isLittleEndian();
1804 if (ShuffleKind == 0) {
1805 if (IsLE)
1806 return false;
1807 for (unsigned i = 0; i != 16; i += 2)
1808 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1809 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1810 return false;
1811 } else if (ShuffleKind == 2) {
1812 if (!IsLE)
1813 return false;
1814 for (unsigned i = 0; i != 16; i += 2)
1815 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1816 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1817 return false;
1818 } else if (ShuffleKind == 1) {
1819 unsigned j = IsLE ? 0 : 2;
1820 for (unsigned i = 0; i != 8; i += 2)
1821 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1822 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1823 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1824 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1825 return false;
1826 }
1827 return true;
1828}
1829
1830/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1831/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1832/// current subtarget.
1833///
1834/// The ShuffleKind distinguishes between big-endian operations with
1835/// two different inputs (0), either-endian operations with two identical
1836/// inputs (1), and little-endian operations with two different inputs (2).
1837/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1839 SelectionDAG &DAG) {
1840 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1841 if (!Subtarget.hasP8Vector())
1842 return false;
1843
1844 bool IsLE = DAG.getDataLayout().isLittleEndian();
1845 if (ShuffleKind == 0) {
1846 if (IsLE)
1847 return false;
1848 for (unsigned i = 0; i != 16; i += 4)
1849 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1850 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1851 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1852 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1853 return false;
1854 } else if (ShuffleKind == 2) {
1855 if (!IsLE)
1856 return false;
1857 for (unsigned i = 0; i != 16; i += 4)
1858 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1859 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1860 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1861 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1862 return false;
1863 } else if (ShuffleKind == 1) {
1864 unsigned j = IsLE ? 0 : 4;
1865 for (unsigned i = 0; i != 8; i += 4)
1866 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1867 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1868 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1869 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1870 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1871 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1872 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1873 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1874 return false;
1875 }
1876 return true;
1877}
1878
1879/// isVMerge - Common function, used to match vmrg* shuffles.
1880///
1881static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1882 unsigned LHSStart, unsigned RHSStart) {
1883 if (N->getValueType(0) != MVT::v16i8)
1884 return false;
1885 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1886 "Unsupported merge size!");
1887
1888 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1889 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1890 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1891 LHSStart+j+i*UnitSize) ||
1892 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1893 RHSStart+j+i*UnitSize))
1894 return false;
1895 }
1896 return true;
1897}
1898
1899/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1900/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1901/// The ShuffleKind distinguishes between big-endian merges with two
1902/// different inputs (0), either-endian merges with two identical inputs (1),
1903/// and little-endian merges with two different inputs (2). For the latter,
1904/// the input operands are swapped (see PPCInstrAltivec.td).
1906 unsigned ShuffleKind, SelectionDAG &DAG) {
1907 if (DAG.getDataLayout().isLittleEndian()) {
1908 if (ShuffleKind == 1) // unary
1909 return isVMerge(N, UnitSize, 0, 0);
1910 else if (ShuffleKind == 2) // swapped
1911 return isVMerge(N, UnitSize, 0, 16);
1912 else
1913 return false;
1914 } else {
1915 if (ShuffleKind == 1) // unary
1916 return isVMerge(N, UnitSize, 8, 8);
1917 else if (ShuffleKind == 0) // normal
1918 return isVMerge(N, UnitSize, 8, 24);
1919 else
1920 return false;
1921 }
1922}
1923
1924/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1925/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1926/// The ShuffleKind distinguishes between big-endian merges with two
1927/// different inputs (0), either-endian merges with two identical inputs (1),
1928/// and little-endian merges with two different inputs (2). For the latter,
1929/// the input operands are swapped (see PPCInstrAltivec.td).
1931 unsigned ShuffleKind, SelectionDAG &DAG) {
1932 if (DAG.getDataLayout().isLittleEndian()) {
1933 if (ShuffleKind == 1) // unary
1934 return isVMerge(N, UnitSize, 8, 8);
1935 else if (ShuffleKind == 2) // swapped
1936 return isVMerge(N, UnitSize, 8, 24);
1937 else
1938 return false;
1939 } else {
1940 if (ShuffleKind == 1) // unary
1941 return isVMerge(N, UnitSize, 0, 0);
1942 else if (ShuffleKind == 0) // normal
1943 return isVMerge(N, UnitSize, 0, 16);
1944 else
1945 return false;
1946 }
1947}
1948
1949/**
1950 * Common function used to match vmrgew and vmrgow shuffles
1951 *
1952 * The indexOffset determines whether to look for even or odd words in
1953 * the shuffle mask. This is based on the endianness of the target
1954 * machine.
1955 * - Little Endian:
1956 * - Use offset of 0 to check for odd elements
1957 * - Use offset of 4 to check for even elements
1958 * - Big Endian:
1959 * - Use offset of 0 to check for even elements
1960 * - Use offset of 4 to check for odd elements
1961 * A detailed description of the vector element ordering for little endian and
1962 * big endian can be found at
1963 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1964 * Targeting your applications - what little endian and big endian IBM XL C/C++
1965 * compiler differences mean to you
1966 *
1967 * The mask to the shuffle vector instruction specifies the indices of the
1968 * elements from the two input vectors to place in the result. The elements are
1969 * numbered in array-access order, starting with the first vector. These vectors
1970 * are always of type v16i8, thus each vector will contain 16 elements of size
1971 * 8. More info on the shuffle vector can be found in the
1972 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1973 * Language Reference.
1974 *
1975 * The RHSStartValue indicates whether the same input vectors are used (unary)
1976 * or two different input vectors are used, based on the following:
1977 * - If the instruction uses the same vector for both inputs, the range of the
1978 * indices will be 0 to 15. In this case, the RHSStart value passed should
1979 * be 0.
1980 * - If the instruction has two different vectors then the range of the
1981 * indices will be 0 to 31. In this case, the RHSStart value passed should
1982 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1983 * to 31 specify elements in the second vector).
1984 *
1985 * \param[in] N The shuffle vector SD Node to analyze
1986 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1987 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1988 * vector to the shuffle_vector instruction
1989 * \return true iff this shuffle vector represents an even or odd word merge
1990 */
1991static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1992 unsigned RHSStartValue) {
1993 if (N->getValueType(0) != MVT::v16i8)
1994 return false;
1995
1996 for (unsigned i = 0; i < 2; ++i)
1997 for (unsigned j = 0; j < 4; ++j)
1998 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1999 i*RHSStartValue+j+IndexOffset) ||
2000 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2001 i*RHSStartValue+j+IndexOffset+8))
2002 return false;
2003 return true;
2004}
2005
2006/**
2007 * Determine if the specified shuffle mask is suitable for the vmrgew or
2008 * vmrgow instructions.
2009 *
2010 * \param[in] N The shuffle vector SD Node to analyze
2011 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2012 * \param[in] ShuffleKind Identify the type of merge:
2013 * - 0 = big-endian merge with two different inputs;
2014 * - 1 = either-endian merge with two identical inputs;
2015 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2016 * little-endian merges).
2017 * \param[in] DAG The current SelectionDAG
2018 * \return true iff this shuffle mask is suitable for vmrgew or vmrgow
2019 */
2021 unsigned ShuffleKind, SelectionDAG &DAG) {
2022 if (DAG.getDataLayout().isLittleEndian()) {
2023 unsigned indexOffset = CheckEven ? 4 : 0;
2024 if (ShuffleKind == 1) // Unary
2025 return isVMerge(N, indexOffset, 0);
2026 else if (ShuffleKind == 2) // swapped
2027 return isVMerge(N, indexOffset, 16);
2028 else
2029 return false;
2030 }
2031 else {
2032 unsigned indexOffset = CheckEven ? 0 : 4;
2033 if (ShuffleKind == 1) // Unary
2034 return isVMerge(N, indexOffset, 0);
2035 else if (ShuffleKind == 0) // Normal
2036 return isVMerge(N, indexOffset, 16);
2037 else
2038 return false;
2039 }
2040 return false;
2041}
2042
2043/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2044/// amount, otherwise return -1.
2045/// The ShuffleKind distinguishes between big-endian operations with two
2046/// different inputs (0), either-endian operations with two identical inputs
2047/// (1), and little-endian operations with two different inputs (2). For the
2048/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2049int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2050 SelectionDAG &DAG) {
2051 if (N->getValueType(0) != MVT::v16i8)
2052 return -1;
2053
2055
2056 // Find the first non-undef value in the shuffle mask.
2057 unsigned i;
2058 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2059 /*search*/;
2060
2061 if (i == 16) return -1; // all undef.
2062
2063 // Otherwise, check to see if the rest of the elements are consecutively
2064 // numbered from this value.
2065 unsigned ShiftAmt = SVOp->getMaskElt(i);
2066 if (ShiftAmt < i) return -1;
2067
2068 ShiftAmt -= i;
2069 bool isLE = DAG.getDataLayout().isLittleEndian();
2070
2071 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2072 // Check the rest of the elements to see if they are consecutive.
2073 for (++i; i != 16; ++i)
2074 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2075 return -1;
2076 } else if (ShuffleKind == 1) {
2077 // Check the rest of the elements to see if they are consecutive.
2078 for (++i; i != 16; ++i)
2079 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2080 return -1;
2081 } else
2082 return -1;
2083
2084 if (isLE)
2085 ShiftAmt = 16 - ShiftAmt;
2086
2087 return ShiftAmt;
2088}
2089
2090/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2091/// specifies a splat of a single element that is suitable for input to
2092/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2094 EVT VT = N->getValueType(0);
2095 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2096 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2097
2098 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2099 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2100
2101 // The consecutive indices need to specify an element, not part of two
2102 // different elements. So abandon ship early if this isn't the case.
2103 if (N->getMaskElt(0) % EltSize != 0)
2104 return false;
2105
2106 // This is a splat operation if each element of the permute is the same, and
2107 // if the value doesn't reference the second vector.
2108 unsigned ElementBase = N->getMaskElt(0);
2109
2110 // FIXME: Handle UNDEF elements too!
2111 if (ElementBase >= 16)
2112 return false;
2113
2114 // Check that the indices are consecutive, in the case of a multi-byte element
2115 // splatted with a v16i8 mask.
2116 for (unsigned i = 1; i != EltSize; ++i)
2117 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2118 return false;
2119
2120 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2121 // An UNDEF element is a sequence of UNDEF bytes.
2122 if (N->getMaskElt(i) < 0) {
2123 for (unsigned j = 1; j != EltSize; ++j)
2124 if (N->getMaskElt(i + j) >= 0)
2125 return false;
2126 } else
2127 for (unsigned j = 0; j != EltSize; ++j)
2128 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2129 return false;
2130 }
2131 return true;
2132}
2133
2134/// Check that the mask is shuffling N byte elements. Within each N byte
2135/// element of the mask, the indices could be either in increasing or
2136/// decreasing order as long as they are consecutive.
2137/// \param[in] N the shuffle vector SD Node to analyze
2138/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2139/// Word/DoubleWord/QuadWord).
2140/// \param[in] StepLen the delta indices number among the N byte element, if
2141/// the mask is in increasing/decreasing order then it is 1/-1.
2142/// \return true iff the mask is shuffling N byte elements.
2143static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2144 int StepLen) {
2145 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2146 "Unexpected element width.");
2147 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2148
2149 unsigned NumOfElem = 16 / Width;
2150 unsigned MaskVal[16]; // Width is never greater than 16
2151 for (unsigned i = 0; i < NumOfElem; ++i) {
2152 MaskVal[0] = N->getMaskElt(i * Width);
2153 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2154 return false;
2155 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2156 return false;
2157 }
2158
2159 for (unsigned int j = 1; j < Width; ++j) {
2160 MaskVal[j] = N->getMaskElt(i * Width + j);
2161 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2162 return false;
2163 }
2164 }
2165 }
2166
2167 return true;
2168}
2169
2170bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2171 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2172 if (!isNByteElemShuffleMask(N, 4, 1))
2173 return false;
2174
2175 // Now we look at mask elements 0,4,8,12
2176 unsigned M0 = N->getMaskElt(0) / 4;
2177 unsigned M1 = N->getMaskElt(4) / 4;
2178 unsigned M2 = N->getMaskElt(8) / 4;
2179 unsigned M3 = N->getMaskElt(12) / 4;
2180 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2181 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2182
2183 // Below, let H and L be arbitrary elements of the shuffle mask
2184 // where H is in the range [4,7] and L is in the range [0,3].
2185 // H, 1, 2, 3 or L, 5, 6, 7
2186 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2187 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2188 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2189 InsertAtByte = IsLE ? 12 : 0;
2190 Swap = M0 < 4;
2191 return true;
2192 }
2193 // 0, H, 2, 3 or 4, L, 6, 7
2194 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2195 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2196 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2197 InsertAtByte = IsLE ? 8 : 4;
2198 Swap = M1 < 4;
2199 return true;
2200 }
2201 // 0, 1, H, 3 or 4, 5, L, 7
2202 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2203 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2204 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2205 InsertAtByte = IsLE ? 4 : 8;
2206 Swap = M2 < 4;
2207 return true;
2208 }
2209 // 0, 1, 2, H or 4, 5, 6, L
2210 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2211 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2212 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2213 InsertAtByte = IsLE ? 0 : 12;
2214 Swap = M3 < 4;
2215 return true;
2216 }
2217
2218 // If both vector operands for the shuffle are the same vector, the mask will
2219 // contain only elements from the first one and the second one will be undef.
2220 if (N->getOperand(1).isUndef()) {
2221 ShiftElts = 0;
2222 Swap = true;
2223 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2224 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2225 InsertAtByte = IsLE ? 12 : 0;
2226 return true;
2227 }
2228 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2229 InsertAtByte = IsLE ? 8 : 4;
2230 return true;
2231 }
2232 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2233 InsertAtByte = IsLE ? 4 : 8;
2234 return true;
2235 }
2236 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2237 InsertAtByte = IsLE ? 0 : 12;
2238 return true;
2239 }
2240 }
2241
2242 return false;
2243}
2244
2246 bool &Swap, bool IsLE) {
2247 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2248 // Ensure each byte index of the word is consecutive.
2249 if (!isNByteElemShuffleMask(N, 4, 1))
2250 return false;
2251
2252 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2253 unsigned M0 = N->getMaskElt(0) / 4;
2254 unsigned M1 = N->getMaskElt(4) / 4;
2255 unsigned M2 = N->getMaskElt(8) / 4;
2256 unsigned M3 = N->getMaskElt(12) / 4;
2257
2258 // If both vector operands for the shuffle are the same vector, the mask will
2259 // contain only elements from the first one and the second one will be undef.
2260 if (N->getOperand(1).isUndef()) {
2261 assert(M0 < 4 && "Indexing into an undef vector?");
2262 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2263 return false;
2264
2265 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2266 Swap = false;
2267 return true;
2268 }
2269
2270 // Ensure each word index of the ShuffleVector Mask is consecutive.
2271 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2272 return false;
2273
2274 if (IsLE) {
2275 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2276 // Input vectors don't need to be swapped if the leading element
2277 // of the result is one of the 3 left elements of the second vector
2278 // (or if there is no shift to be done at all).
2279 Swap = false;
2280 ShiftElts = (8 - M0) % 8;
2281 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2282 // Input vectors need to be swapped if the leading element
2283 // of the result is one of the 3 left elements of the first vector
2284 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2285 Swap = true;
2286 ShiftElts = (4 - M0) % 4;
2287 }
2288
2289 return true;
2290 } else { // BE
2291 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2292 // Input vectors don't need to be swapped if the leading element
2293 // of the result is one of the 4 elements of the first vector.
2294 Swap = false;
2295 ShiftElts = M0;
2296 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2297 // Input vectors need to be swapped if the leading element
2298 // of the result is one of the 4 elements of the right vector.
2299 Swap = true;
2300 ShiftElts = M0 - 4;
2301 }
2302
2303 return true;
2304 }
2305}
2306
2308 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2309
2310 if (!isNByteElemShuffleMask(N, Width, -1))
2311 return false;
2312
2313 for (int i = 0; i < 16; i += Width)
2314 if (N->getMaskElt(i) != i + Width - 1)
2315 return false;
2316
2317 return true;
2318}
2319
2323
2327
2331
2335
2336/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2337/// if the inputs to the instruction should be swapped and set \p DM to the
2338/// value for the immediate.
2339/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2340/// AND element 0 of the result comes from the first input (LE) or second input
2341/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2342/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2343/// mask.
2345 bool &Swap, bool IsLE) {
2346 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2347
2348 // Ensure each byte index of the double word is consecutive.
2349 if (!isNByteElemShuffleMask(N, 8, 1))
2350 return false;
2351
2352 unsigned M0 = N->getMaskElt(0) / 8;
2353 unsigned M1 = N->getMaskElt(8) / 8;
2354 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2355
2356 // If both vector operands for the shuffle are the same vector, the mask will
2357 // contain only elements from the first one and the second one will be undef.
2358 if (N->getOperand(1).isUndef()) {
2359 if ((M0 | M1) < 2) {
2360 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2361 Swap = false;
2362 return true;
2363 } else
2364 return false;
2365 }
2366
2367 if (IsLE) {
2368 if (M0 > 1 && M1 < 2) {
2369 Swap = false;
2370 } else if (M0 < 2 && M1 > 1) {
2371 M0 = (M0 + 2) % 4;
2372 M1 = (M1 + 2) % 4;
2373 Swap = true;
2374 } else
2375 return false;
2376
2377 // Note: if control flow comes here that means Swap is already set above
2378 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2379 return true;
2380 } else { // BE
2381 if (M0 < 2 && M1 > 1) {
2382 Swap = false;
2383 } else if (M0 > 1 && M1 < 2) {
2384 M0 = (M0 + 2) % 4;
2385 M1 = (M1 + 2) % 4;
2386 Swap = true;
2387 } else
2388 return false;
2389
2390 // Note: if control flow comes here that means Swap is already set above
2391 DM = (M0 << 1) + (M1 & 1);
2392 return true;
2393 }
2394}
2395
2396
2397/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2398/// appropriate for PPC mnemonics (which have a big endian bias - namely
2399/// elements are counted from the left of the vector register).
2400unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2401 SelectionDAG &DAG) {
2403 assert(isSplatShuffleMask(SVOp, EltSize));
2404 EVT VT = SVOp->getValueType(0);
2405
2406 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2407 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2408 : SVOp->getMaskElt(0);
2409
2410 if (DAG.getDataLayout().isLittleEndian())
2411 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2412 else
2413 return SVOp->getMaskElt(0) / EltSize;
2414}
2415
2416/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2417/// by using a vspltis[bhw] instruction of the specified element size, return
2418/// the constant being splatted. The ByteSize field indicates the number of
2419/// bytes of each element [124] -> [bhw].
    ///
    /// A null SDValue is returned on every path that fails to find such an
    /// immediate (for example non-constant or mismatched operands, an all-undef
    /// vector, or a value that does not fit the 5-bit signed field).
    // NOTE(review): the declaration line that introduces N, ByteSize and DAG is
    // not present in this listing.
2421 SDValue OpVal;
2422
2423 // If ByteSize of the splat is bigger than the element size of the
2424 // build_vector, then we have a case where we are checking for a splat where
2425 // multiple elements of the buildvector are folded together into a single
2426 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
     // EltSize is the width in bytes of one build_vector operand, assuming a
     // 16-byte vector.
2427 unsigned EltSize = 16/N->getNumOperands();
2428 if (EltSize < ByteSize) {
2429 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2430 SDValue UniquedVals[4];
2431 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2432
2433 // See if all of the elements in the buildvector agree across.
     // Operand i is bucketed by its position inside a splat-sized chunk
     // (i & (Multiple-1)); every bucket must hold a single constant.
2434 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2435 if (N->getOperand(i).isUndef()) continue;
2436 // If the element isn't a constant, bail fully out.
2437 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2438
2439 if (!UniquedVals[i&(Multiple-1)].getNode())
2440 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2441 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2442 return SDValue(); // no match.
2443 }
2444
2445 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2446 // either constant or undef values that are identical for each chunk. See
2447 // if these chunks can form into a larger vspltis*.
2448
2449 // Check to see if all of the leading entries are either 0 or -1. If
2450 // neither, then this won't fit into the immediate field.
2451 bool LeadingZero = true;
2452 bool LeadingOnes = true;
2453 for (unsigned i = 0; i != Multiple-1; ++i) {
2454 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2455
2456 LeadingZero &= isNullConstant(UniquedVals[i]);
2457 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2458 }
2459 // Finally, check the least significant entry.
2460 if (LeadingZero) {
2461 if (!UniquedVals[Multiple-1].getNode())
2462 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
     // NOTE(review): the zero-extended 64-bit value is narrowed to int before
     // the range check below - confirm that wide operands cannot reach here.
2463 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2464 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2465 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2466 }
2467 if (LeadingOnes) {
2468 if (!UniquedVals[Multiple-1].getNode())
2469 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2470 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
     // NOTE(review): Val can be negative here yet is passed to
     // getTargetConstant, while the final return of this function uses
     // getSignedTargetConstant - confirm this is intended.
2471 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2472 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2473 }
2474
2475 return SDValue();
2476 }
2477
2478 // Check to see if this buildvec has a single non-undef value in its elements.
2479 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2480 if (N->getOperand(i).isUndef()) continue;
2481 if (!OpVal.getNode())
2482 OpVal = N->getOperand(i);
2483 else if (OpVal != N->getOperand(i))
2484 return SDValue();
2485 }
2486
2487 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2488
     // Fetch the raw bits of the splatted element; an operand that is neither an
     // integer nor an FP constant leaves Value at 0 and is rejected by the zero
     // check further down.
2489 unsigned ValSizeInBytes = EltSize;
2490 uint64_t Value = 0;
2491 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2492 Value = CN->getZExtValue();
2493 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2494 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2495 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2496 }
2497
2498 // If the splat value is larger than the element value, then we can never do
2499 // this splat. The only case that we could fit the replicated bits into our
2500 // immediate field for would be zero, and we prefer to use vxor for it.
2501 if (ValSizeInBytes < ByteSize) return SDValue();
2502
2503 // If the element value is larger than the splat value, check if it consists
2504 // of a repeated bit pattern of size ByteSize.
2505 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2506 return SDValue();
2507
2508 // Properly sign extend the value.
2509 int MaskVal = SignExtend32(Value, ByteSize * 8);
2510
2511 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2512 if (MaskVal == 0) return SDValue();
2513
2514 // Finally, if this value fits in a 5 bit sext field, return it
2515 if (SignExtend32<5>(MaskVal) == MaskVal)
2516 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2517 return SDValue();
2518}
2519
2520//===----------------------------------------------------------------------===//
2521// Addressing Mode Selection
2522//===----------------------------------------------------------------------===//
2523
2524/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2525/// or 64-bit immediate, and if the value can be accurately represented as a
2526/// sign extension from a 16-bit value. If so, this returns true and the
2527/// immediate.
2528bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2529 if (!isa<ConstantSDNode>(N))
2530 return false;
2531
2532 Imm = (int16_t)N->getAsZExtVal();
2533 if (N->getValueType(0) == MVT::i32)
2534 return Imm == (int32_t)N->getAsZExtVal();
2535 else
2536 return Imm == (int64_t)N->getAsZExtVal();
2537}
2539 return isIntS16Immediate(Op.getNode(), Imm);
2540}
2541
2542/// Used when computing address flags for selecting loads and stores.
2543/// If we have an OR, check if the LHS and RHS are provably disjoint.
2544/// An OR of two provably disjoint values is equivalent to an ADD.
2545/// Most PPC load/store instructions compute the effective address as a sum,
2546/// so doing this conversion is useful.
2547static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2548 if (N.getOpcode() != ISD::OR)
2549 return false;
2550 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2551 if (!LHSKnown.Zero.getBoolValue())
2552 return false;
2553 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2554 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2555}
2556
2557/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2558/// be represented as an indexed [r+r] operation.
    ///
    /// Succeeds as soon as any user of N is a memory node whose memory type is
    /// f64; in that case Base and Index are set to operands 0 and 1 of N.
    /// Otherwise returns false and leaves Base and Index untouched.
    // NOTE(review): the first line of the signature is not present in this
    // listing.
2560 SDValue &Index,
2561 SelectionDAG &DAG) const {
2562 for (SDNode *U : N->users()) {
2563 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2564 if (Memop->getMemoryVT() == MVT::f64) {
2565 Base = N.getOperand(0);
2566 Index = N.getOperand(1);
2567 return true;
2568 }
2569 }
2570 }
2571 return false;
2572}
2573
2574/// isIntS34Immediate - This method tests if value of node given can be
2575/// accurately represented as a sign extension from a 34-bit value. If so,
2576/// this returns true and the immediate.
2577bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2578 if (!isa<ConstantSDNode>(N))
2579 return false;
2580
2581 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2582 return isInt<34>(Imm);
2583}
2585 return isIntS34Immediate(Op.getNode(), Imm);
2586}
2587
2588/// SelectAddressRegReg - Given the specified address, check to see if it
2589/// can be represented as an indexed [r+r] operation. Returns false if it
2590/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2591/// non-zero and N can be represented by a base register plus a signed 16-bit
2592/// displacement, make a more precise judgement by checking (displacement % \p
2593/// EncodingAlignment).
    ///
    /// On success Base and Index receive operands 0 and 1 of N.
    // NOTE(review): the first signature line and the condition guarding the
    // PC-relative early return are not present in this listing.
2595 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2596 MaybeAlign EncodingAlignment) const {
2597 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2598 // a [pc+imm].
2600 return false;
2601
2602 int16_t Imm = 0;
2603 if (N.getOpcode() == ISD::ADD) {
2604 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2605 // SPE load/store can only handle 8-bit offsets.
2606 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2607 return true;
     // A suitably aligned signed 16-bit constant on the right-hand side is left
     // for the [r+imm] form.
2608 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2609 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2610 return false; // r+i
2611 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2612 return false; // r+i
2613
2614 Base = N.getOperand(0);
2615 Index = N.getOperand(1);
2616 return true;
2617 } else if (N.getOpcode() == ISD::OR) {
2618 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2619 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2620 return false; // r+i can fold it if we can.
2621
2622 // If this is an or of disjoint bitfields, we can codegen this as an add
2623 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2624 // disjoint.
     // NOTE(review): this repeats the known-bits test performed by
     // provablyDisjointOr() earlier in this file.
2625 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2626
2627 if (LHSKnown.Zero.getBoolValue()) {
2628 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2629 // If all of the bits are known zero on the LHS or RHS, the add won't
2630 // carry.
2631 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2632 Base = N.getOperand(0);
2633 Index = N.getOperand(1);
2634 return true;
2635 }
2636 }
2637 }
2638
2639 return false;
2640}
2641
2642// If we happen to be doing an i64 load or store into a stack slot that has
2643// less than a 4-byte alignment, then the frame-index elimination may need to
2644// use an indexed load or store instruction (because the offset may not be a
2645// multiple of 4). The extra register needed to hold the offset comes from the
2646// register scavenger, and it is possible that the scavenger will need to use
2647// an emergency spill slot. As a result, we need to make sure that a spill slot
2648// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2649// stack slot.
    //
    // Concretely: for a non-negative FrameIdx accessed as i64 whose object
    // alignment is below 4, mark the function via setHasNonRISpills(). Every
    // other combination is a no-op.
2650static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2651 // FIXME: This does not handle the LWA case.
2652 if (VT != MVT::i64)
2653 return;
2654
2655 // NOTE: We'll exclude negative FIs here, which come from argument
2656 // lowering, because there are no known test cases triggering this problem
2657 // using packed structures (or similar). We can remove this exclusion if
2658 // we find such a test case. The reason why this is so test-case driven is
2659 // because this entire 'fixup' is only to prevent crashes (from the
2660 // register scavenger) on not-really-valid inputs. For example, if we have:
2661 // %a = alloca i1
2662 // %b = bitcast i1* %a to i64*
2663 // store i64* a, i64 b
2664 // then the store should really be marked as 'align 1', but is not. If it
2665 // were marked as 'align 1' then the indexed form would have been
2666 // instruction-selected initially, and the problem this 'fixup' is preventing
2667 // won't happen regardless.
2668 if (FrameIdx < 0)
2669 return;
2670
     // NOTE(review): the statement that defines MF is not present in this
     // listing.
2672 MachineFrameInfo &MFI = MF.getFrameInfo();
2673
     // Slots aligned to at least 4 bytes need no special handling.
2674 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2675 return;
2676
2677 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2678 FuncInfo->setHasNonRISpills();
2679}
2680
2681/// Returns true if the address N can be represented by a base register plus
2682/// a signed 16-bit displacement [r+imm], and if it is not better
2683/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2684/// displacements that are multiples of that value.
    ///
    /// When none of the specific patterns match, the address falls back to
    /// [N + 0] and the function still returns true.
    // NOTE(review): the first signature line, the PC-relative guard condition
    // and the frame-index test of the fallback path are not present in this
    // listing.
2686 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2687 MaybeAlign EncodingAlignment) const {
2688 // FIXME dl should come from parent load or store, not from address
2689 SDLoc dl(N);
2690
2691 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2692 // a [pc+imm].
2694 return false;
2695
2696 // If this can be more profitably realized as r+r, fail.
     // Disp and Base double as scratch outputs for the r+r query, so they have
     // been overwritten when this early return is taken.
2697 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2698 return false;
2699
2700 if (N.getOpcode() == ISD::ADD) {
2701 int16_t imm = 0;
2702 if (isIntS16Immediate(N.getOperand(1), imm) &&
2703 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2704 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2705 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2706 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2707 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2708 } else {
2709 Base = N.getOperand(0);
2710 }
2711 return true; // [r+i]
2712 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2713 // Match LOAD (ADD (X, Lo(G))).
2714 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2715 "Cannot handle constant offsets yet!");
2716 Disp = N.getOperand(1).getOperand(0); // The global address.
2721 Base = N.getOperand(0);
2722 return true; // [&g+r]
2723 }
2724 } else if (N.getOpcode() == ISD::OR) {
2725 int16_t imm = 0;
2726 if (isIntS16Immediate(N.getOperand(1), imm) &&
2727 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2728 // If this is an or of disjoint bitfields, we can codegen this as an add
2729 // (for better address arithmetic) if the LHS and RHS of the OR are
2730 // provably disjoint.
2731 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2732
     // Every bit set in imm must be known zero on the left-hand side.
2733 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2734 // If all of the bits are known zero on the LHS or RHS, the add won't
2735 // carry.
2736 if (FrameIndexSDNode *FI =
2737 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2738 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2739 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2740 } else {
2741 Base = N.getOperand(0);
2742 }
     // NOTE(review): the ADD path above builds the same signed displacement
     // with getSignedTargetConstant; this path uses getTargetConstant -
     // confirm the difference is intended for negative imm.
2743 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2744 return true;
2745 }
2746 }
2747 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2748 // Loading from a constant address.
2749
2750 // If this address fits entirely in a 16-bit sext immediate field, codegen
2751 // this as "d, 0"
2752 int16_t Imm;
2753 if (isIntS16Immediate(CN, Imm) &&
2754 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
     // NOTE(review): Imm is signed but passed to getTargetConstant - confirm
     // behaviour for negative addresses.
2755 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2756 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2757 CN->getValueType(0));
2758 return true;
2759 }
2760
2761 // Handle 32-bit sext immediates with LIS + addr mode.
2762 if ((CN->getValueType(0) == MVT::i32 ||
2763 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2764 (!EncodingAlignment ||
2765 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2766 int Addr = (int)CN->getZExtValue();
2767
2768 // Otherwise, break this down into an LIS + disp.
2769 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2770
     // The high part compensates for the sign extension of the low 16 bits.
2771 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2772 MVT::i32);
2773 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2774 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2775 return true;
2776 }
2777 }
2778
     // Fallback: zero displacement with N itself (or its target frame index) as
     // the base.
2779 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2781 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2782 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2783 } else
2784 Base = N;
2785 return true; // [r+0]
2786}
2787
2788/// Similar to the 16-bit case but for instructions that take a 34-bit
2789/// displacement field (prefixed loads/stores).
    ///
    /// Accepts three shapes of i64 address: ADD with a signed 34-bit constant,
    /// OR with a signed 34-bit constant whose set bits are known zero on the
    /// left-hand side, and a bare signed 34-bit constant. Everything else
    /// returns false.
    // NOTE(review): the first signature line is not present in this listing.
2791 SDValue &Base,
2792 SelectionDAG &DAG) const {
2793 // Only on 64-bit targets.
2794 if (N.getValueType() != MVT::i64)
2795 return false;
2796
2797 SDLoc dl(N);
2798 int64_t Imm = 0;
2799
2800 if (N.getOpcode() == ISD::ADD) {
2801 if (!isIntS34Immediate(N.getOperand(1), Imm))
2802 return false;
2803 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2804 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2805 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2806 else
2807 Base = N.getOperand(0);
2808 return true;
2809 }
2810
2811 if (N.getOpcode() == ISD::OR) {
2812 if (!isIntS34Immediate(N.getOperand(1), Imm))
2813 return false;
2814 // If this is an or of disjoint bitfields, we can codegen this as an add
2815 // (for better address arithmetic) if the LHS and RHS of the OR are
2816 // provably disjoint.
2817 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
     // Reject unless every bit set in Imm is known zero on the left-hand side.
2818 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2819 return false;
2820 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2821 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2822 else
2823 Base = N.getOperand(0);
2824 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2825 return true;
2826 }
2827
2828 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2829 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2830 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2831 return true;
2832 }
2833
2834 return false;
2835}
2836
2837/// SelectAddressRegRegOnly - Given the specified address, force it to be
2838/// represented as an indexed [r+r] operation.
    ///
    /// Always returns true: either via SelectAddressRegReg, by splitting an ADD
    /// into its two operands, or by pairing the zero register with N.
    // NOTE(review): the first signature line is not present in this listing.
2840 SDValue &Index,
2841 SelectionDAG &DAG) const {
2842 // Check to see if we can easily represent this as an [r+r] address. This
2843 // will fail if it thinks that the address is more profitably represented as
2844 // reg+imm, e.g. where imm = 0.
2845 if (SelectAddressRegReg(N, Base, Index, DAG))
2846 return true;
2847
2848 // If the address is the result of an add, we will utilize the fact that the
2849 // address calculation includes an implicit add. However, we can reduce
2850 // register pressure if we do not materialize a constant just for use as the
2851 // index register. We only get rid of the add if it is not an add of a
2852 // value and a 16-bit signed constant and both have a single use.
2853 int16_t imm = 0;
2854 if (N.getOpcode() == ISD::ADD &&
2855 (!isIntS16Immediate(N.getOperand(1), imm) ||
2856 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2857 Base = N.getOperand(0);
2858 Index = N.getOperand(1);
2859 return true;
2860 }
2861
2862 // Otherwise, do it the hard way, using R0 as the base register.
2863 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2864 N.getValueType());
2865 Index = N;
2866 return true;
2867}
2868
2869template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2870 Ty *PCRelCand = dyn_cast<Ty>(N);
2871 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2872}
2873
2874/// Returns true if this address is a PC Relative address.
2875/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2876/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
    ///
    /// Base is set to N unconditionally, including when the result is false.
    // NOTE(review): the signature line and the condition guarding the second
    // "return true" are not present in this listing.
2878 // This is a materialize PC Relative node. Always select this as PC Relative.
2879 Base = N;
2880 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2881 return true;
2886 return true;
2887 return false;
2888}
2889
2890/// Returns true if we should use a direct load into vector instruction
2891/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
    ///
    /// Requires a load of a simple memory type that the subtarget supports
    /// (i64 always, i32 with P8 vector, i16/i8 with P9 vector) whose loaded
    /// value has exactly one use, that use being a SCALAR_TO_VECTOR or
    /// SCALAR_TO_VECTOR_PERMUTED node.
2892static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2893
2894 // If there are any other uses other than scalar to vector, then we should
2895 // keep it as a scalar load -> direct move pattern to prevent multiple
2896 // loads.
     // NOTE(review): the statement that defines LD is not present in this
     // listing.
2898 if (!LD)
2899 return false;
2900
2901 EVT MemVT = LD->getMemoryVT();
2902 if (!MemVT.isSimple())
2903 return false;
     // Gate each memory width on the subtarget feature that is checked for it.
2904 switch(MemVT.getSimpleVT().SimpleTy) {
2905 case MVT::i64:
2906 break;
2907 case MVT::i32:
2908 if (!ST.hasP8Vector())
2909 return false;
2910 break;
2911 case MVT::i16:
2912 case MVT::i8:
2913 if (!ST.hasP9Vector())
2914 return false;
2915 break;
2916 default:
2917 return false;
2918 }
2919
2920 SDValue LoadedVal(N, 0);
2921 if (!LoadedVal.hasOneUse())
2922 return false;
2923
     // Only uses of result 0 (the loaded value) are inspected; uses of other
     // results are ignored.
2924 for (SDUse &Use : LD->uses())
2925 if (Use.getResNo() == 0 &&
2926 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2927 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2928 return false;
2929
2930 return true;
2931}
2932
2933/// getPreIndexedAddressParts - returns true by value, base pointer and
2934/// offset pointer and addressing mode by reference if the node's address
2935/// can be legally represented as pre-indexed load / store address.
    ///
    /// Only plain load and store nodes are considered; on success AM is set to
    /// ISD::PRE_INC.
    // NOTE(review): parts of the signature, the condition that first sets Swap,
    // the statement executed when Swap is true and the last operand of the
    // sign-extending-load check are not present in this listing.
2937 SDValue &Offset,
2939 SelectionDAG &DAG) const {
2940 if (DisablePPCPreinc) return false;
2941
2942 bool isLoad = true;
2943 SDValue Ptr;
2944 EVT VT;
2945 Align Alignment;
2946 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2947 Ptr = LD->getBasePtr();
2948 VT = LD->getMemoryVT();
2949 Alignment = LD->getAlign();
2950 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2951 Ptr = ST->getBasePtr();
2952 VT = ST->getMemoryVT();
2953 Alignment = ST->getAlign();
2954 isLoad = false;
2955 } else
2956 return false;
2957
2958 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2959 // instructions because we can fold these into a more efficient instruction
2960 // instead, (such as LXSD).
2961 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2962 return false;
2963 }
2964
2965 // PowerPC doesn't have preinc load/store instructions for vectors
2966 if (VT.isVector())
2967 return false;
2968
     // First preference: an indexed [r+r] address.
2969 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2970 // Common code will reject creating a pre-inc form if the base pointer
2971 // is a frame index, or if N is a store and the base pointer is either
2972 // the same as or a predecessor of the value being stored. Check for
2973 // those situations here, and try with swapped Base/Offset instead.
2974 bool Swap = false;
2975
2977 Swap = true;
2978 else if (!isLoad) {
2979 SDValue Val = cast<StoreSDNode>(N)->getValue();
2980 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2981 Swap = true;
2982 }
2983
2984 if (Swap)
2986
2987 AM = ISD::PRE_INC;
2988 return true;
2989 }
2990
2991 // LDU/STU can only handle immediates that are a multiple of 4.
     // Non-i64 accesses place no alignment requirement on the displacement.
2992 if (VT != MVT::i64) {
2993 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
2994 return false;
2995 } else {
2996 // LDU/STU need an address with at least 4-byte alignment.
2997 if (Alignment < Align(4))
2998 return false;
2999
3000 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3001 return false;
3002 }
3003
3004 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3005 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3006 // sext i32 to i64 when addr mode is r+i.
3007 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3008 LD->getExtensionType() == ISD::SEXTLOAD &&
3010 return false;
3011 }
3012
3013 AM = ISD::PRE_INC;
3014 return true;
3015}
3016
3017//===----------------------------------------------------------------------===//
3018// LowerOperation implementation
3019//===----------------------------------------------------------------------===//
3020
3021/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3022/// and LoOpFlags to the target MO flags.
3023static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3024 unsigned &HiOpFlags, unsigned &LoOpFlags,
3025 const GlobalValue *GV = nullptr) {
3026 HiOpFlags = PPCII::MO_HA;
3027 LoOpFlags = PPCII::MO_LO;
3028
3029 // Don't use the pic base if not in PIC relocation model.
3030 if (IsPIC) {
3031 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3032 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3033 }
3034}
3035
3036static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3037 SelectionDAG &DAG) {
3038 SDLoc DL(HiPart);
3039 EVT PtrVT = HiPart.getValueType();
3040 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3041
3042 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3043 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3044
3045 // With PIC, the first instruction is actually "GR+hi(&G)".
3046 if (isPIC)
3047 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3048 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3049
3050 // Generate non-pic code that has direct accesses to the constant pool.
3051 // The address of the global is just (hi(&g)+lo(&g)).
3052 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3053}
3054
3056 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3057 FuncInfo->setUsesTOCBasePtr();
3058}
3059
3063
     // Builds a PPCISD::TOC_ENTRY memory-intrinsic node for GA. The second
     // operand is X2 on PPC64, R2 on the AIX ABI otherwise, and a GlobalBaseReg
     // node in the remaining case.
     // NOTE(review): the trailing arguments of getMemIntrinsicNode are not
     // present in this listing.
3064SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3065 SDValue GA) const {
3066 EVT VT = Subtarget.getScalarIntVT();
3067 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3068 : Subtarget.isAIXABI()
3069 ? DAG.getRegister(PPC::R2, VT)
3070 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3071 SDValue Ops[] = { GA, Reg };
3072 return DAG.getMemIntrinsicNode(
3073 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3076}
3077
     // Lowers a constant-pool reference. Depending on the ABI and relocation
     // model this yields a PC-relative materialisation, a TOC entry, or a
     // hi/lo label pair combined by LowerLabelRef.
3078SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3079 SelectionDAG &DAG) const {
3080 EVT PtrVT = Op.getValueType();
3081 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3082 const Constant *C = CP->getConstVal();
3083
3084 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3085 // The actual address of the GlobalValue is stored in the TOC.
3086 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3087 if (Subtarget.isUsingPCRelativeCalls()) {
3088 SDLoc DL(CP);
3089 EVT Ty = getPointerTy(DAG.getDataLayout());
3090 SDValue ConstPool = DAG.getTargetConstantPool(
3091 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3092 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3093 }
3094 setUsesTOCBasePtr(DAG);
     // NOTE(review): the PC-relative path above forwards CP->getOffset() while
     // this and the later paths pass offset 0 - confirm this is intended.
3095 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3096 return getTOCEntry(DAG, SDLoc(CP), GA);
3097 }
3098
3099 unsigned MOHiFlag, MOLoFlag;
3100 bool IsPIC = isPositionIndependent();
3101 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3102
3103 if (IsPIC && Subtarget.isSVR4ABI()) {
     // NOTE(review): the initialiser of GA is not present in this listing.
3104 SDValue GA =
3106 return getTOCEntry(DAG, SDLoc(CP), GA);
3107 }
3108
     // Remaining case: build separate hi/lo references and add them.
3109 SDValue CPIHi =
3110 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3111 SDValue CPILo =
3112 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3113 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3114}
3115
3116// For 64-bit PowerPC, prefer the more compact relative encodings.
3117// This trades 32 bits per jump table entry for one or two instructions
3118// on the jump site.
3125
3128 return false;
3129 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3130 return true;
3132}
3133
3135 SelectionDAG &DAG) const {
3136 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3138
3139 switch (getTargetMachine().getCodeModel()) {
3140 case CodeModel::Small:
3141 case CodeModel::Medium:
3143 default:
3144 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3146 }
3147}
3148
3149const MCExpr *
3151 unsigned JTI,
3152 MCContext &Ctx) const {
3153 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3155
3156 switch (getTargetMachine().getCodeModel()) {
3157 case CodeModel::Small:
3158 case CodeModel::Medium:
3160 default:
3161 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3162 }
3163}
3164
     // Lowers a jump-table reference. Depending on the subtarget and relocation
     // model this yields a PC-relative materialisation, a TOC entry, or a hi/lo
     // label pair combined by LowerLabelRef.
     // NOTE(review): the statement that defines JT and the tails of two
     // getTargetJumpTable calls are not present in this listing.
3165SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3166 EVT PtrVT = Op.getValueType();
3168
3169 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3170 if (Subtarget.isUsingPCRelativeCalls()) {
3171 SDLoc DL(JT);
3172 EVT Ty = getPointerTy(DAG.getDataLayout());
3173 SDValue GA =
3175 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3176 return MatAddr;
3177 }
3178
3179 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3180 // The actual address of the GlobalValue is stored in the TOC.
3181 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3182 setUsesTOCBasePtr(DAG);
3183 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3184 return getTOCEntry(DAG, SDLoc(JT), GA);
3185 }
3186
3187 unsigned MOHiFlag, MOLoFlag;
3188 bool IsPIC = isPositionIndependent();
3189 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3190
3191 if (IsPIC && Subtarget.isSVR4ABI()) {
3192 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
     // NOTE(review): this path takes its SDLoc from GA whereas the TOC path
     // above uses JT - confirm the difference is intended.
3194 return getTOCEntry(DAG, SDLoc(GA), GA);
3195 }
3196
     // Remaining case: build separate hi/lo references and add them.
3197 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3198 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3199 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3200}
3201
     // Lowers a block-address reference. Depending on the subtarget and
     // relocation model this yields a PC-relative materialisation, a TOC/GOT
     // entry, or a hi/lo label pair combined by LowerLabelRef.
3202SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3203 SelectionDAG &DAG) const {
3204 EVT PtrVT = Op.getValueType();
3205 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3206 const BlockAddress *BA = BASDN->getBlockAddress();
3207
3208 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3209 if (Subtarget.isUsingPCRelativeCalls()) {
3210 SDLoc DL(BASDN);
3211 EVT Ty = getPointerTy(DAG.getDataLayout());
     // NOTE(review): the final argument of this call is not present in this
     // listing.
3212 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3214 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3215 return MatAddr;
3216 }
3217
3218 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3219 // The actual BlockAddress is stored in the TOC.
3220 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3221 setUsesTOCBasePtr(DAG);
3222 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3223 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3224 }
3225
3226 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3227 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3228 return getTOCEntry(
3229 DAG, SDLoc(BASDN),
3230 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3231
3232 unsigned MOHiFlag, MOLoFlag;
3233 bool IsPIC = isPositionIndependent();
3234 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
     // NOTE(review): the paths above forward BASDN->getOffset(); the hi/lo pair
     // below uses offset 0 - confirm this is intended.
3235 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3236 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3237 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3238}
3239
3240SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3241 SelectionDAG &DAG) const {
3242 if (Subtarget.isAIXABI())
3243 return LowerGlobalTLSAddressAIX(Op, DAG);
3244
3245 return LowerGlobalTLSAddressLinux(Op, DAG);
3246}
3247
3248/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3249/// and then apply the update.
    ///
    /// Once the per-function state reports that initial-exec should be used for
    /// local-dynamic accesses, Model is overwritten with TLSModel::InitialExec.
    // NOTE(review): the first signature line, the initialiser of FuncInfo, the
    // declaration of TLSGV and the statement closing the lazy-init block are not
    // present in this listing.
3251 SelectionDAG &DAG,
3252 const TargetMachine &TM) {
3253 // Initialize TLS model opt setting lazily:
3254 // (1) Use initial-exec for single TLS var references within current function.
3255 // (2) Use local-dynamic for multiple TLS var references within current
3256 // function.
3257 PPCFunctionInfo *FuncInfo =
3259 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3261 // Iterate over all instructions within current function, collect all TLS
3262 // global variables (global variables taken as the first parameter to
3263 // Intrinsic::threadlocal_address).
3264 const Function &Func = DAG.getMachineFunction().getFunction();
3265 for (const BasicBlock &BB : Func)
3266 for (const Instruction &I : BB)
3267 if (I.getOpcode() == Instruction::Call)
3268 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3269 if (Function *CF = CI->getCalledFunction())
3270 if (CF->isDeclaration() &&
3271 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3272 if (const GlobalValue *GV =
3273 dyn_cast<GlobalValue>(I.getOperand(0))) {
     // Only globals whose model is local-dynamic are counted.
3274 TLSModel::Model GVModel = TM.getTLSModel(GV);
3275 if (GVModel == TLSModel::LocalDynamic)
3276 TLSGV.insert(GV);
3277 }
3278
3279 unsigned TLSGVCnt = TLSGV.size();
     // NOTE(review): TLSGVCnt is unsigned but is formatted with "%d".
3280 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3281 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3282 FuncInfo->setAIXFuncUseTLSIEForLD();
3284 }
3285
3286 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3287 LLVM_DEBUG(
3288 dbgs() << DAG.getMachineFunction().getName()
3289 << " function is using the TLS-IE model for TLS-LD access.\n");
3290 Model = TLSModel::InitialExec;
3291 }
3292}
3293
3294SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3295 SelectionDAG &DAG) const {
3296 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3297
3298 if (DAG.getTarget().useEmulatedTLS())
3299 report_fatal_error("Emulated TLS is not yet supported on AIX");
3300
3301 SDLoc dl(GA);
3302 const GlobalValue *GV = GA->getGlobal();
3303 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3304 bool Is64Bit = Subtarget.isPPC64();
3306
3307 // Apply update to the TLS model.
3308 if (Subtarget.hasAIXShLibTLSModelOpt())
3310
3311 // TLS variables are accessed through TOC entries.
3312 // To support this, set the DAG to use the TOC base pointer.
3313 setUsesTOCBasePtr(DAG);
3314
3315 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3316
3317 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3318 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3319 bool HasAIXSmallTLSGlobalAttr = false;
3320 SDValue VariableOffsetTGA =
3321 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3322 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3323 SDValue TLSReg;
3324
3325 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3326 if (GVar->hasAttribute("aix-small-tls"))
3327 HasAIXSmallTLSGlobalAttr = true;
3328
3329 if (Is64Bit) {
3330 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3331 // involves a load of the variable offset (from the TOC), followed by an
3332 // add of the loaded variable offset to R13 (the thread pointer).
3333 // This code sequence looks like:
3334 // ld reg1,var[TC](2)
3335 // add reg2, reg1, r13 // r13 contains the thread pointer
3336 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3337
3338 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3339 // global variable attribute, produce a faster access sequence for
3340 // local-exec TLS variables where the offset from the TLS base is encoded
3341 // as an immediate operand.
3342 //
3343 // We only utilize the faster local-exec access sequence when the TLS
3344 // variable has a size within the policy limit. We treat types that are
3345 // not sized or are empty as being over the policy size limit.
3346 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3347 IsTLSLocalExecModel) {
3348 Type *GVType = GV->getValueType();
3349 if (GVType->isSized() && !GVType->isEmptyTy() &&
3350 GV->getDataLayout().getTypeAllocSize(GVType) <=
3352 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3353 }
3354 } else {
3355 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3356 // involves loading the variable offset from the TOC, generating a call to
3357 // .__get_tpointer to get the thread pointer (which will be in R3), and
3358 // adding the two together:
3359 // lwz reg1,var[TC](2)
3360 // bla .__get_tpointer
3361 // add reg2, reg1, r3
3362 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3363
3364 // We do not implement the 32-bit version of the faster access sequence
3365 // for local-exec that is controlled by the -maix-small-local-exec-tls
3366 // option, or the "aix-small-tls" global variable attribute.
3367 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3368 report_fatal_error("The small-local-exec TLS access sequence is "
3369 "currently only supported on AIX (64-bit mode).");
3370 }
3371 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3372 }
3373
3374 if (Model == TLSModel::LocalDynamic) {
3375 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3376
3377 // We do not implement the 32-bit version of the faster access sequence
3378 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3379 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3380 report_fatal_error("The small-local-dynamic TLS access sequence is "
3381 "currently only supported on AIX (64-bit mode).");
3382
3383 // For local-dynamic on AIX, we need to generate one TOC entry for each
3384 // variable offset, and a single module-handle TOC entry for the entire
3385 // file.
3386
3387 SDValue VariableOffsetTGA =
3388 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3389 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3390
3392 GlobalVariable *TLSGV =
3393 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3394 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3396 assert(TLSGV && "Not able to create GV for _$TLSML.");
3397 SDValue ModuleHandleTGA =
3398 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3399 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3400 SDValue ModuleHandle =
3401 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3402
3403 // With the -maix-small-local-dynamic-tls option, produce a faster access
3404 // sequence for local-dynamic TLS variables where the offset from the
3405 // module-handle is encoded as an immediate operand.
3406 //
3407 // We only utilize the faster local-dynamic access sequence when the TLS
3408 // variable has a size within the policy limit. We treat types that are
3409 // not sized or are empty as being over the policy size limit.
3410 if (HasAIXSmallLocalDynamicTLS) {
3411 Type *GVType = GV->getValueType();
3412 if (GVType->isSized() && !GVType->isEmptyTy() &&
3413 GV->getDataLayout().getTypeAllocSize(GVType) <=
3415 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3416 ModuleHandle);
3417 }
3418
3419 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3420 }
3421
3422 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3423 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3424 // need to generate two TOC entries, one for the variable offset, one for the
3425 // region handle. The global address for the TOC entry of the region handle is
3426 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3427 // entry of the variable offset is created with MO_TLSGD_FLAG.
3428 SDValue VariableOffsetTGA =
3429 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3430 SDValue RegionHandleTGA =
3431 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3432 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3433 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3434 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3435 RegionHandle);
3436}
3437
// Lowers a GlobalTLSAddress node on the Linux path. Emulated TLS is delegated
// to LowerToTLSEmulatedModel. Otherwise the TLS model the target machine
// reports for the global selects one of four code sequences (local-exec,
// initial-exec, general-dynamic, local-dynamic); each has a separate variant
// for subtargets using PC-relative calls. Any other model is unreachable.
//
// NOTE(review): this extract lacks listing lines 3462, 3469, 3471, 3481, 3513
// and 3537, so several DAG.getTargetGlobalAddress(...) calls below appear
// truncated (their trailing flag argument, or their opening line, is not
// visible). Confirm against the full source before editing them.
3438SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3439 SelectionDAG &DAG) const {
3440 // FIXME: TLS addresses currently use medium model code sequences,
3441 // which is the most useful form. Eventually support for small and
3442 // large models could be added if users need it, at the cost of
3443 // additional complexity.
3444 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3445 if (DAG.getTarget().useEmulatedTLS())
3446 return LowerToTLSEmulatedModel(GA, DAG);
3447
3448 SDLoc dl(GA);
3449 const GlobalValue *GV = GA->getGlobal();
3450 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3451 bool is64bit = Subtarget.isPPC64();
3452 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3453 PICLevel::Level picLevel = M->getPICLevel();
3454
3455 const TargetMachine &TM = getTargetMachine();
3456 TLSModel::Model Model = TM.getTLSModel(GV);
3457
// Local-exec: with PC-relative calls, a materialized address is added to X13
// via ADD_TLS; otherwise a Hi/Lo pair is built on top of the thread register
// (X13 when 64-bit, R2 when 32-bit).
3458 if (Model == TLSModel::LocalExec) {
3459 if (Subtarget.isUsingPCRelativeCalls()) {
3460 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3461 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3463 SDValue MatAddr =
3464 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3465 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3466 }
3467
3468 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3470 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3472 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3473 : DAG.getRegister(PPC::R2, MVT::i32);
3474
3475 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3476 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3477 }
3478
// Initial-exec: the thread-pointer offset is either loaded through a
// MAT_PCREL_ADDR node (PC-relative) or produced by LD_GOT_TPREL_L from a GOT
// pointer, and is then combined with the TLS symbol operand by ADD_TLS.
3479 if (Model == TLSModel::InitialExec) {
3480 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3482 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3483 SDValue TGATLS = DAG.getTargetGlobalAddress(
3484 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3485 SDValue TPOffset;
3486 if (IsPCRel) {
3487 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3488 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3489 MachinePointerInfo());
3490 } else {
3491 SDValue GOTPtr;
3492 if (is64bit) {
3493 setUsesTOCBasePtr(DAG);
3494 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3495 GOTPtr =
3496 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3497 } else {
// 32-bit GOT pointer: PPC32_GOT when not position independent,
// GlobalBaseReg for small PIC, PPC32_PICGOT otherwise.
3498 if (!TM.isPositionIndependent())
3499 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3500 else if (picLevel == PICLevel::SmallPIC)
3501 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3502 else
3503 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3504 }
3505 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3506 }
3507 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3508 }
3509
// General-dynamic: a single TLS_DYNAMIC_MAT_PCREL_ADDR node when PC-relative;
// otherwise ADDI_TLSGD_L_ADDR on a GOT pointer (ADDIS_TLSGD_HA off X2 when
// 64-bit, GlobalBaseReg or PPC32_PICGOT when 32-bit).
3510 if (Model == TLSModel::GeneralDynamic) {
3511 if (Subtarget.isUsingPCRelativeCalls()) {
3512 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3514 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3515 }
3516
3517 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3518 SDValue GOTPtr;
3519 if (is64bit) {
3520 setUsesTOCBasePtr(DAG);
3521 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3522 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3523 GOTReg, TGA);
3524 } else {
3525 if (picLevel == PICLevel::SmallPIC)
3526 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3527 else
3528 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3529 }
3530 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3531 GOTPtr, TGA, TGA);
3532 }
3533
// Local-dynamic: PC-relative code materializes an address and applies
// PADDI_DTPREL; otherwise ADDI_TLSLD_L_ADDR yields a base to which the
// ADDIS_DTPREL_HA / ADDI_DTPREL_L pair is applied.
3534 if (Model == TLSModel::LocalDynamic) {
3535 if (Subtarget.isUsingPCRelativeCalls()) {
3536 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3538 SDValue MatPCRel =
3539 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3540 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3541 }
3542
3543 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3544 SDValue GOTPtr;
3545 if (is64bit) {
3546 setUsesTOCBasePtr(DAG);
3547 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3548 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3549 GOTReg, TGA);
3550 } else {
3551 if (picLevel == PICLevel::SmallPIC)
3552 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3553 else
3554 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3555 }
3556 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3557 PtrVT, GOTPtr, TGA, TGA);
3558 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3559 PtrVT, TLSAddr, TGA);
3560 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3561 }
3562
3563 llvm_unreachable("Unknown TLS model!");
3564}
3565
// Lowers a GlobalAddress node. For the 64-bit ELF and AIX ABIs the address is
// produced either by PC-relative materialization (when the subtarget uses
// PC-relative calls) or through a TOC entry. For the remaining targets it is
// produced through getTOCEntry when compiling PIC for SVR4, and otherwise as a
// Hi/Lo label reference built by LowerLabelRef.
//
// NOTE(review): listing lines 3578, 3580, 3587 and 3603 are absent from this
// extract. The condition that opens the inner `if` paired with `} else {`, and
// the flag arguments closing three getTargetGlobalAddress calls, are therefore
// not visible here.
3566SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3567 SelectionDAG &DAG) const {
3568 EVT PtrVT = Op.getValueType();
3569 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3570 SDLoc DL(GSDN);
3571 const GlobalValue *GV = GSDN->getGlobal();
3572
3573 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3574 // The actual address of the GlobalValue is stored in the TOC.
3575 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3576 if (Subtarget.isUsingPCRelativeCalls()) {
3577 EVT Ty = getPointerTy(DAG.getDataLayout());
// First visible variant: the materialized PC-relative address is loaded from,
// and the loaded value is the result.
3579 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3581 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3582 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3583 MachinePointerInfo());
3584 return Load;
3585 } else {
// Second variant: the MAT_PCREL_ADDR node itself is the result (no load).
3586 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3588 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3589 }
3590 }
3591 setUsesTOCBasePtr(DAG);
3592 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3593 return getTOCEntry(DAG, DL, GA);
3594 }
3595
// getLabelAccessInfo fills in the Hi/Lo target flags for this global.
3596 unsigned MOHiFlag, MOLoFlag;
3597 bool IsPIC = isPositionIndependent();
3598 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3599
3600 if (IsPIC && Subtarget.isSVR4ABI()) {
3601 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3602 GSDN->getOffset(),
3604 return getTOCEntry(DAG, DL, GA);
3605 }
3606
3607 SDValue GAHi =
3608 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3609 SDValue GALo =
3610 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3611
3612 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3613}
3614
3615SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3616 bool IsStrict = Op->isStrictFPOpcode();
3617 const SDNodeFlags Flags = Op.getNode()->getFlags();
3618 ISD::CondCode CC =
3619 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3620 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3621 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3622 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3623 EVT LHSVT = LHS.getValueType();
3624 SDLoc dl(Op);
3625
3626 // Soften the setcc with libcall if it is fp128 or it is SPE and fp32/fp64.
3627 if (LHSVT == MVT::f128 ||
3628 (Subtarget.hasSPE() && (LHSVT == MVT::f32 || LHSVT == MVT::f64) &&
3629 (!Flags.hasNoNaNs() || !Flags.hasNoInfs()))) {
3630 assert(!Subtarget.hasP9Vector() &&
3631 "SETCC for f128 is already legal under Power9!");
3632 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3633 Op->getOpcode() == ISD::STRICT_FSETCCS);
3634 if (RHS.getNode())
3635 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3636 DAG.getCondCode(CC));
3637 if (IsStrict)
3638 return DAG.getMergeValues({LHS, Chain}, dl);
3639 return LHS;
3640 } else if (LHSVT == MVT::f32 || LHSVT == MVT::f64) {
3641 return Op;
3642 }
3643
3644 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3645
3646 if (Op.getValueType() == MVT::v2i64) {
3647 // When the operands themselves are v2i64 values, we need to do something
3648 // special because VSX has no underlying comparison operations for these.
3649 if (LHS.getValueType() == MVT::v2i64) {
3650 // Equality can be handled by casting to the legal type for Altivec
3651 // comparisons, everything else needs to be expanded.
3652 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3653 return SDValue();
3654 SDValue SetCC32 = DAG.getSetCC(
3655 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3656 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3657 int ShuffV[] = {1, 0, 3, 2};
3658 SDValue Shuff =
3659 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3660 return DAG.getBitcast(MVT::v2i64,
3661 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3662 dl, MVT::v4i32, Shuff, SetCC32));
3663 }
3664
3665 // We handle most of these in the usual way.
3666 return Op;
3667 }
3668
3669 // If we're comparing for equality to zero, expose the fact that this is
3670 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3671 // fold the new nodes.
3672 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3673 return V;
3674
3675 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3676 // Leave comparisons against 0 and -1 alone for now, since they're usually
3677 // optimized. FIXME: revisit this when we can custom lower all setcc
3678 // optimizations.
3679 if (C->isAllOnes() || C->isZero())
3680 return SDValue();
3681 }
3682
3683 // If we have an integer seteq/setne, turn it into a compare against zero
3684 // by xor'ing the rhs with the lhs, which is faster than setting a
3685 // condition register, reading it back out, and masking the correct bit. The
3686 // normal approach here uses sub to do this instead of xor. Using xor exposes
3687 // the result to other bit-twiddling opportunities.
3688 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3689 EVT VT = Op.getValueType();
3690 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3691 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3692 }
3693 return SDValue();
3694}
3695
3696SDValue PPCTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3697 const SDNodeFlags Flags = Op->getFlags();
3698 SDValue Chain = Op.getOperand(0);
3699 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3700 SDValue LHS = Op.getOperand(2);
3701 SDValue RHS = Op.getOperand(3);
3702 SDValue Dest = Op.getOperand(4);
3703 EVT LHSVT = LHS.getValueType();
3704 SDLoc dl(Op);
3705
3706 assert(Subtarget.hasSPE() && "LowerBR_CC used only for targets with SPE");
3707
3708 if ((LHSVT == MVT::f32 || LHSVT == MVT::f64) && Flags.hasNoNaNs() &&
3709 Flags.hasNoInfs())
3710 return Op;
3711
3712 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS);
3713
3714 // If softenSetCCOperands returned a scalar, we need to compare the result
3715 // against zero to select between true and false values.
3716 if (!RHS) {
3717 RHS = DAG.getConstant(0, dl, LHSVT);
3718 CC = ISD::SETNE;
3719 }
3720
3721 return DAG.getNode(ISD::BR_CC, dl, Op.getValueType(), Chain,
3722 DAG.getCondCode(CC), LHS, RHS, Dest);
3723}
3724
3725SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3726 SDNode *Node = Op.getNode();
3727 EVT VT = Node->getValueType(0);
3728 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3729 SDValue InChain = Node->getOperand(0);
3730 SDValue VAListPtr = Node->getOperand(1);
3731 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3732 SDLoc dl(Node);
3733
3734 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3735
3736 // gpr_index
3737 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3738 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3739 InChain = GprIndex.getValue(1);
3740
3741 if (VT == MVT::i64) {
3742 // Check if GprIndex is even
3743 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3744 DAG.getConstant(1, dl, MVT::i32));
3745 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3746 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3747 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3748 DAG.getConstant(1, dl, MVT::i32));
3749 // Align GprIndex to be even if it isn't
3750 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3751 GprIndex);
3752 }
3753
3754 // fpr index is 1 byte after gpr
3755 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3756 DAG.getConstant(1, dl, MVT::i32));
3757
3758 // fpr
3759 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3760 FprPtr, MachinePointerInfo(SV), MVT::i8);
3761 InChain = FprIndex.getValue(1);
3762
3763 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3764 DAG.getConstant(8, dl, MVT::i32));
3765
3766 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3767 DAG.getConstant(4, dl, MVT::i32));
3768
3769 // areas
3770 SDValue OverflowArea =
3771 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3772 InChain = OverflowArea.getValue(1);
3773
3774 SDValue RegSaveArea =
3775 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3776 InChain = RegSaveArea.getValue(1);
3777
3778 // select overflow_area if index > 8
3779 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3780 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3781
3782 // adjustment constant gpr_index * 4/8
3783 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3784 VT.isInteger() ? GprIndex : FprIndex,
3785 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3786 MVT::i32));
3787
3788 // OurReg = RegSaveArea + RegConstant
3789 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3790 RegConstant);
3791
3792 // Floating types are 32 bytes into RegSaveArea
3793 if (VT.isFloatingPoint())
3794 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3795 DAG.getConstant(32, dl, MVT::i32));
3796
3797 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3798 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3799 VT.isInteger() ? GprIndex : FprIndex,
3800 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3801 MVT::i32));
3802
3803 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3804 VT.isInteger() ? VAListPtr : FprPtr,
3805 MachinePointerInfo(SV), MVT::i8);
3806
3807 // determine if we should load from reg_save_area or overflow_area
3808 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3809
3810 // increase overflow_area by 4/8 if gpr/fpr > 8
3811 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3812 DAG.getConstant(VT.isInteger() ? 4 : 8,
3813 dl, MVT::i32));
3814
3815 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3816 OverflowAreaPlusN);
3817
3818 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3819 MachinePointerInfo(), MVT::i32);
3820
3821 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3822}
3823
3824SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3825 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3826
3827 // We have to copy the entire va_list struct:
3828 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3829 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3830 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3831 Align(8), false, true, /*CI=*/nullptr, std::nullopt,
3832 MachinePointerInfo(), MachinePointerInfo());
3833}
3834
3835SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3836 SelectionDAG &DAG) const {
3837 return Op.getOperand(0);
3838}
3839
// Inspects an INLINEASM / INLINEASM_BR node and marks the function as
// requiring an LR store when one of the examined register operands is
// PPC::LR or PPC::LR8. The node itself is always returned unchanged.
//
// NOTE(review): listing lines 3868-3870 and 3873-3875 are absent from this
// extract. They sit where the `case` labels of the switch would be, so which
// operand kinds are skipped and which are scanned cannot be told from here.
3840SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3841 MachineFunction &MF = DAG.getMachineFunction();
3842 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3843
3844 assert((Op.getOpcode() == ISD::INLINEASM ||
3845 Op.getOpcode() == ISD::INLINEASM_BR) &&
3846 "Expecting Inline ASM node.");
3847
3848 // If an LR store is already known to be required then there is not point in
3849 // checking this ASM as well.
3850 if (MFI.isLRStoreRequired())
3851 return Op;
3852
3853 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3854 // type MVT::Glue. We want to ignore this last operand if that is the case.
3855 unsigned NumOps = Op.getNumOperands();
3856 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3857 --NumOps;
3858
3859 // Check all operands that may contain the LR.
// Each operand group starts with a flag word giving its kind and the number
// of operands that follow; `i` is advanced manually past every group.
3860 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3861 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3862 unsigned NumVals = Flags.getNumOperandRegisters();
3863 ++i; // Skip the ID value.
3864
3865 switch (Flags.getKind()) {
3866 default:
3867 llvm_unreachable("Bad flags!");
// First visible group of kinds: skip all of the group's operands.
3871 i += NumVals;
3872 break;
// Second visible group of kinds: scan each register operand for LR / LR8 and
// stop at the first hit.
3876 for (; NumVals; --NumVals, ++i) {
3877 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3878 if (Reg != PPC::LR && Reg != PPC::LR8)
3879 continue;
3880 MFI.setLRStoreRequired();
3881 return Op;
3882 }
3883 break;
3884 }
3885 }
3886 }
3887
3888 return Op;
3889}
3890
// Lowers INIT_TRAMPOLINE. Operands: 0 = chain, 1 = trampoline buffer,
// 2 = nested function, 3 = 'nest' parameter value; the AIX path also reads
// operands 4 and 5 as source values.
//
// On AIX the trampoline is filled in directly with three stores (entry point,
// TOC pointer, environment pointer) joined by a TokenFactor. On other targets
// a call to __trampoline_setup is emitted and its chain result is returned.
//
// NOTE(review): listing lines 3908-3909, 3954, 3962 and 3974 are absent from
// this extract. The invariant-descriptor branch of the MMOFlags expression,
// the declarations of `TokenFactor` and `Args`, and the first arguments of
// setLibCallee are therefore not visible here.
3891SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3892 SelectionDAG &DAG) const {
3893 SDValue Chain = Op.getOperand(0);
3894 SDValue Trmp = Op.getOperand(1); // trampoline
3895 SDValue FPtr = Op.getOperand(2); // nested function
3896 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3897 SDLoc dl(Op);
3898
3899 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3900
3901 if (Subtarget.isAIXABI()) {
3902 // On AIX we create a trampoline descriptor by combining the
3903 // entry point and TOC from the global descriptor (FPtr) with the
3904 // nest argument as the environment pointer.
3905 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3906 MaybeAlign PointerAlign(PointerSize);
3907 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3910 : MachineMemOperand::MONone;
3911
// Layout used below: entry point at offset 0, TOC pointer at one pointer
// size, environment pointer at two pointer sizes.
3912 uint64_t TOCPointerOffset = 1 * PointerSize;
3913 uint64_t EnvPointerOffset = 2 * PointerSize;
3914 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
3915 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
3916
3917 const Value *TrampolineAddr =
3918 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3919 const Function *Func =
3920 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
3921
3922 SDValue OutChains[3];
3923
3924 // Copy the entry point address from the global descriptor to the
3925 // trampoline buffer.
3926 SDValue LoadEntryPoint =
3927 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
3928 PointerAlign, MMOFlags);
3929 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
3930 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
3931 MachinePointerInfo(TrampolineAddr, 0));
3932
3933 // Copy the TOC pointer from the global descriptor to the trampoline
3934 // buffer.
3935 SDValue TOCFromDescriptorPtr =
3936 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
3937 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
3938 MachinePointerInfo(Func, TOCPointerOffset),
3939 PointerAlign, MMOFlags);
3940 SDValue TrampolineTOCPointer =
3941 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
3942 SDValue TOCLoadChain = TOCReg.getValue(1);
3943 OutChains[1] =
3944 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
3945 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
3946
3947 // Store the nest argument into the environment pointer in the trampoline
3948 // buffer.
3949 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
3950 OutChains[2] =
3951 DAG.getStore(Chain, dl, Nest, EnvPointer,
3952 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
3953
3955 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
3956 return TokenFactor;
3957 }
3958
// Non-AIX path: build the argument list for the runtime helper call.
3959 bool isPPC64 = (PtrVT == MVT::i64);
3960 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3961
3963 Args.emplace_back(Trmp, IntPtrTy);
3964 // TrampSize == (isPPC64 ? 48 : 40);
3965 Args.emplace_back(
3966 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
3967 IntPtrTy);
3968 Args.emplace_back(FPtr, IntPtrTy);
3969 Args.emplace_back(Nest, IntPtrTy);
3970
3971 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3972 TargetLowering::CallLoweringInfo CLI(DAG);
3973 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3975 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3976
// Only the chain (second element of the call result) is returned.
3977 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3978 return CallResult.second;
3979}
3980
3981SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3982 MachineFunction &MF = DAG.getMachineFunction();
3983 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3984 EVT PtrVT = getPointerTy(MF.getDataLayout());
3985
3986 SDLoc dl(Op);
3987
3988 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3989 // vastart just stores the address of the VarArgsFrameIndex slot into the
3990 // memory location argument.
3991 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3992 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3993 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3994 MachinePointerInfo(SV));
3995 }
3996
3997 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3998 // We suppose the given va_list is already allocated.
3999 //
4000 // typedef struct {
4001 // char gpr; /* index into the array of 8 GPRs
4002 // * stored in the register save area
4003 // * gpr=0 corresponds to r3,
4004 // * gpr=1 to r4, etc.
4005 // */
4006 // char fpr; /* index into the array of 8 FPRs
4007 // * stored in the register save area
4008 // * fpr=0 corresponds to f1,
4009 // * fpr=1 to f2, etc.
4010 // */
4011 // char *overflow_arg_area;
4012 // /* location on stack that holds
4013 // * the next overflow argument
4014 // */
4015 // char *reg_save_area;
4016 // /* where r3:r10 and f1:f8 (if saved)
4017 // * are stored
4018 // */
4019 // } va_list[1];
4020
4021 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4022 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4023 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4024 PtrVT);
4025 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4026 PtrVT);
4027
4028 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4029 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4030
4031 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4032 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4033
4034 uint64_t FPROffset = 1;
4035 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4036
4037 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4038
4039 // Store first byte : number of int regs
4040 SDValue firstStore =
4041 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4042 MachinePointerInfo(SV), MVT::i8);
4043 uint64_t nextOffset = FPROffset;
4044 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4045 ConstFPROffset);
4046
4047 // Store second byte : number of float regs
4048 SDValue secondStore =
4049 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4050 MachinePointerInfo(SV, nextOffset), MVT::i8);
4051 nextOffset += StackOffset;
4052 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4053
4054 // Store second word : arguments given on stack
4055 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4056 MachinePointerInfo(SV, nextOffset));
4057 nextOffset += FrameOffset;
4058 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4059
4060 // Store third word : arguments given in registers
4061 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4062 MachinePointerInfo(SV, nextOffset));
4063}
4064
4065/// FPR - The set of FP registers that should be allocated for arguments
4066/// on Darwin and AIX.
4067static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4068 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4069 PPC::F11, PPC::F12, PPC::F13};
4070
4071/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4072/// the stack.
4073static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4074 unsigned PtrByteSize) {
4075 unsigned ArgSize = ArgVT.getStoreSize();
4076 if (Flags.isByVal())
4077 ArgSize = Flags.getByValSize();
4078
4079 // Round up to multiples of the pointer size, except for array members,
4080 // which are always packed.
4081 if (!Flags.isInConsecutiveRegs())
4082 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4083
4084 return ArgSize;
4085}
4086
4087/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4088/// on the stack.
// The result starts at PtrByteSize and is then overridden, in this order, by
// the 16-byte vector/f128 rule, the byval alignment, and the packed-array
// rule; a later rule replaces the result of an earlier one.
// NOTE(review): listing line 4089 (the first line of the signature, carrying
// the return type, function name and leading parameters) and line 4106 (the
// statement that owns the message string below) are absent from this extract.
4090 ISD::ArgFlagsTy Flags,
4091 unsigned PtrByteSize) {
4092 Align Alignment(PtrByteSize);
4093
4094 // Altivec parameters are padded to a 16 byte boundary.
4095 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4096 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4097 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4098 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4099 Alignment = Align(16);
4100
4101 // ByVal parameters are aligned as requested.
// Only a byval alignment larger than the pointer size changes the result.
4102 if (Flags.isByVal()) {
4103 auto BVAlign = Flags.getNonZeroByValAlign();
4104 if (BVAlign > PtrByteSize) {
4105 if (BVAlign.value() % PtrByteSize != 0)
4107 "ByVal alignment is not a multiple of the pointer size");
4108
4109 Alignment = BVAlign;
4110 }
4111 }
4112
4113 // Array members are always packed to their original alignment.
4114 if (Flags.isInConsecutiveRegs()) {
4115 // If the array member was split into multiple registers, the first
4116 // needs to be aligned to the size of the full type. (Except for
4117 // ppcf128, which is only aligned as its f64 components.)
4118 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4119 Alignment = Align(OrigVT.getStoreSize());
4120 else
4121 Alignment = Align(ArgVT.getStoreSize());
4122 }
4123
4124 return Alignment;
4125}
4126
4127/// CalculateStackSlotUsed - Return whether this argument will use its
4128/// stack slot (instead of being passed in registers). ArgOffset,
4129/// AvailableFPRs, and AvailableVRs must hold the current argument
4130/// position, and will be updated to account for this argument.
4131static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4132 unsigned PtrByteSize, unsigned LinkageSize,
4133 unsigned ParamAreaSize, unsigned &ArgOffset,
4134 unsigned &AvailableFPRs,
4135 unsigned &AvailableVRs) {
4136 bool UseMemory = false;
4137
4138 // Respect alignment of argument on the stack.
4139 Align Alignment =
4140 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4141 ArgOffset = alignTo(ArgOffset, Alignment);
4142 // If there's no space left in the argument save area, we must
4143 // use memory (this check also catches zero-sized arguments).
4144 if (ArgOffset >= LinkageSize + ParamAreaSize)
4145 UseMemory = true;
4146
4147 // Allocate argument on the stack.
4148 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4149 if (Flags.isInConsecutiveRegsLast())
4150 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4151 // If we overran the argument save area, we must use memory
4152 // (this check catches arguments passed partially in memory)
4153 if (ArgOffset > LinkageSize + ParamAreaSize)
4154 UseMemory = true;
4155
4156 // However, if the argument is actually passed in an FPR or a VR,
4157 // we don't use memory after all.
4158 if (!Flags.isByVal()) {
4159 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4160 if (AvailableFPRs > 0) {
4161 --AvailableFPRs;
4162 return false;
4163 }
4164 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4165 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4166 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4167 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4168 if (AvailableVRs > 0) {
4169 --AvailableVRs;
4170 return false;
4171 }
4172 }
4173
4174 return UseMemory;
4175}
4176
4177/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4178/// ensure minimum alignment required for target.
4180 unsigned NumBytes) {
4181 return alignTo(NumBytes, Lowering->getStackAlign());
4182}
4183
4184SDValue PPCTargetLowering::LowerFormalArguments(
4185 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4186 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4187 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4188 if (Subtarget.isAIXABI())
4189 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4190 InVals);
4191 if (Subtarget.is64BitELFABI())
4192 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4193 InVals);
4194 assert(Subtarget.is32BitELFABI());
4195 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4196 InVals);
4197}
4198
/// Lower the incoming formal arguments of a function for the 32-bit SVR4
/// (ELF) ABI. Arguments assigned to registers are copied out of their live-in
/// registers, arguments assigned to memory are loaded from fixed stack
/// objects, and each resulting value is appended to InVals. For variadic
/// functions the GPR argument registers (and the FPR argument registers,
/// unless soft-float or SPE is in use) are additionally stored to a stack
/// object for the expansion of llvm.va_start. Returns the updated chain.
4199SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4200 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4201 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4202 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4203
4204 // 32-bit SVR4 ABI Stack Frame Layout:
4205 // +-----------------------------------+
4206 // +--> | Back chain |
4207 // | +-----------------------------------+
4208 // | | Floating-point register save area |
4209 // | +-----------------------------------+
4210 // | | General register save area |
4211 // | +-----------------------------------+
4212 // | | CR save word |
4213 // | +-----------------------------------+
4214 // | | VRSAVE save word |
4215 // | +-----------------------------------+
4216 // | | Alignment padding |
4217 // | +-----------------------------------+
4218 // | | Vector register save area |
4219 // | +-----------------------------------+
4220 // | | Local variable space |
4221 // | +-----------------------------------+
4222 // | | Parameter list area |
4223 // | +-----------------------------------+
4224 // | | LR save word |
4225 // | +-----------------------------------+
4226 // SP--> +--- | Back chain |
4227 // +-----------------------------------+
4228 //
4229 // Specifications:
4230 // System V Application Binary Interface PowerPC Processor Supplement
4231 // AltiVec Technology Programming Interface Manual
4232
4233 MachineFunction &MF = DAG.getMachineFunction();
4234 MachineFrameInfo &MFI = MF.getFrameInfo();
4235 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4236
4237 EVT PtrVT = getPointerTy(MF.getDataLayout());
4238 // Potential tail calls could cause overwriting of argument stack slots.
4239 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4240 (CallConv == CallingConv::Fast));
4241 const Align PtrAlign(4);
4242
4243 // Assign locations to all of the incoming arguments.
// NOTE(review): listing line 4244 is absent here. ArgLocs, which is used
// below, is not declared in the visible lines; presumably it is declared on
// the missing line as the container of CCValAssign entries - confirm against
// the original file.
4245 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4246 *DAG.getContext());
4247
4248 // Reserve space for the linkage area on the stack.
4249 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4250 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4251 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4252
4253 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4254 CCValAssign &VA = ArgLocs[i];
4255
4256 // Arguments stored in registers.
4257 if (VA.isRegLoc()) {
4258 const TargetRegisterClass *RC;
4259 EVT ValVT = VA.getValVT();
4260
// Pick the register class for the live-in register from the value type and
// the subtarget's features.
4261 switch (ValVT.getSimpleVT().SimpleTy) {
4262 default:
4263 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4264 case MVT::i1:
4265 case MVT::i32:
4266 RC = &PPC::GPRCRegClass;
4267 break;
4268 case MVT::f32:
4269 if (Subtarget.hasP8Vector())
4270 RC = &PPC::VSSRCRegClass;
4271 else if (Subtarget.hasSPE())
4272 RC = &PPC::GPRCRegClass;
4273 else
4274 RC = &PPC::F4RCRegClass;
4275 break;
4276 case MVT::f64:
4277 if (Subtarget.hasVSX())
4278 RC = &PPC::VSFRCRegClass;
4279 else if (Subtarget.hasSPE())
4280 // SPE passes doubles in GPR pairs.
4281 RC = &PPC::GPRCRegClass;
4282 else
4283 RC = &PPC::F8RCRegClass;
4284 break;
4285 case MVT::v16i8:
4286 case MVT::v8i16:
4287 case MVT::v4i32:
4288 RC = &PPC::VRRCRegClass;
4289 break;
4290 case MVT::v4f32:
4291 RC = &PPC::VRRCRegClass;
4292 break;
4293 case MVT::v2f64:
4294 case MVT::v2i64:
4295 RC = &PPC::VRRCRegClass;
4296 break;
4297 }
4298
4299 SDValue ArgValue;
4300 // Transform the arguments stored in physical registers into
4301 // virtual ones.
4302 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
// With SPE an f64 occupies two consecutive ArgLocs entries, one GPR each;
// the ++i below consumes the second entry. The two 32-bit halves are swapped
// on big-endian targets before being combined with BUILD_SPE64.
4303 assert(i + 1 < e && "No second half of double precision argument");
4304 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4305 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4306 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4307 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4308 if (!Subtarget.isLittleEndian())
4309 std::swap (ArgValueLo, ArgValueHi);
4310 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4311 ArgValueHi);
4312 } else {
// An i1 is copied out of its register as an i32 and then truncated to i1.
4313 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4314 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4315 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4316 if (ValVT == MVT::i1)
4317 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4318 }
4319
4320 InVals.push_back(ArgValue);
4321 } else {
4322 // Argument stored in memory.
4323 assert(VA.isMemLoc());
4324
4325 // Get the extended size of the argument type in stack
4326 unsigned ArgSize = VA.getLocVT().getStoreSize();
4327 // Get the actual size of the argument type
4328 unsigned ObjSize = VA.getValVT().getStoreSize();
4329 unsigned ArgOffset = VA.getLocMemOffset();
4330 // Stack objects in PPC32 are right justified.
4331 ArgOffset += ArgSize - ObjSize;
4332 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4333
4334 // Create load nodes to retrieve arguments from the stack.
4335 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4336 InVals.push_back(
4337 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4338 }
4339 }
4340
4341 // Assign locations to all of the incoming aggregate by value arguments.
4342 // Aggregates passed by value are stored in the local variable space of the
4343 // caller's stack frame, right above the parameter list area.
4344 SmallVector<CCValAssign, 16> ByValArgLocs;
4345 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4346 ByValArgLocs, *DAG.getContext());
4347
4348 // Reserve stack space for the allocations in CCInfo.
4349 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4350
4351 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4352
4353 // Area that is at least reserved in the caller of this function.
4354 unsigned MinReservedArea = CCByValInfo.getStackSize();
4355 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4356
4357 // Set the size that is at least reserved in caller of this function. Tail
4358 // call optimized function's reserved stack space needs to be aligned so that
4359 // taking the difference between two stack areas will result in an aligned
4360 // stack.
4361 MinReservedArea =
4362 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4363 FuncInfo->setMinReservedArea(MinReservedArea);
4364
// NOTE(review): listing line 4365 is absent here. MemOps, which collects the
// vararg register stores below, is not declared in the visible lines;
// presumably it is declared on the missing line - confirm against the
// original file.
4366
4367 // If the function takes variable number of arguments, make a frame index for
4368 // the start of the first vararg value... for expansion of llvm.va_start.
4369 if (isVarArg) {
4370 static const MCPhysReg GPArgRegs[] = {
4371 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4372 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4373 };
4374 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4375
4376 static const MCPhysReg FPArgRegs[] = {
4377 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4378 PPC::F8
4379 };
4380 unsigned NumFPArgRegs = std::size(FPArgRegs);
4381
// No FPR argument registers are saved when soft-float or SPE is in use.
4382 if (useSoftFloat() || hasSPE())
4383 NumFPArgRegs = 0;
4384
4385 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4386 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4387
4388 // Make room for NumGPArgRegs and NumFPArgRegs.
4389 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4390 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4391
// NOTE(review): listing line 4392, which opens the call that the next line
// completes, is absent here - restore it from the original file.
4393 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4394
4395 FuncInfo->setVarArgsFrameIndex(
4396 MFI.CreateStackObject(Depth, Align(8), false));
4397 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4398
4399 // The fixed integer arguments of a variadic function are stored to the
4400 // VarArgsFrameIndex on the stack so that they may be loaded by
4401 // dereferencing the result of va_next.
4402 for (MCPhysReg GPArgReg : GPArgRegs) {
4403 // Get an existing live-in vreg, or add a new one.
4404 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4405 if (!VReg)
4406 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4407
4408 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4409 SDValue Store =
4410 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4411 MemOps.push_back(Store);
4412 // Increment the address by four for the next argument to store
4413 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4414 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4415 }
4416
4417 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4418 // is set.
4419 // The double arguments are stored to the VarArgsFrameIndex
4420 // on the stack.
4421 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4422 // Get an existing live-in vreg, or add a new one.
4423 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4424 if (!VReg)
4425 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4426
4427 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4428 SDValue Store =
4429 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4430 MemOps.push_back(Store);
4431 // Increment the address by eight for the next argument to store
4432 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4433 PtrVT);
4434 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4435 }
4436 }
4437
// Join all register stores emitted above into the returned chain.
4438 if (!MemOps.empty())
4439 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4440
4441 return Chain;
4442}
4443
4444// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4445// value to MVT::i64 and then truncate to the correct register size.
4446SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4447 EVT ObjectVT, SelectionDAG &DAG,
4448 SDValue ArgVal,
4449 const SDLoc &dl) const {
4450 if (Flags.isSExt())
4451 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4452 DAG.getValueType(ObjectVT));
4453 else if (Flags.isZExt())
4454 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4455 DAG.getValueType(ObjectVT));
4456
4457 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4458}
4459
/// Lower the incoming formal arguments of a function for the 64-bit SVR4
/// (ELF) ABI. A first pass over Ins decides whether the caller is guaranteed
/// to have allocated a parameter save area. A second pass then copies each
/// argument out of its GPR/FPR/VR, or loads it from its stack slot, and
/// appends the value to InVals; by-value aggregates are placed in memory and
/// represented by their address. For variadic functions that use va_start,
/// the remaining GPR argument registers are stored to the stack starting at
/// the current argument offset. Returns the updated chain.
4460SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4461 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4462 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4463 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4464 // TODO: add description of PPC stack frame format, or at least some docs.
4465 //
4466 bool isELFv2ABI = Subtarget.isELFv2ABI();
4467 bool isLittleEndian = Subtarget.isLittleEndian();
4468 MachineFunction &MF = DAG.getMachineFunction();
4469 MachineFrameInfo &MFI = MF.getFrameInfo();
4470 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4471
4472 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4473 "fastcc not supported on varargs functions");
4474
4475 EVT PtrVT = getPointerTy(MF.getDataLayout());
4476 // Potential tail calls could cause overwriting of argument stack slots.
4477 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4478 (CallConv == CallingConv::Fast));
4479 unsigned PtrByteSize = 8;
4480 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4481
4482 static const MCPhysReg GPR[] = {
4483 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4484 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4485 };
4486 static const MCPhysReg VR[] = {
4487 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4488 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4489 };
4490
// NOTE(review): the FPR register table indexed further down is not declared
// in this function; presumably it is defined elsewhere in the file - confirm
// that it holds at least the 13 entries assumed by Num_FPR_Regs.
4491 const unsigned Num_GPR_Regs = std::size(GPR);
4492 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4493 const unsigned Num_VR_Regs = std::size(VR);
4494
4495 // Do a first pass over the arguments to determine whether the ABI
4496 // guarantees that our caller has allocated the parameter save area
4497 // on its stack frame. In the ELFv1 ABI, this is always the case;
4498 // in the ELFv2 ABI, it is true if this is a vararg function or if
4499 // any parameter is located in a stack slot.
4500
4501 bool HasParameterArea = !isELFv2ABI || isVarArg;
4502 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4503 unsigned NumBytes = LinkageSize;
4504 unsigned AvailableFPRs = Num_FPR_Regs;
4505 unsigned AvailableVRs = Num_VR_Regs;
4506 for (const ISD::InputArg &In : Ins) {
4507 if (In.Flags.isNest())
4508 continue;
4509
4510 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4511 LinkageSize, ParamAreaSize, NumBytes,
4512 AvailableFPRs, AvailableVRs))
4513 HasParameterArea = true;
4514 }
4515
4516 // Add DAG nodes to load the arguments or copy them out of registers. On
4517 // entry to a function on PPC, the arguments start after the linkage area,
4518 // although the first ones are often in registers.
4519
4520 unsigned ArgOffset = LinkageSize;
4521 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
// NOTE(review): listing lines 4522-4523 are absent here. MemOps and FuncArg,
// both used below, are not declared in the visible lines; presumably they are
// declared on the missing lines - confirm against the original file.
4524 unsigned CurArgIdx = 0;
4525 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4526 SDValue ArgVal;
4527 bool needsLoad = false;
4528 EVT ObjectVT = Ins[ArgNo].VT;
4529 EVT OrigVT = Ins[ArgNo].ArgVT;
4530 unsigned ObjSize = ObjectVT.getStoreSize();
4531 unsigned ArgSize = ObjSize;
4532 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
// Keep FuncArg pointing at the IR argument this InputArg originates from.
4533 if (Ins[ArgNo].isOrigArg()) {
4534 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4535 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4536 }
4537 // We re-align the argument offset for each argument, except when using the
4538 // fast calling convention, when we need to make sure we do that only when
4539 // we'll actually use a stack slot.
4540 unsigned CurArgOffset;
4541 Align Alignment;
4542 auto ComputeArgOffset = [&]() {
4543 /* Respect alignment of argument on the stack. */
4544 Alignment =
4545 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4546 ArgOffset = alignTo(ArgOffset, Alignment);
4547 CurArgOffset = ArgOffset;
4548 };
4549
4550 if (CallConv != CallingConv::Fast) {
4551 ComputeArgOffset();
4552
4553 /* Compute GPR index associated with argument offset. */
4554 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4555 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4556 }
4557
4558 // FIXME the codegen can be much improved in some cases.
4559 // We do not have to keep everything in memory.
4560 if (Flags.isByVal()) {
4561 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4562
4563 if (CallConv == CallingConv::Fast)
4564 ComputeArgOffset();
4565
4566 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4567 ObjSize = Flags.getByValSize();
4568 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4569 // Empty aggregate parameters do not take up registers. Examples:
4570 // struct { } a;
4571 // union { } b;
4572 // int c[0];
4573 // etc. However, we have to provide a place-holder in InVals, so
4574 // pretend we have an 8-byte item at the current address for that
4575 // purpose.
4576 if (!ObjSize) {
4577 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4578 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4579 InVals.push_back(FIN);
4580 continue;
4581 }
4582
4583 // Create a stack object covering all stack doublewords occupied
4584 // by the argument. If the argument is (fully or partially) on
4585 // the stack, or if the argument is fully in registers but the
4586 // caller has allocated the parameter save anyway, we can refer
4587 // directly to the caller's stack frame. Otherwise, create a
4588 // local copy in our own frame.
4589 int FI;
4590 if (HasParameterArea ||
4591 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4592 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4593 else
4594 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4595 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4596
4597 // Handle aggregates smaller than 8 bytes.
4598 if (ObjSize < PtrByteSize) {
4599 // The value of the object is its address, which differs from the
4600 // address of the enclosing doubleword on big-endian systems.
4601 SDValue Arg = FIN;
4602 if (!isLittleEndian) {
4603 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4604 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4605 }
4606 InVals.push_back(Arg);
4607
// If a GPR is still available, the aggregate arrived in it; store its low
// ObjSize bytes to the slot with a truncating store.
4608 if (GPR_idx != Num_GPR_Regs) {
4609 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4610 FuncInfo->addLiveInAttr(VReg, Flags);
4611 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4612 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4613 SDValue Store =
4614 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4615 MachinePointerInfo(&*FuncArg), ObjType);
4616 MemOps.push_back(Store);
4617 }
4618 // Whether we copied from a register or not, advance the offset
4619 // into the parameter save area by a full doubleword.
4620 ArgOffset += PtrByteSize;
4621 continue;
4622 }
4623
4624 // The value of the object is its address, which is the address of
4625 // its first stack doubleword.
4626 InVals.push_back(FIN);
4627
4628 // Store whatever pieces of the object are in registers to memory.
4629 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4630 if (GPR_idx == Num_GPR_Regs)
4631 break;
4632
4633 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4634 FuncInfo->addLiveInAttr(VReg, Flags);
4635 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4636 SDValue Addr = FIN;
4637 if (j) {
4638 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4639 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4640 }
// The final piece may be narrower than a doubleword; store only the bytes
// that belong to the object.
4641 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4642 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4643 SDValue Store =
4644 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4645 MachinePointerInfo(&*FuncArg, j), ObjType);
4646 MemOps.push_back(Store);
4647 ++GPR_idx;
4648 }
4649 ArgOffset += ArgSize;
4650 continue;
4651 }
4652
4653 switch (ObjectVT.getSimpleVT().SimpleTy) {
4654 default: llvm_unreachable("Unhandled argument type!");
4655 case MVT::i1:
4656 case MVT::i32:
4657 case MVT::i64:
4658 if (Flags.isNest()) {
4659 // The 'nest' parameter, if any, is passed in R11.
4660 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4661 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4662
4663 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4664 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4665
4666 break;
4667 }
4668
4669 // These can be scalar arguments or elements of an integer array type
4670 // passed directly. Clang may use those instead of "byval" aggregate
4671 // types to avoid forcing arguments to memory unnecessarily.
4672 if (GPR_idx != Num_GPR_Regs) {
4673 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4674 FuncInfo->addLiveInAttr(VReg, Flags);
4675 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4676
4677 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4678 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4679 // value to MVT::i64 and then truncate to the correct register size.
4680 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4681 } else {
4682 if (CallConv == CallingConv::Fast)
4683 ComputeArgOffset();
4684
4685 needsLoad = true;
4686 ArgSize = PtrByteSize;
4687 }
4688 if (CallConv != CallingConv::Fast || needsLoad)
4689 ArgOffset += 8;
4690 break;
4691
4692 case MVT::f32:
4693 case MVT::f64:
4694 // These can be scalar arguments or elements of a float array type
4695 // passed directly. The latter are used to implement ELFv2 homogenous
4696 // float aggregates.
4697 if (FPR_idx != Num_FPR_Regs) {
4698 unsigned VReg;
4699
4700 if (ObjectVT == MVT::f32)
4701 VReg = MF.addLiveIn(FPR[FPR_idx],
4702 Subtarget.hasP8Vector()
4703 ? &PPC::VSSRCRegClass
4704 : &PPC::F4RCRegClass);
4705 else
4706 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4707 ? &PPC::VSFRCRegClass
4708 : &PPC::F8RCRegClass);
4709
4710 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4711 ++FPR_idx;
4712 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4713 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4714 // once we support fp <-> gpr moves.
4715
4716 // This can only ever happen in the presence of f32 array types,
4717 // since otherwise we never run out of FPRs before running out
4718 // of GPRs.
4719 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4720 FuncInfo->addLiveInAttr(VReg, Flags);
4721 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4722
// For an f32 carried in a GPR, the argument offset within the doubleword
// together with the endianness decides whether the value sits in the upper
// 32 bits; if so it is shifted down before truncating to i32.
4723 if (ObjectVT == MVT::f32) {
4724 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4725 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4726 DAG.getConstant(32, dl, MVT::i32));
4727 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4728 }
4729
4730 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4731 } else {
4732 if (CallConv == CallingConv::Fast)
4733 ComputeArgOffset();
4734
4735 needsLoad = true;
4736 }
4737
4738 // When passing an array of floats, the array occupies consecutive
4739 // space in the argument area; only round up to the next doubleword
4740 // at the end of the array. Otherwise, each float takes 8 bytes.
4741 if (CallConv != CallingConv::Fast || needsLoad) {
4742 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4743 ArgOffset += ArgSize;
4744 if (Flags.isInConsecutiveRegsLast())
4745 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4746 }
4747 break;
4748 case MVT::v4f32:
4749 case MVT::v4i32:
4750 case MVT::v8i16:
4751 case MVT::v16i8:
4752 case MVT::v2f64:
4753 case MVT::v2i64:
4754 case MVT::v1i128:
4755 case MVT::f128:
4756 // These can be scalar arguments or elements of a vector array type
4757 // passed directly. The latter are used to implement ELFv2 homogenous
4758 // vector aggregates.
4759 if (VR_idx != Num_VR_Regs) {
4760 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4761 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4762 ++VR_idx;
4763 } else {
4764 if (CallConv == CallingConv::Fast)
4765 ComputeArgOffset();
4766 needsLoad = true;
4767 }
4768 if (CallConv != CallingConv::Fast || needsLoad)
4769 ArgOffset += 16;
4770 break;
4771 }
4772
4773 // We need to load the argument to a virtual register if we determined
4774 // above that we ran out of physical registers of the appropriate type.
4775 if (needsLoad) {
// On big-endian targets an object smaller than its slot is read from the
// end of the slot, so skip the leading padding.
4776 if (ObjSize < ArgSize && !isLittleEndian)
4777 CurArgOffset += ArgSize - ObjSize;
4778 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4779 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4780 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4781 }
4782
4783 InVals.push_back(ArgVal);
4784 }
4785
4786 // Area that is at least reserved in the caller of this function.
4787 unsigned MinReservedArea;
4788 if (HasParameterArea)
4789 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4790 else
4791 MinReservedArea = LinkageSize;
4792
4793 // Set the size that is at least reserved in caller of this function. Tail
4794 // call optimized functions' reserved stack space needs to be aligned so that
4795 // taking the difference between two stack areas will result in an aligned
4796 // stack.
4797 MinReservedArea =
4798 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4799 FuncInfo->setMinReservedArea(MinReservedArea);
4800
4801 // If the function takes variable number of arguments, make a frame index for
4802 // the start of the first vararg value... for expansion of llvm.va_start.
4803 // On ELFv2ABI spec, it writes:
4804 // C programs that are intended to be *portable* across different compilers
4805 // and architectures must use the header file <stdarg.h> to deal with variable
4806 // argument lists.
4807 if (isVarArg && MFI.hasVAStart()) {
4808 int Depth = ArgOffset;
4809
4810 FuncInfo->setVarArgsFrameIndex(
4811 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4812 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4813
4814 // If this function is vararg, store any remaining integer argument regs
4815 // to their spots on the stack so that they may be loaded by dereferencing
4816 // the result of va_next.
4817 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4818 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4819 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4820 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4821 SDValue Store =
4822 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4823 MemOps.push_back(Store);
4824 // Increment the address by PtrByteSize (eight) for the next argument to store
4825 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4826 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4827 }
4828 }
4829
// Join all register stores emitted above into the returned chain.
4830 if (!MemOps.empty())
4831 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4832
4833 return Chain;
4834}
4835
4836/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4837/// adjusted to accommodate the arguments for the tailcall.
4838static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4839 unsigned ParamSize) {
4840
4841 if (!isTailCall) return 0;
4842
4844 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4845 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4846 // Remember only if the new adjustment is bigger.
4847 if (SPDiff < FI->getTailCallSPDelta())
4848 FI->setTailCallSPDelta(SPDiff);
4849
4850 return SPDiff;
4851}
4852
// Forward declaration only; the definition is not part of this section of
// the file.
4853static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4854
4855static bool callsShareTOCBase(const Function *Caller,
4856 const GlobalValue *CalleeGV,
4857 const TargetMachine &TM) {
4858 // It does not make sense to call callsShareTOCBase() with a caller that
4859 // is PC Relative since PC Relative callers do not have a TOC.
4860#ifndef NDEBUG
4861 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4862 assert(!STICaller->isUsingPCRelativeCalls() &&
4863 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4864#endif
4865
4866 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4867 // don't have enough information to determine if the caller and callee share
4868 // the same TOC base, so we have to pessimistically assume they don't for
4869 // correctness.
4870 if (!CalleeGV)
4871 return false;
4872
4873 // If the callee is preemptable, then the static linker will use a plt-stub
4874 // which saves the toc to the stack, and needs a nop after the call
4875 // instruction to convert to a toc-restore.
4876 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4877 return false;
4878
4879 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4880 // We may need a TOC restore in the situation where the caller requires a
4881 // valid TOC but the callee is PC Relative and does not.
4882 const Function *F = dyn_cast<Function>(CalleeGV);
4883 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4884
4885 // If we have an Alias we can try to get the function from there.
4886 if (Alias) {
4887 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4888 F = dyn_cast<Function>(GlobalObj);
4889 }
4890
4891 // If we still have no valid function pointer we do not have enough
4892 // information to determine if the callee uses PC Relative calls so we must
4893 // assume that it does.
4894 if (!F)
4895 return false;
4896
4897 // If the callee uses PC Relative we cannot guarantee that the callee won't
4898 // clobber the TOC of the caller and so we must assume that the two
4899 // functions do not share a TOC base.
4900 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4901 if (STICallee->isUsingPCRelativeCalls())
4902 return false;
4903
4904 // If the GV is not a strong definition then we need to assume it can be
4905 // replaced by another function at link time. The function that replaces
4906 // it may not share the same TOC as the caller since the callee may be
4907 // replaced by a PC Relative version of the same function.
4908 if (!CalleeGV->isStrongDefinitionForLinker())
4909 return false;
4910
4911 // The medium and large code models are expected to provide a sufficiently
4912 // large TOC to provide all data addressing needs of a module with a
4913 // single TOC.
4914 if (CodeModel::Medium == TM.getCodeModel() ||
4916 return true;
4917
4918 // Any explicitly-specified sections and section prefixes must also match.
4919 // Also, if we're using -ffunction-sections, then each function is always in
4920 // a different section (the same is true for COMDAT functions).
4921 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4922 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4923 return false;
4924 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4925 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4926 return false;
4927 }
4928
4929 return true;
4930}
4931
// Reports whether any outgoing argument in Outs has to be passed in a stack
// slot under the 64-bit ELF ABI. The arguments are walked in order with the
// same accounting CalculateStackSlotUsed applies elsewhere: 8 GPRs' worth of
// parameter save area, 13 FPRs and 12 VRs. 'nest' arguments are skipped.
// Returns true at the first argument that needs memory, false otherwise.
//
// NOTE(review): listing line 4933, which carries this function's name and
// its first parameter, is absent here. The body reads a `Subtarget` object,
// so presumably that is the first parameter - restore the line from the
// original file.
4932static bool
4934 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4935 assert(Subtarget.is64BitELFABI());
4936
4937 const unsigned PtrByteSize = 8;
4938 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4939
4940 static const MCPhysReg GPR[] = {
4941 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4942 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4943 };
4944 static const MCPhysReg VR[] = {
4945 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4946 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4947 };
4948
// The register tables above are only used for their element counts.
4949 const unsigned NumGPRs = std::size(GPR);
4950 const unsigned NumFPRs = 13;
4951 const unsigned NumVRs = std::size(VR);
4952 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4953
// Running state threaded through CalculateStackSlotUsed: the current
// argument offset and the FPRs/VRs still available.
4954 unsigned NumBytes = LinkageSize;
4955 unsigned AvailableFPRs = NumFPRs;
4956 unsigned AvailableVRs = NumVRs;
4957
4958 for (const ISD::OutputArg& Param : Outs) {
4959 if (Param.Flags.isNest()) continue;
4960
4961 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4962 LinkageSize, ParamAreaSize, NumBytes,
4963 AvailableFPRs, AvailableVRs))
4964 return true;
4965 }
4966 return false;
4967}
4968
4969static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4970 if (CB.arg_size() != CallerFn->arg_size())
4971 return false;
4972
4973 auto CalleeArgIter = CB.arg_begin();
4974 auto CalleeArgEnd = CB.arg_end();
4975 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4976
4977 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4978 const Value* CalleeArg = *CalleeArgIter;
4979 const Value* CallerArg = &(*CallerArgIter);
4980 if (CalleeArg == CallerArg)
4981 continue;
4982
4983 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4984 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4985 // }
4986 // 1st argument of callee is undef and has the same type as caller.
4987 if (CalleeArg->getType() == CallerArg->getType() &&
4988 isa<UndefValue>(CalleeArg))
4989 continue;
4990
4991 return false;
4992 }
4993
4994 return true;
4995}
4996
4997// Returns true if TCO is possible between the callers and callees
4998// calling conventions.
4999static bool
5001 CallingConv::ID CalleeCC) {
5002 // Tail calls are possible with fastcc and ccc.
5003 auto isTailCallableCC = [] (CallingConv::ID CC){
5004 return CC == CallingConv::C || CC == CallingConv::Fast;
5005 };
5006 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5007 return false;
5008
5009 // We can safely tail call both fastcc and ccc callees from a c calling
5010 // convention caller. If the caller is fastcc, we may have less stack space
5011 // than a non-fastcc caller with the same signature so disable tail-calls in
5012 // that case.
5013 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5014}
5015
5016bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5017 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5018 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5020 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5021 bool isCalleeExternalSymbol) const {
5022 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5023
5024 if (DisableSCO && !TailCallOpt) return false;
5025
5026 // Variadic argument functions are not supported.
5027 if (isVarArg) return false;
5028
5029 // Check that the calling conventions are compatible for tco.
5030 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5031 return false;
5032
5033 // Caller contains any byval parameter is not supported.
5034 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5035 return false;
5036
5037 // Callee contains any byval parameter is not supported, too.
5038 // Note: This is a quick work around, because in some cases, e.g.
5039 // caller's stack size > callee's stack size, we are still able to apply
5040 // sibling call optimization. For example, gcc is able to do SCO for caller1
5041 // in the following example, but not for caller2.
5042 // struct test {
5043 // long int a;
5044 // char ary[56];
5045 // } gTest;
5046 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5047 // b->a = v.a;
5048 // return 0;
5049 // }
5050 // void caller1(struct test a, struct test c, struct test *b) {
5051 // callee(gTest, b); }
5052 // void caller2(struct test *b) { callee(gTest, b); }
5053 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5054 return false;
5055
5056 // If callee and caller use different calling conventions, we cannot pass
5057 // parameters on stack since offsets for the parameter area may be different.
5058 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5059 return false;
5060
5061 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5062 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5063 // callee potentially have different TOC bases then we cannot tail call since
5064 // we need to restore the TOC pointer after the call.
5065 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5066 // We cannot guarantee this for indirect calls or calls to external functions.
5067 // When PC-Relative addressing is used, the concept of the TOC is no longer
5068 // applicable so this check is not required.
5069 // Check first for indirect calls.
5070 if (!Subtarget.isUsingPCRelativeCalls() &&
5071 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5072 return false;
5073
5074 // Check if we share the TOC base.
5075 if (!Subtarget.isUsingPCRelativeCalls() &&
5076 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5077 return false;
5078
5079 // TCO allows altering callee ABI, so we don't have to check further.
5080 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5081 return true;
5082
5083 if (DisableSCO) return false;
5084
5085 // If callee use the same argument list that caller is using, then we can
5086 // apply SCO on this case. If it is not, then we need to check if callee needs
5087 // stack for passing arguments.
5088 // PC Relative tail calls may not have a CallBase.
5089 // If there is no CallBase we cannot verify if we have the same argument
5090 // list so assume that we don't have the same argument list.
5091 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5092 needStackSlotPassParameters(Subtarget, Outs))
5093 return false;
5094 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5095 return false;
5096
5097 return true;
5098}
5099
5100/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5101/// for tail call optimization. Targets which want to do tail call
5102/// optimization should implement this function.
5103bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5104 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5105 CallingConv::ID CallerCC, bool isVarArg,
5106 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5107 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5108 return false;
5109
5110 // Variable argument functions are not supported.
5111 if (isVarArg)
5112 return false;
5113
5114 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5115 // Functions containing by val parameters are not supported.
5116 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5117 return false;
5118
5119 // Non-PIC/GOT tail calls are supported.
5120 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5121 return true;
5122
5123 // At the moment we can only do local tail calls (in same module, hidden
5124 // or protected) if we are generating PIC.
5125 if (CalleeGV)
5126 return CalleeGV->hasHiddenVisibility() ||
5127 CalleeGV->hasProtectedVisibility();
5128 }
5129
5130 return false;
5131}
5132
5133/// isCallCompatibleAddress - Return the immediate to use if the specified
5134/// 32-bit value is representable in the immediate field of a BxA instruction.
5137 if (!C) return nullptr;
5138
5139 int Addr = C->getZExtValue();
5140 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5141 SignExtend32<26>(Addr) != Addr)
5142 return nullptr; // Top 6 bits have to be sext of immediate.
5143
5144 return DAG
5146 (int)C->getZExtValue() >> 2, SDLoc(Op),
5148 .getNode();
5149}
5150
5151namespace {
5152
5153struct TailCallArgumentInfo {
5154 SDValue Arg;
5155 SDValue FrameIdxOp;
5156 int FrameIdx = 0;
5157
5158 TailCallArgumentInfo() = default;
5159};
5160
5161} // end anonymous namespace
5162
5163/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5165 SelectionDAG &DAG, SDValue Chain,
5166 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5167 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5168 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5169 SDValue Arg = TailCallArgs[i].Arg;
5170 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5171 int FI = TailCallArgs[i].FrameIdx;
5172 // Store relative to framepointer.
5173 MemOpChains.push_back(DAG.getStore(
5174 Chain, dl, Arg, FIN,
5176 }
5177}
5178
5179/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5180/// the appropriate stack slot for the tail call optimized function call.
5182 SDValue OldRetAddr, SDValue OldFP,
5183 int SPDiff, const SDLoc &dl) {
5184 if (SPDiff) {
5185 // Calculate the new stack slot for the return address.
5187 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5188 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5189 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5190 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5191 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5192 NewRetAddrLoc, true);
5193 SDValue NewRetAddrFrIdx =
5194 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5195 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5196 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5197 }
5198 return Chain;
5199}
5200
5201/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5202/// the position of the argument.
5204 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5205 int SPDiff, unsigned ArgOffset,
5206 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5207 int Offset = ArgOffset + SPDiff;
5208 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5209 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5210 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5211 SDValue FIN = DAG.getFrameIndex(FI, VT);
5212 TailCallArgumentInfo Info;
5213 Info.Arg = Arg;
5214 Info.FrameIdxOp = FIN;
5215 Info.FrameIdx = FI;
5216 TailCallArguments.push_back(Info);
5217}
5218
5219/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5220/// stack slot. Returns the chain as result and the loaded frame pointers in
5221/// LROpOut/FPOpout. Used when tail calling.
5222SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5223 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5224 SDValue &FPOpOut, const SDLoc &dl) const {
5225 if (SPDiff) {
5226 // Load the LR and FP stack slot for later adjusting.
5227 LROpOut = getReturnAddrFrameIndex(DAG);
5228 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5229 MachinePointerInfo());
5230 Chain = SDValue(LROpOut.getNode(), 1);
5231 }
5232 return Chain;
5233}
5234
5235/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5236/// by "Src" to address "Dst" of size "Size". Alignment information is
5237/// specified by the specific parameter attribute. The copy will be passed as
5238/// a byval function parameter.
5239/// Sometimes what we are copying is the end of a larger object, the part that
5240/// does not fit in registers.
5242 SDValue Chain, ISD::ArgFlagsTy Flags,
5243 SelectionDAG &DAG, const SDLoc &dl) {
5244 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5245 Align Alignment = Flags.getNonZeroByValAlign();
5246 return DAG.getMemcpy(
5247 Chain, dl, Dst, Src, SizeNode, Alignment, Alignment, false, false,
5248 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5249}
5250
5251/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5252/// tail calls.
5254 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5255 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5256 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5257 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5259 if (!isTailCall) {
5260 if (isVector) {
5261 SDValue StackPtr;
5262 if (isPPC64)
5263 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5264 else
5265 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5266 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5267 DAG.getConstant(ArgOffset, dl, PtrVT));
5268 }
5269 MemOpChains.push_back(
5270 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5271 // Calculate and remember argument location.
5272 } else
5273 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5274 TailCallArguments);
5275}
5276
5277static void
5279 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5280 SDValue FPOp,
5281 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5282 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5283 // might overwrite each other in case of tail call optimization.
5284 SmallVector<SDValue, 8> MemOpChains2;
5285 // Do not flag preceding copytoreg stuff together with the following stuff.
5286 InGlue = SDValue();
5287 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5288 MemOpChains2, dl);
5289 if (!MemOpChains2.empty())
5290 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5291
5292 // Store the return address to the appropriate stack slot.
5293 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5294
5295 // Emit callseq_end just before tailcall node.
5296 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5297 InGlue = Chain.getValue(1);
5298}
5299
5300// Is this global address that of a function that can be called by name? (as
5301// opposed to something that must hold a descriptor for an indirect call).
5302static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5303 if (GV) {
5304 if (GV->isThreadLocal())
5305 return false;
5306
5307 return GV->getValueType()->isFunctionTy();
5308 }
5309
5310 return false;
5311}
5312
5313SDValue PPCTargetLowering::LowerCallResult(
5314 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5315 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5316 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5318 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5319 *DAG.getContext());
5320
5321 CCRetInfo.AnalyzeCallResult(
5322 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5324 : RetCC_PPC);
5325
5326 // Copy all of the result registers out of their specified physreg.
5327 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5328 CCValAssign &VA = RVLocs[i];
5329 assert(VA.isRegLoc() && "Can only return in registers!");
5330
5331 SDValue Val;
5332
5333 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5334 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5335 InGlue);
5336 Chain = Lo.getValue(1);
5337 InGlue = Lo.getValue(2);
5338 VA = RVLocs[++i]; // skip ahead to next loc
5339 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5340 InGlue);
5341 Chain = Hi.getValue(1);
5342 InGlue = Hi.getValue(2);
5343 if (!Subtarget.isLittleEndian())
5344 std::swap (Lo, Hi);
5345 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5346 } else {
5347 Val = DAG.getCopyFromReg(Chain, dl,
5348 VA.getLocReg(), VA.getLocVT(), InGlue);
5349 Chain = Val.getValue(1);
5350 InGlue = Val.getValue(2);
5351 }
5352
5353 switch (VA.getLocInfo()) {
5354 default: llvm_unreachable("Unknown loc info!");
5355 case CCValAssign::Full: break;
5356 case CCValAssign::AExt:
5357 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5358 break;
5359 case CCValAssign::ZExt:
5360 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5361 DAG.getValueType(VA.getValVT()));
5362 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5363 break;
5364 case CCValAssign::SExt:
5365 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5366 DAG.getValueType(VA.getValVT()));
5367 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5368 break;
5369 }
5370
5371 InVals.push_back(Val);
5372 }
5373
5374 return Chain;
5375}
5376
5377static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5378 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5379 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5380 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5381
5382 // PatchPoint calls are not indirect.
5383 if (isPatchPoint)
5384 return false;
5385
5387 return false;
5388
5389 // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5390 // becuase the immediate function pointer points to a descriptor instead of
5391 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5392 // pointer immediate points to the global entry point, while the BLA would
5393 // need to jump to the local entry point (see rL211174).
5394 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5395 isBLACompatibleAddress(Callee, DAG))
5396 return false;
5397
5398 return true;
5399}
5400
5401// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5402static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5403 return Subtarget.isAIXABI() ||
5404 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5405}
5406
5408 const Function &Caller, const SDValue &Callee,
5409 const PPCSubtarget &Subtarget,
5410 const TargetMachine &TM,
5411 bool IsStrictFPCall = false) {
5412 if (CFlags.IsTailCall)
5413 return PPCISD::TC_RETURN;
5414
5415 unsigned RetOpc = 0;
5416 // This is a call through a function pointer.
5417 if (CFlags.IsIndirect) {
5418 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5419 // indirect calls. The save of the caller's TOC pointer to the stack will be
5420 // inserted into the DAG as part of call lowering. The restore of the TOC
5421 // pointer is modeled by using a pseudo instruction for the call opcode that
5422 // represents the 2 instruction sequence of an indirect branch and link,
5423 // immediately followed by a load of the TOC pointer from the stack save
5424 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5425 // as it is not saved or used.
5426 if (Subtarget.usePointerGlueHelper())
5427 RetOpc = PPCISD::BL_LOAD_TOC;
5428 else
5429 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5430 : PPCISD::BCTRL;
5431 } else if (Subtarget.isUsingPCRelativeCalls()) {
5432 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5433 RetOpc = PPCISD::CALL_NOTOC;
5434 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5435 // The ABIs that maintain a TOC pointer accross calls need to have a nop
5436 // immediately following the call instruction if the caller and callee may
5437 // have different TOC bases. At link time if the linker determines the calls
5438 // may not share a TOC base, the call is redirected to a trampoline inserted
5439 // by the linker. The trampoline will (among other things) save the callers
5440 // TOC pointer at an ABI designated offset in the linkage area and the
5441 // linker will rewrite the nop to be a load of the TOC pointer from the
5442 // linkage area into gpr2.
5443 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5444 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5445 RetOpc =
5446 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5447 } else
5448 RetOpc = PPCISD::CALL;
5449 if (IsStrictFPCall) {
5450 switch (RetOpc) {
5451 default:
5452 llvm_unreachable("Unknown call opcode");
5453 case PPCISD::BCTRL_LOAD_TOC:
5454 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5455 break;
5456 case PPCISD::BCTRL:
5457 RetOpc = PPCISD::BCTRL_RM;
5458 break;
5459 case PPCISD::BL_LOAD_TOC:
5460 RetOpc = PPCISD::BL_LOAD_TOC_RM;
5461 break;
5462 case PPCISD::CALL_NOTOC:
5463 RetOpc = PPCISD::CALL_NOTOC_RM;
5464 break;
5465 case PPCISD::CALL:
5466 RetOpc = PPCISD::CALL_RM;
5467 break;
5468 case PPCISD::CALL_NOP:
5469 RetOpc = PPCISD::CALL_NOP_RM;
5470 break;
5471 }
5472 }
5473 return RetOpc;
5474}
5475
5476static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5477 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5478 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5479 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5480 return SDValue(Dest, 0);
5481
5482 // Returns true if the callee is local, and false otherwise.
5483 auto isLocalCallee = [&]() {
5485 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5486
5487 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5489 };
5490
5491 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5492 // a static relocation model causes some versions of GNU LD (2.17.50, at
5493 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5494 // built with secure-PLT.
5495 bool UsePlt =
5496 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5498
5499 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5500 const TargetMachine &TM = Subtarget.getTargetMachine();
5502 auto *S =
5503 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5504
5506 return DAG.getMCSymbol(S, PtrVT);
5507 };
5508
5509 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5510 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5511 if (isFunctionGlobalAddress(GV)) {
5512 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5513
5514 if (Subtarget.isAIXABI()) {
5515 return getAIXFuncEntryPointSymbolSDNode(GV);
5516 }
5517 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5518 UsePlt ? PPCII::MO_PLT : 0);
5519 }
5520
5522 const char *SymName = S->getSymbol();
5523 if (Subtarget.isAIXABI()) {
5524 // If there exists a user-declared function whose name is the same as the
5525 // ExternalSymbol's, then we pick up the user-declared version.
5527 if (const Function *F =
5528 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5529 return getAIXFuncEntryPointSymbolSDNode(F);
5530
5531 // On AIX, direct function calls reference the symbol for the function's
5532 // entry point, which is named by prepending a "." before the function's
5533 // C-linkage name. A Qualname is returned here because an external
5534 // function entry point is a csect with XTY_ER property.
5535 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5536 auto &Context = DAG.getMachineFunction().getContext();
5537 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5538 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5540 return Sec->getQualNameSymbol();
5541 };
5542
5543 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5544 }
5545 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5546 UsePlt ? PPCII::MO_PLT : 0);
5547 }
5548
5549 // No transformation needed.
5550 assert(Callee.getNode() && "What no callee?");
5551 return Callee;
5552}
5553
5555 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5556 "Expected a CALLSEQ_STARTSDNode.");
5557
5558 // The last operand is the chain, except when the node has glue. If the node
5559 // has glue, then the last operand is the glue, and the chain is the second
5560 // last operand.
5561 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5562 if (LastValue.getValueType() != MVT::Glue)
5563 return LastValue;
5564
5565 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5566}
5567
5568// Creates the node that moves a functions address into the count register
5569// to prepare for an indirect call instruction.
5570static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5571 SDValue &Glue, SDValue &Chain,
5572 const SDLoc &dl) {
5573 SDValue MTCTROps[] = {Chain, Callee, Glue};
5574 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5575 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5576 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5577 // The glue is the second value produced.
5578 Glue = Chain.getValue(1);
5579}
5580
5582 SDValue &Glue, SDValue &Chain,
5583 SDValue CallSeqStart,
5584 const CallBase *CB, const SDLoc &dl,
5585 bool hasNest,
5586 const PPCSubtarget &Subtarget) {
5587 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5588 // entry point, but to the function descriptor (the function entry point
5589 // address is part of the function descriptor though).
5590 // The function descriptor is a three doubleword structure with the
5591 // following fields: function entry point, TOC base address and
5592 // environment pointer.
5593 // Thus for a call through a function pointer, the following actions need
5594 // to be performed:
5595 // 1. Save the TOC of the caller in the TOC save area of its stack
5596 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5597 // 2. Load the address of the function entry point from the function
5598 // descriptor.
5599 // 3. Load the TOC of the callee from the function descriptor into r2.
5600 // 4. Load the environment pointer from the function descriptor into
5601 // r11.
5602 // 5. Branch to the function entry point address.
5603 // 6. On return of the callee, the TOC of the caller needs to be
5604 // restored (this is done in FinishCall()).
5605 //
5606 // The loads are scheduled at the beginning of the call sequence, and the
5607 // register copies are flagged together to ensure that no other
5608 // operations can be scheduled in between. E.g. without flagging the
5609 // copies together, a TOC access in the caller could be scheduled between
5610 // the assignment of the callee TOC and the branch to the callee, which leads
5611 // to incorrect code.
5612
5613 // Start by loading the function address from the descriptor.
5614 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5615 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5619
5620 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5621
5622 // Registers used in building the DAG.
5623 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5624 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5625
5626 // Offsets of descriptor members.
5627 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5628 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5629
5630 const MVT RegVT = Subtarget.getScalarIntVT();
5631 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5632
5633 // One load for the functions entry point address.
5634 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5635 Alignment, MMOFlags);
5636
5637 // One for loading the TOC anchor for the module that contains the called
5638 // function.
5639 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5640 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5641 SDValue TOCPtr =
5642 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5643 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5644
5645 // One for loading the environment pointer.
5646 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5647 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5648 SDValue LoadEnvPtr =
5649 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5650 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5651
5652
5653 // Then copy the newly loaded TOC anchor to the TOC pointer.
5654 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5655 Chain = TOCVal.getValue(0);
5656 Glue = TOCVal.getValue(1);
5657
5658 // If the function call has an explicit 'nest' parameter, it takes the
5659 // place of the environment pointer.
5660 assert((!hasNest || !Subtarget.isAIXABI()) &&
5661 "Nest parameter is not supported on AIX.");
5662 if (!hasNest) {
5663 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5664 Chain = EnvVal.getValue(0);
5665 Glue = EnvVal.getValue(1);
5666 }
5667
5668 // The rest of the indirect call sequence is the same as the non-descriptor
5669 // DAG.
5670 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5671}
5672
5674 SDValue &Glue, SDValue &Chain,
5675 SDValue CallSeqStart, const CallBase *CB,
5676 const SDLoc &dl, bool hasNest,
5677 const PPCSubtarget &Subtarget) {
5678 // On AIX there is a feature ("out of line glue code") which uses a special
5679 // trampoline function ._ptrgl to do the indirect call. If this option is
5680 // enabled we instead simply load the address of the descriptor into gpr11,
5681 // with the arguments in the 'normal' registers and branch to the ._ptrgl
5682 // stub.
5683 const MCRegister PtrGlueReg = Subtarget.getGlueCodeDescriptorRegister();
5684 SDValue MoveToPhysicalReg =
5685 DAG.getCopyToReg(Chain, dl, PtrGlueReg, Callee, Glue);
5686 Chain = MoveToPhysicalReg.getValue(0);
5687 Glue = MoveToPhysicalReg.getValue(1);
5688}
5689
5690static void
5692 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5693 SelectionDAG &DAG,
5694 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5695 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5696 const PPCSubtarget &Subtarget) {
5697 const bool IsPPC64 = Subtarget.isPPC64();
5698 // MVT for a general purpose register.
5699 const MVT RegVT = Subtarget.getScalarIntVT();
5700
5701 // First operand is always the chain.
5702 Ops.push_back(Chain);
5703
5704 // If it's a direct call pass the callee as the second operand.
5705 if (!CFlags.IsIndirect)
5706 Ops.push_back(Callee);
5707 else if (Subtarget.usePointerGlueHelper()) {
5708 Ops.push_back(Callee);
5709 // Add the register used to pass the descriptor address.
5710 Ops.push_back(
5711 DAG.getRegister(Subtarget.getGlueCodeDescriptorRegister(), RegVT));
5712 } else {
5713 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5714
5715 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5716 // on the stack (this would have been done in `LowerCall_64SVR4` or
5717 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5718 // represents both the indirect branch and a load that restores the TOC
5719 // pointer from the linkage area. The operand for the TOC restore is an add
5720 // of the TOC save offset to the stack pointer. This must be the second
5721 // operand: after the chain input but before any other variadic arguments.
5722 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5723 // saved or used.
5724 if (isTOCSaveRestoreRequired(Subtarget)) {
5725 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5726
5727 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5728 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5729 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5730 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5731 Ops.push_back(AddTOC);
5732 }
5733
5734 // Add the register used for the environment pointer.
5735 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5736 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5737 RegVT));
5738
5739
5740 // Add CTR register as callee so a bctr can be emitted later.
5741 if (CFlags.IsTailCall)
5742 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5743 }
5744
5745 // If this is a tail call add stack pointer delta.
5746 if (CFlags.IsTailCall)
5747 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5748
5749 // Add argument registers to the end of the list so that they are known live
5750 // into the call.
5751 for (const auto &[Reg, N] : RegsToPass)
5752 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5753
5754 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5755 // no way to mark dependencies as implicit here.
5756 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5757 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5758 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5759 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5760
5761 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5762 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5763 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5764
5765 // Add a register mask operand representing the call-preserved registers.
5766 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5767 const uint32_t *Mask =
5768 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5769 assert(Mask && "Missing call preserved mask for calling convention");
5770 Ops.push_back(DAG.getRegisterMask(Mask));
5771
5772 // If the glue is valid, it is the last operand.
5773 if (Glue.getNode())
5774 Ops.push_back(Glue);
5775}
5776
// Emit the call node for a call whose arguments have already been placed, and
// lower its results.
//
// In order, this: records TOC base pointer use when the ABI calls for it,
// selects the call opcode, transforms or prepares the callee (direct call,
// function-descriptor indirect call, `_ptrgl` glue-helper call, or plain
// indirect call), builds the operand list, and then either
//   * returns a chain-only node for a tail call, or
//   * emits the call, closes the call sequence with CALLSEQ_END and returns
//     the result of LowerCallResult.
//
// `Callee` is taken by reference and is reassigned for direct calls and for
// glue-helper calls; it is also handed to the prepare* helpers.
// `SPDiff` is only forwarded to buildCallOperands here. `NumBytes` is the
// frame size given to CALLSEQ_END.
// NOTE(review): listing lines 5810, 5830 and 5845 are absent from this
// listing, so the declaration of `Ops` and part of the `BytesCalleePops`
// condition are not visible here — confirm against the upstream file.
5777SDValue PPCTargetLowering::FinishCall(
5778 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5779 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5780 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5781 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5782 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5783
// 64-bit ELF without PC-relative calls, and AIX, are flagged as using the
// TOC base pointer.
5784 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5785 Subtarget.isAIXABI())
5786 setUsesTOCBasePtr(DAG);
5787
// The opcode is chosen before the callee is rewritten below. A null CB is
// treated as a non-strict-FP call.
5788 unsigned CallOpc =
5789 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5790 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5791
// Direct calls only need the callee node transformed. Indirect calls are
// prepared according to whether the subtarget uses function descriptors and,
// if so, whether the `_ptrgl` glue helper is used.
5792 if (!CFlags.IsIndirect)
5793 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5794 else if (Subtarget.usesFunctionDescriptors()) {
5795 if (Subtarget.usePointerGlueHelper()) {
5796 prepareOutOfLineGlueCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl,
5797 CFlags.HasNest, Subtarget);
// The call itself then targets the `_ptrgl` external symbol.
5798 SDValue PtrGlueCallee =
5799 DAG.getExternalSymbol("_ptrgl", getPointerTy(DAG.getDataLayout()));
5800 Callee = transformCallee(PtrGlueCallee, DAG, dl, Subtarget);
5801 } else {
5802 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5803 dl, CFlags.HasNest, Subtarget);
5804 }
5805 } else {
5806 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5807 }
5808
5809 // Build the operand list for the call instruction.
// NOTE(review): `Ops` is used below but its declaration (listing line 5810)
// is not present in this listing.
5811 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5812 SPDiff, Subtarget);
5813
5814 // Emit tail call.
5815 if (CFlags.IsTailCall) {
5816 // Indirect tail call when using PC Relative calls do not have the same
5817 // constraints.
5818 assert(((Callee.getOpcode() == ISD::Register &&
5819 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5820 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5821 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5822 isa<ConstantSDNode>(Callee) ||
5823 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5824 "Expecting a global address, external symbol, absolute value, "
5825 "register or an indirect tail call when PC Relative calls are "
5826 "used.");
5827 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5828 assert(CallOpc == PPCISD::TC_RETURN &&
5829 "Unexpected call opcode for a tail call.");
// NOTE(review): listing line 5830 is absent here.
// The tail-call node produces only a chain (MVT::Other); no CALLSEQ_END or
// result lowering follows it.
5831 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5832 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5833 return Ret;
5834 }
5835
// A regular call produces a chain and a glue value.
5836 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5837 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5838 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5839 Glue = Chain.getValue(1);
5840
5841 // When performing tail call optimization the callee pops its arguments off
5842 // the stack. Account for this here so these bytes can be pushed back on in
5843 // PPCFrameLowering::eliminateCallFramePseudoInstr.
// NOTE(review): the second operand of the && below (listing line 5845) is
// absent from this listing.
5844 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5846 ? NumBytes
5847 : 0;
5848
5849 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5850 Glue = Chain.getValue(1);
5851
5852 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5853 DAG, InVals);
5854}
5855
// NOTE(review): the signature line of this definition (listing line 5856) is
// absent from this listing, as are listing lines 5865-5866, which presumably
// declare the `Outs` and `Ins` containers used below — confirm against the
// upstream file.
//
// Body summary: decides tail-call eligibility for the call site `CB` from
// IR-level information. Returns false when the call site has no directly
// called function; otherwise gathers the callee's return info and forwards
// the calling conventions, vararg-ness and caller to isEligibleForTCO.
5857 CallingConv::ID CalleeCC = CB->getCallingConv();
5858 const Function *CallerFunc = CB->getCaller();
5859 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5860 const Function *CalleeFunc = CB->getCalledFunction();
// No directly called function is available for this call site.
5861 if (!CalleeFunc)
5862 return false;
5863 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5864
5867
// Fill `Outs` from the callee's return type and attributes.
5868 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5869 CalleeFunc->getAttributes(), Outs, *this,
5870 CalleeFunc->getDataLayout());
5871
// The callee comes from an IR Function here, so it is never reported as an
// external symbol.
5872 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5873 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5874 false /*isCalleeExternalSymbol*/);
5875}
5876
// Decide whether a call may be lowered as a tail call.
//
// Returns false when the subtarget uses long calls, unless the call site is
// marked musttail. Otherwise the decision is delegated by ABI:
//   * 64-bit SVR4: IsEligibleForTailCallOptimization_64SVR4, which receives
//     every argument of this function;
//   * anything else: IsEligibleForTailCallOptimization, which receives only
//     the callee, the two calling conventions, isVarArg and Ins.
// `CB` may be null; it is checked before being dereferenced here.
// NOTE(review): listing line 5880 is absent from the parameter list below;
// it presumably declares the `Outs` parameter that the body uses.
5877bool PPCTargetLowering::isEligibleForTCO(
5878 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5879 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5881 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5882 bool isCalleeExternalSymbol) const {
// Long calls rule out tail calls except for musttail call sites.
5883 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5884 return false;
5885
5886 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5887 return IsEligibleForTailCallOptimization_64SVR4(
5888 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5889 isCalleeExternalSymbol);
5890 else
5891 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5892 isVarArg, Ins);
5893}
5894
// Entry point for lowering the call described by `CLI`.
//
// If a tail call was requested, eligibility is re-evaluated with
// isEligibleForTCO and the answer is written back through CLI.IsTailCall.
// A musttail call site that cannot be tail-called is a fatal error. When long
// calls are in use, a global-address callee of a non-tail call is first
// turned into an address via LowerGlobalAddress. Finally the call is
// dispatched to the AIX, 64-bit SVR4 or 32-bit SVR4 lowering routine.
// NOTE(review): listing lines 5900, 5902 and 5912 are absent from this
// listing; `Outs`, `Ins` and `MF` are used below and are presumably declared
// on those lines — confirm against the upstream file.
5895SDValue
5896PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5897 SmallVectorImpl<SDValue> &InVals) const {
5898 SelectionDAG &DAG = CLI.DAG;
5899 SDLoc &dl = CLI.DL;
5901 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5903 SDValue Chain = CLI.Chain;
5904 SDValue Callee = CLI.Callee;
// Bound by reference: assignments below update CLI.IsTailCall.
5905 bool &isTailCall = CLI.IsTailCall;
5906 CallingConv::ID CallConv = CLI.CallConv;
5907 bool isVarArg = CLI.IsVarArg;
5908 bool isPatchPoint = CLI.IsPatchPoint;
5909 const CallBase *CB = CLI.CB;
5910
5911 if (isTailCall) {
5913 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
// GV stays null when the callee is not a global address node.
5914 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5915 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5916 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5917
5918 isTailCall =
5919 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5920 &(MF.getFunction()), IsCalleeExternalSymbol);
5921 if (isTailCall) {
// Statistics: every tail call is counted, and it is additionally counted as
// a sibling call when GuaranteedTailCallOpt is off.
5922 ++NumTailCalls;
5923 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5924 ++NumSiblingCalls;
5925
5926 // PC Relative calls no longer guarantee that the callee is a Global
5927 // Address Node. The callee could be an indirect tail call in which
5928 // case the SDValue for the callee could be a load (to load the address
5929 // of a function pointer) or it may be a register copy (to move the
5930 // address of the callee from a function parameter into a virtual
5931 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5932 assert((Subtarget.isUsingPCRelativeCalls() ||
5933 isa<GlobalAddressSDNode>(Callee)) &&
5934 "Callee should be an llvm::Function object.");
5935
5936 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5937 << "\nTCO callee: ");
5938 LLVM_DEBUG(Callee.dump());
5939 }
5940 }
5941
// musttail is a hard requirement: failing to tail-call such a site aborts.
5942 if (!isTailCall && CB && CB->isMustTailCall())
5943 report_fatal_error("failed to perform tail call elimination on a call "
5944 "site marked musttail");
5945
5946 // When long calls (i.e. indirect calls) are always used, calls are always
5947 // made via function pointer. If we have a function name, first translate it
5948 // into a pointer.
5949 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5950 !isTailCall)
5951 Callee = LowerGlobalAddress(Callee, DAG);
5952
// Bundle the per-call properties. The nest flag is only set on 64-bit ELF,
// when some outgoing argument carries the nest attribute.
5953 CallFlags CFlags(
5954 CallConv, isTailCall, isVarArg, isPatchPoint,
5955 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5956 // hasNest
5957 Subtarget.is64BitELFABI() &&
5958 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5959 CLI.NoMerge);
5960
// Dispatch on ABI: AIX first, then SVR4 split by pointer width.
5961 if (Subtarget.isAIXABI())
5962 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5963 InVals, CB);
5964
5965 assert(Subtarget.isSVR4ABI());
5966 if (Subtarget.isPPC64())
5967 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5968 InVals, CB);
5969 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5970 InVals, CB);
5971}
5972
// Lower an outgoing call for the 32-bit SVR4 ABI.
//
// Outline:
//   1. Assign a location to every outgoing argument (CC_PPC32_SVR4, with a
//      separate path for the variable arguments of a vararg call), then
//      assign frame space to by-value aggregates (CC_PPC32_SVR4_ByVal) on top
//      of the space already allocated.
//   2. Open the call sequence and load the return address / frame pointer
//      for a possible tail call.
//   3. Walk the assigned locations: copy by-value aggregates into the
//      caller's frame and pass their address, extend i1 to i32, and put each
//      argument in a register or a stack slot (for tail calls the stack
//      destination is only recorded in TailCallArguments).
//   4. Chain and glue the register copies, set or clear CR bit 6 for vararg
//      calls, run PrepareTailCall if needed, and finish with FinishCall.
// NOTE(review): listing lines 5975, 5978, 6009, 6031 and 6081 are absent from
// this listing. `Outs`, `DAG`, `InVals`, `ArgLocs` and `RegsToPass` are used
// below without a visible declaration and are presumably introduced on those
// lines — confirm against the upstream file.
5973SDValue PPCTargetLowering::LowerCall_32SVR4(
5974 SDValue Chain, SDValue Callee, CallFlags CFlags,
5976 const SmallVectorImpl<SDValue> &OutVals,
5977 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5979 const CallBase *CB) const {
5980 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5981 // of the 32-bit SVR4 ABI stack frame layout.
5982
5983 const CallingConv::ID CallConv = CFlags.CallConv;
5984 const bool IsVarArg = CFlags.IsVarArg;
5985 const bool IsTailCall = CFlags.IsTailCall;
5986
5987 assert((CallConv == CallingConv::C ||
5988 CallConv == CallingConv::Cold ||
5989 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5990
// Alignment used for every AllocateStack call in this function.
5991 const Align PtrAlign(4);
5992
5993 MachineFunction &MF = DAG.getMachineFunction();
5994
5995 // Mark this function as potentially containing a function that contains a
5996 // tail call. As a consequence the frame pointer will be used for dynamicalloc
5997 // and restoring the callers stack pointer in this functions epilog. This is
5998 // done because by tail calling the called function might overwrite the value
5999 // in this function's (MF) stack pointer stack slot 0(SP).
6000 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6001 CallConv == CallingConv::Fast)
6002 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6003
6004 // Count how many bytes are to be pushed on the stack, including the linkage
6005 // area, parameter list area and the part of the local variable space which
6006 // contains copies of aggregates which are passed by value.
6007
6008 // Assign locations to all of the outgoing arguments.
6010 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6011
6012 // Reserve space for the linkage area on the stack.
6013 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6014 PtrAlign);
6015
6016 if (IsVarArg) {
6017 // Handle fixed and variable vector arguments differently.
6018 // Fixed vector arguments go into registers as long as registers are
6019 // available. Variable vector arguments always go into memory.
6020 unsigned NumArgs = Outs.size();
6021
6022 for (unsigned i = 0; i != NumArgs; ++i) {
6023 MVT ArgVT = Outs[i].VT;
6024 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6025 bool Result;
6026
6027 if (!ArgFlags.isVarArg()) {
6028 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6029 Outs[i].OrigTy, CCInfo);
6030 } else {
// NOTE(review): the start of this statement (listing line 6031) is absent
// from this listing.
6032 ArgFlags, Outs[i].OrigTy, CCInfo);
6033 }
6034
// A true result means the argument could not be assigned a location; this is
// reported in asserts builds and is treated as unreachable.
6035 if (Result) {
6036#ifndef NDEBUG
6037 errs() << "Call operand #" << i << " has unhandled type "
6038 << ArgVT << "\n";
6039#endif
6040 llvm_unreachable(nullptr);
6041 }
6042 }
6043 } else {
6044 // All arguments are treated the same.
6045 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6046 }
6047
6048 // Assign locations to all of the outgoing aggregate by value arguments.
6049 SmallVector<CCValAssign, 16> ByValArgLocs;
6050 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6051
6052 // Reserve stack space for the allocations in CCInfo.
6053 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6054
6055 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6056
6057 // Size of the linkage area, parameter list area and the part of the local
6058 // space variable where copies of aggregates which are passed by value are
6059 // stored.
6060 unsigned NumBytes = CCByValInfo.getStackSize();
6061
6062 // Calculate by how many bytes the stack has to be adjusted in case of tail
6063 // call optimization.
6064 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6065
6066 // Adjust the stack pointer for the new arguments...
6067 // These operations are automatically eliminated by the prolog/epilog pass
6068 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
// Remembered separately because the byval handling below may replace the
// CALLSEQ_START node.
6069 SDValue CallSeqStart = Chain;
6070
6071 // Load the return address and frame pointer so it can be moved somewhere else
6072 // later.
6073 SDValue LROp, FPOp;
6074 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6075
6076 // Set up a copy of the stack pointer for use loading and storing any
6077 // arguments that may not fit in the registers available for argument
6078 // passing.
6079 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6080
6082 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6083 SmallVector<SDValue, 8> MemOpChains;
6084
// Set once any argument is assigned to a register with a floating-point
// location type; consumed by the CR bit 6 handling for vararg calls below.
6085 bool seenFloatArg = false;
6086 // Walk the register/memloc assignments, inserting copies/loads.
6087 // i - Tracks the index into the list of registers allocated for the call
6088 // RealArgIdx - Tracks the index into the list of actual function arguments
6089 // j - Tracks the index into the list of byval arguments
6090 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6091 i != e;
6092 ++i, ++RealArgIdx) {
6093 CCValAssign &VA = ArgLocs[i];
6094 SDValue Arg = OutVals[RealArgIdx];
6095 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6096
6097 if (Flags.isByVal()) {
6098 // Argument is an aggregate which is passed by value, thus we need to
6099 // create a copy of it in the local variable space of the current stack
6100 // frame (which is the stack frame of the caller) and pass the address of
6101 // this copy to the callee.
6102 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6103 CCValAssign &ByValVA = ByValArgLocs[j++];
6104 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6105
6106 // Memory reserved in the local variable space of the callers stack frame.
6107 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6108
6109 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6110 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6111 StackPtr, PtrOff);
6112
6113 // Create a copy of the argument in the local area of the current
6114 // stack frame.
// The copy is chained on operand 0 of the current CALLSEQ_START, i.e. on the
// chain that fed it, so it is ordered before the call sequence begins.
6115 SDValue MemcpyCall =
6116 CreateCopyOfByValArgument(Arg, PtrOff,
6117 CallSeqStart.getNode()->getOperand(0),
6118 Flags, DAG, dl);
6119
6120 // This must go outside the CALLSEQ_START..END.
// A fresh CALLSEQ_START is created after the copy and every use of the old
// one is redirected to it.
6121 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6122 SDLoc(MemcpyCall));
6123 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6124 NewCallSeqStart.getNode());
6125 Chain = CallSeqStart = NewCallSeqStart;
6126
6127 // Pass the address of the aggregate copy on the stack either in a
6128 // physical register or in the parameter list area of the current stack
6129 // frame to the callee.
6130 Arg = PtrOff;
6131 }
6132
6133 // When useCRBits() is true, there can be i1 arguments.
6134 // It is because getRegisterType(MVT::i1) => MVT::i1,
6135 // and for other integer types getRegisterType() => MVT::i32.
6136 // Extend i1 and ensure callee will get i32.
6137 if (Arg.getValueType() == MVT::i1)
6138 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6139 dl, MVT::i32, Arg);
6140
6141 if (VA.isRegLoc()) {
6142 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6143 // Put argument in a physical register.
// With SPE, an f64 is split into two i32 halves that go into two consecutive
// register locations; which half is extracted first depends on endianness.
// The `++i` below consumes the second location while RealArgIdx advances
// only once for the argument.
6144 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6145 bool IsLE = Subtarget.isLittleEndian();
6146 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6147 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6148 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6149 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6150 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6151 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6152 SVal.getValue(0)));
6153 } else
6154 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6155 } else {
6156 // Put argument in the parameter list area of the current stack frame.
6157 assert(VA.isMemLoc());
6158 unsigned LocMemOffset = VA.getLocMemOffset();
6159
6160 if (!IsTailCall) {
6161 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6162 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6163 StackPtr, PtrOff);
6164
6165 MemOpChains.push_back(
6166 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6167 } else {
6168 // Calculate and remember argument location.
6169 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6170 TailCallArguments);
6171 }
6172 }
6173 }
6174
// Merge all argument stores into a single chain.
6175 if (!MemOpChains.empty())
6176 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6177
6178 // Build a sequence of copy-to-reg nodes chained together with token chain
6179 // and flag operands which copy the outgoing args into the appropriate regs.
6180 SDValue InGlue;
6181 for (const auto &[Reg, N] : RegsToPass) {
6182 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6183 InGlue = Chain.getValue(1);
6184 }
6185
6186 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6187 // registers.
6188 if (IsVarArg) {
6189 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6190 SDValue Ops[] = { Chain, InGlue };
6191
// The glue operand is passed only when InGlue holds a node, i.e. when a
// register copy above produced one.
6192 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6193 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6194
6195 InGlue = Chain.getValue(1);
6196 }
6197
6198 if (IsTailCall)
6199 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6200 TailCallArguments);
6201
6202 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6203 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6204}
6205
6206// Copy an argument into memory, being careful to do this outside the
6207// call sequence for the call to which the argument belongs.
//
// Arg          - the by-value argument to copy.
// PtrOff       - destination address of the copy.
// CallSeqStart - the current CALLSEQ_START node of the call being lowered.
// Flags        - argument flags forwarded to CreateCopyOfByValArgument.
//
// The copy is chained on operand 0 of CallSeqStart (the chain that fed it)
// rather than on CallSeqStart itself. A new CALLSEQ_START with the same frame
// size is then created after the copy, every use of the old node is
// redirected to it, and the new node is returned so the caller can continue
// from it.
6208SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6209 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6210 SelectionDAG &DAG, const SDLoc &dl) const {
6211 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6212 CallSeqStart.getNode()->getOperand(0),
6213 Flags, DAG, dl);
6214 // The MEMCPY must go outside the CALLSEQ_START..END.
// Reuse the frame size recorded as constant operand 1 of the old node.
6215 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6216 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6217 SDLoc(MemcpyCall));
6218 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6219 NewCallSeqStart.getNode());
6220 return NewCallSeqStart;
6221}
6222
6223SDValue PPCTargetLowering::LowerCall_64SVR4(
6224 SDValue Chain, SDValue Callee, CallFlags CFlags,
6226 const SmallVectorImpl<SDValue> &OutVals,
6227 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6229 const CallBase *CB) const {
6230 bool isELFv2ABI = Subtarget.isELFv2ABI();
6231 bool isLittleEndian = Subtarget.isLittleEndian();
6232 unsigned NumOps = Outs.size();
6233 bool IsSibCall = false;
6234 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6235
6236 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6237 unsigned PtrByteSize = 8;
6238
6239 MachineFunction &MF = DAG.getMachineFunction();
6240
6241 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6242 IsSibCall = true;
6243
6244 // Mark this function as potentially containing a function that contains a
6245 // tail call. As a consequence the frame pointer will be used for dynamicalloc
6246 // and restoring the callers stack pointer in this functions epilog. This is
6247 // done because by tail calling the called function might overwrite the value
6248 // in this function's (MF) stack pointer stack slot 0(SP).
6249 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6250 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6251
6252 assert(!(IsFastCall && CFlags.IsVarArg) &&
6253 "fastcc not supported on varargs functions");
6254
6255 // Count how many bytes are to be pushed on the stack, including the linkage
6256 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6257 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6258 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6259 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6260 unsigned NumBytes = LinkageSize;
6261 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6262
6263 static const MCPhysReg GPR[] = {
6264 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6265 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6266 };
6267 static const MCPhysReg VR[] = {
6268 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6269 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6270 };
6271
6272 const unsigned NumGPRs = std::size(GPR);
6273 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6274 const unsigned NumVRs = std::size(VR);
6275
6276 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6277 // can be passed to the callee in registers.
6278 // For the fast calling convention, there is another check below.
6279 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6280 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6281 if (!HasParameterArea) {
6282 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6283 unsigned AvailableFPRs = NumFPRs;
6284 unsigned AvailableVRs = NumVRs;
6285 unsigned NumBytesTmp = NumBytes;
6286 for (unsigned i = 0; i != NumOps; ++i) {
6287 if (Outs[i].Flags.isNest()) continue;
6288 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6289 PtrByteSize, LinkageSize, ParamAreaSize,
6290 NumBytesTmp, AvailableFPRs, AvailableVRs))
6291 HasParameterArea = true;
6292 }
6293 }
6294
6295 // When using the fast calling convention, we don't provide backing for
6296 // arguments that will be in registers.
6297 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6298
6299 // Avoid allocating parameter area for fastcc functions if all the arguments
6300 // can be passed in the registers.
6301 if (IsFastCall)
6302 HasParameterArea = false;
6303
6304 // Add up all the space actually used.
6305 for (unsigned i = 0; i != NumOps; ++i) {
6306 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6307 EVT ArgVT = Outs[i].VT;
6308 EVT OrigVT = Outs[i].ArgVT;
6309
6310 if (Flags.isNest())
6311 continue;
6312
6313 if (IsFastCall) {
6314 if (Flags.isByVal()) {
6315 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6316 if (NumGPRsUsed > NumGPRs)
6317 HasParameterArea = true;
6318 } else {
6319 switch (ArgVT.getSimpleVT().SimpleTy) {
6320 default: llvm_unreachable("Unexpected ValueType for argument!");
6321 case MVT::i1:
6322 case MVT::i32:
6323 case MVT::i64:
6324 if (++NumGPRsUsed <= NumGPRs)
6325 continue;
6326 break;
6327 case MVT::v4i32:
6328 case MVT::v8i16:
6329 case MVT::v16i8:
6330 case MVT::v2f64:
6331 case MVT::v2i64:
6332 case MVT::v1i128:
6333 case MVT::f128:
6334 if (++NumVRsUsed <= NumVRs)
6335 continue;
6336 break;
6337 case MVT::v4f32:
6338 if (++NumVRsUsed <= NumVRs)
6339 continue;
6340 break;
6341 case MVT::f32:
6342 case MVT::f64:
6343 if (++NumFPRsUsed <= NumFPRs)
6344 continue;
6345 break;
6346 }
6347 HasParameterArea = true;
6348 }
6349 }
6350
6351 /* Respect alignment of argument on the stack. */
6352 auto Alignement =
6353 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6354 NumBytes = alignTo(NumBytes, Alignement);
6355
6356 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6357 if (Flags.isInConsecutiveRegsLast())
6358 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6359 }
6360
6361 unsigned NumBytesActuallyUsed = NumBytes;
6362
6363 // In the old ELFv1 ABI,
6364 // the prolog code of the callee may store up to 8 GPR argument registers to
6365 // the stack, allowing va_start to index over them in memory if it is varargs.
6366 // Because we cannot tell if this is needed on the caller side, we have to
6367 // conservatively assume that it is needed. As such, make sure we have at
6368 // least enough stack space for the caller to store the 8 GPRs.
6369 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6370 // really requires memory operands, e.g. a vararg function.
6371 if (HasParameterArea)
6372 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6373 else
6374 NumBytes = LinkageSize;
6375
6376 // Tail call needs the stack to be aligned.
6377 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6378 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6379
6380 int SPDiff = 0;
6381
6382 // Calculate by how many bytes the stack has to be adjusted in case of tail
6383 // call optimization.
6384 if (!IsSibCall)
6385 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6386
6387 // To protect arguments on the stack from being clobbered in a tail call,
6388 // force all the loads to happen before doing any other lowering.
6389 if (CFlags.IsTailCall)
6390 Chain = DAG.getStackArgumentTokenFactor(Chain);
6391
6392 // Adjust the stack pointer for the new arguments...
6393 // These operations are automatically eliminated by the prolog/epilog pass
6394 if (!IsSibCall)
6395 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6396 SDValue CallSeqStart = Chain;
6397
6398 // Load the return address and frame pointer so they can be moved somewhere else
6399 // later.
6400 SDValue LROp, FPOp;
6401 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6402
6403 // Set up a copy of the stack pointer for use loading and storing any
6404 // arguments that may not fit in the registers available for argument
6405 // passing.
6406 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6407
6408 // Figure out which arguments are going to go in registers, and which in
6409 // memory. Also, if this is a vararg function, floating point operations
6410 // must be stored to our stack, and loaded into integer regs as well, if
6411 // any integer regs are available for argument passing.
6412 unsigned ArgOffset = LinkageSize;
6413
6415 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6416
6417 SmallVector<SDValue, 8> MemOpChains;
6418 for (unsigned i = 0; i != NumOps; ++i) {
6419 SDValue Arg = OutVals[i];
6420 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6421 EVT ArgVT = Outs[i].VT;
6422 EVT OrigVT = Outs[i].ArgVT;
6423
6424 // PtrOff will be used to store the current argument to the stack if a
6425 // register cannot be found for it.
6426 SDValue PtrOff;
6427
6428 // We re-align the argument offset for each argument, except when using the
6429 // fast calling convention, when we need to make sure we do that only when
6430 // we'll actually use a stack slot.
6431 auto ComputePtrOff = [&]() {
6432 /* Respect alignment of argument on the stack. */
6433 auto Alignment =
6434 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6435 ArgOffset = alignTo(ArgOffset, Alignment);
6436
6437 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6438
6439 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6440 };
6441
6442 if (!IsFastCall) {
6443 ComputePtrOff();
6444
6445 /* Compute GPR index associated with argument offset. */
6446 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6447 GPR_idx = std::min(GPR_idx, NumGPRs);
6448 }
6449
6450 // Promote integers to 64-bit values.
6451 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6452 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6453 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6454 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6455 }
6456
6457 // FIXME memcpy is used way more than necessary. Correctness first.
6458 // Note: "by value" is code for passing a structure by value, not
6459 // basic types.
6460 if (Flags.isByVal()) {
6461 // Note: Size includes alignment padding, so
6462 // struct x { short a; char b; }
6463 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6464 // These are the proper values we need for right-justifying the
6465 // aggregate in a parameter register.
6466 unsigned Size = Flags.getByValSize();
6467
6468 // An empty aggregate parameter takes up no storage and no
6469 // registers.
6470 if (Size == 0)
6471 continue;
6472
6473 if (IsFastCall)
6474 ComputePtrOff();
6475
6476 // All aggregates smaller than 8 bytes must be passed right-justified.
6477 if (Size==1 || Size==2 || Size==4) {
6478 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6479 if (GPR_idx != NumGPRs) {
6480 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6481 MachinePointerInfo(), VT);
6482 MemOpChains.push_back(Load.getValue(1));
6483 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6484
6485 ArgOffset += PtrByteSize;
6486 continue;
6487 }
6488 }
6489
6490 if (GPR_idx == NumGPRs && Size < 8) {
6491 SDValue AddPtr = PtrOff;
6492 if (!isLittleEndian) {
6493 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6494 PtrOff.getValueType());
6495 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6496 }
6497 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6498 CallSeqStart,
6499 Flags, DAG, dl);
6500 ArgOffset += PtrByteSize;
6501 continue;
6502 }
6503 // Copy the object to parameter save area if it can not be entirely passed
6504 // by registers.
6505 // FIXME: we only need to copy the parts which need to be passed in
6506 // parameter save area. For the parts passed by registers, we don't need
6507 // to copy them to the stack although we need to allocate space for them
6508 // in parameter save area.
6509 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6510 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6511 CallSeqStart,
6512 Flags, DAG, dl);
6513
6514 // When a register is available, pass a small aggregate right-justified.
6515 if (Size < 8 && GPR_idx != NumGPRs) {
6516 // The easiest way to get this right-justified in a register
6517 // is to copy the structure into the rightmost portion of a
6518 // local variable slot, then load the whole slot into the
6519 // register.
6520 // FIXME: The memcpy seems to produce pretty awful code for
6521 // small aggregates, particularly for packed ones.
6522 // FIXME: It would be preferable to use the slot in the
6523 // parameter save area instead of a new local variable.
6524 SDValue AddPtr = PtrOff;
6525 if (!isLittleEndian) {
6526 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6527 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6528 }
6529 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6530 CallSeqStart,
6531 Flags, DAG, dl);
6532
6533 // Load the slot into the register.
6534 SDValue Load =
6535 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6536 MemOpChains.push_back(Load.getValue(1));
6537 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6538
6539 // Done with this argument.
6540 ArgOffset += PtrByteSize;
6541 continue;
6542 }
6543
6544 // For aggregates larger than PtrByteSize, copy the pieces of the
6545 // object that fit into registers from the parameter save area.
6546 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6547 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6548 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6549 if (GPR_idx != NumGPRs) {
6550 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6551 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6552 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6553 MachinePointerInfo(), ObjType);
6554
6555 MemOpChains.push_back(Load.getValue(1));
6556 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6557 ArgOffset += PtrByteSize;
6558 } else {
6559 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6560 break;
6561 }
6562 }
6563 continue;
6564 }
6565
6566 switch (Arg.getSimpleValueType().SimpleTy) {
6567 default: llvm_unreachable("Unexpected ValueType for argument!");
6568 case MVT::i1:
6569 case MVT::i32:
6570 case MVT::i64:
6571 if (Flags.isNest()) {
6572 // The 'nest' parameter, if any, is passed in R11.
6573 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6574 break;
6575 }
6576
6577 // These can be scalar arguments or elements of an integer array type
6578 // passed directly. Clang may use those instead of "byval" aggregate
6579 // types to avoid forcing arguments to memory unnecessarily.
6580 if (GPR_idx != NumGPRs) {
6581 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6582 } else {
6583 if (IsFastCall)
6584 ComputePtrOff();
6585
6586 assert(HasParameterArea &&
6587 "Parameter area must exist to pass an argument in memory.");
6588 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6589 true, CFlags.IsTailCall, false, MemOpChains,
6590 TailCallArguments, dl);
6591 if (IsFastCall)
6592 ArgOffset += PtrByteSize;
6593 }
6594 if (!IsFastCall)
6595 ArgOffset += PtrByteSize;
6596 break;
6597 case MVT::f32:
6598 case MVT::f64: {
6599 // These can be scalar arguments or elements of a float array type
6600 // passed directly. The latter are used to implement ELFv2 homogenous
6601 // float aggregates.
6602
6603 // Named arguments go into FPRs first, and once they overflow, the
6604 // remaining arguments go into GPRs and then the parameter save area.
6605 // Unnamed arguments for vararg functions always go to GPRs and
6606 // then the parameter save area. For now, put all arguments to vararg
6607 // routines always in both locations (FPR *and* GPR or stack slot).
6608 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6609 bool NeededLoad = false;
6610
6611 // First load the argument into the next available FPR.
6612 if (FPR_idx != NumFPRs)
6613 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6614
6615 // Next, load the argument into GPR or stack slot if needed.
6616 if (!NeedGPROrStack)
6617 ;
6618 else if (GPR_idx != NumGPRs && !IsFastCall) {
6619 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6620 // once we support fp <-> gpr moves.
6621
6622 // In the non-vararg case, this can only ever happen in the
6623 // presence of f32 array types, since otherwise we never run
6624 // out of FPRs before running out of GPRs.
6625 SDValue ArgVal;
6626
6627 // Double values are always passed in a single GPR.
6628 if (Arg.getValueType() != MVT::f32) {
6629 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6630
6631 // Non-array float values are extended and passed in a GPR.
6632 } else if (!Flags.isInConsecutiveRegs()) {
6633 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6634 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6635
6636 // If we have an array of floats, we collect every odd element
6637 // together with its predecessor into one GPR.
6638 } else if (ArgOffset % PtrByteSize != 0) {
6639 SDValue Lo, Hi;
6640 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6641 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6642 if (!isLittleEndian)
6643 std::swap(Lo, Hi);
6644 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6645
6646 // The final element, if even, goes into the first half of a GPR.
6647 } else if (Flags.isInConsecutiveRegsLast()) {
6648 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6649 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6650 if (!isLittleEndian)
6651 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6652 DAG.getConstant(32, dl, MVT::i32));
6653
6654 // Non-final even elements are skipped; they will be handled
6655 // together the with subsequent argument on the next go-around.
6656 } else
6657 ArgVal = SDValue();
6658
6659 if (ArgVal.getNode())
6660 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6661 } else {
6662 if (IsFastCall)
6663 ComputePtrOff();
6664
6665 // Single-precision floating-point values are mapped to the
6666 // second (rightmost) word of the stack doubleword.
6667 if (Arg.getValueType() == MVT::f32 &&
6668 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6669 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6670 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6671 }
6672
6673 assert(HasParameterArea &&
6674 "Parameter area must exist to pass an argument in memory.");
6675 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6676 true, CFlags.IsTailCall, false, MemOpChains,
6677 TailCallArguments, dl);
6678
6679 NeededLoad = true;
6680 }
6681 // When passing an array of floats, the array occupies consecutive
6682 // space in the argument area; only round up to the next doubleword
6683 // at the end of the array. Otherwise, each float takes 8 bytes.
6684 if (!IsFastCall || NeededLoad) {
6685 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6686 Flags.isInConsecutiveRegs()) ? 4 : 8;
6687 if (Flags.isInConsecutiveRegsLast())
6688 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6689 }
6690 break;
6691 }
6692 case MVT::v4f32:
6693 case MVT::v4i32:
6694 case MVT::v8i16:
6695 case MVT::v16i8:
6696 case MVT::v2f64:
6697 case MVT::v2i64:
6698 case MVT::v1i128:
6699 case MVT::f128:
6700 // These can be scalar arguments or elements of a vector array type
6701 // passed directly. The latter are used to implement ELFv2 homogenous
6702 // vector aggregates.
6703
6704 // For a varargs call, named arguments go into VRs or on the stack as
6705 // usual; unnamed arguments always go to the stack or the corresponding
6706 // GPRs when within range. For now, we always put the value in both
6707 // locations (or even all three).
6708 if (CFlags.IsVarArg) {
6709 assert(HasParameterArea &&
6710 "Parameter area must exist if we have a varargs call.");
6711 // We could elide this store in the case where the object fits
6712 // entirely in R registers. Maybe later.
6713 SDValue Store =
6714 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6715 MemOpChains.push_back(Store);
6716 if (VR_idx != NumVRs) {
6717 SDValue Load =
6718 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6719 MemOpChains.push_back(Load.getValue(1));
6720 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6721 }
6722 ArgOffset += 16;
6723 for (unsigned i=0; i<16; i+=PtrByteSize) {
6724 if (GPR_idx == NumGPRs)
6725 break;
6726 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6727 DAG.getConstant(i, dl, PtrVT));
6728 SDValue Load =
6729 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6730 MemOpChains.push_back(Load.getValue(1));
6731 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6732 }
6733 break;
6734 }
6735
6736 // Non-varargs Altivec params go into VRs or on the stack.
6737 if (VR_idx != NumVRs) {
6738 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6739 } else {
6740 if (IsFastCall)
6741 ComputePtrOff();
6742
6743 assert(HasParameterArea &&
6744 "Parameter area must exist to pass an argument in memory.");
6745 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6746 true, CFlags.IsTailCall, true, MemOpChains,
6747 TailCallArguments, dl);
6748 if (IsFastCall)
6749 ArgOffset += 16;
6750 }
6751
6752 if (!IsFastCall)
6753 ArgOffset += 16;
6754 break;
6755 }
6756 }
6757
6758 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6759 "mismatch in size of parameter area");
6760 (void)NumBytesActuallyUsed;
6761
6762 if (!MemOpChains.empty())
6763 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6764
6765 // Check if this is an indirect call (MTCTR/BCTRL).
6766 // See prepareDescriptorIndirectCall and buildCallOperands for more
6767 // information about calls through function pointers in the 64-bit SVR4 ABI.
6768 if (CFlags.IsIndirect) {
6769 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6770 // caller in the TOC save area.
6771 if (isTOCSaveRestoreRequired(Subtarget)) {
6772 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6773 // Load r2 into a virtual register and store it to the TOC save area.
6774 setUsesTOCBasePtr(DAG);
6775 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6776 // TOC save area offset.
6777 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6778 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6779 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6780 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6782 DAG.getMachineFunction(), TOCSaveOffset));
6783 }
6784 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6785 // This does not mean the MTCTR instruction must use R12; it's easier
6786 // to model this as an extra parameter, so do that.
6787 if (isELFv2ABI && !CFlags.IsPatchPoint)
6788 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6789 }
6790
6791 // Build a sequence of copy-to-reg nodes chained together with token chain
6792 // and flag operands which copy the outgoing args into the appropriate regs.
6793 SDValue InGlue;
6794 for (const auto &[Reg, N] : RegsToPass) {
6795 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6796 InGlue = Chain.getValue(1);
6797 }
6798
6799 if (CFlags.IsTailCall && !IsSibCall)
6800 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6801 TailCallArguments);
6802
6803 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6804 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6805}
6806
6807// Returns true when the shadow of a general purpose argument register
6808// in the parameter save area is aligned to at least 'RequiredAlign'.
6809static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6810 assert(RequiredAlign.value() <= 16 &&
6811 "Required alignment greater than stack alignment.");
6812 switch (Reg) {
6813 default:
6814 report_fatal_error("called on invalid register.");
6815 case PPC::R5:
6816 case PPC::R9:
6817 case PPC::X3:
6818 case PPC::X5:
6819 case PPC::X7:
6820 case PPC::X9:
6821 // These registers are 16 byte aligned which is the most strict aligment
6822 // we can support.
6823 return true;
6824 case PPC::R3:
6825 case PPC::R7:
6826 case PPC::X4:
6827 case PPC::X6:
6828 case PPC::X8:
6829 case PPC::X10:
6830 // The shadow of these registers in the PSA is 8 byte aligned.
6831 return RequiredAlign <= 8;
6832 case PPC::R4:
6833 case PPC::R6:
6834 case PPC::R8:
6835 case PPC::R10:
6836 return RequiredAlign <= 4;
6837 }
6838}
6839
6840static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6841 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6842 Type *OrigTy, CCState &State) {
6843 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6844 State.getMachineFunction().getSubtarget());
6845 const bool IsPPC64 = Subtarget.isPPC64();
6846 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6847 const Align PtrAlign(PtrSize);
6848 const Align StackAlign(16);
6849 const MVT RegVT = Subtarget.getScalarIntVT();
6850
6851 if (ValVT == MVT::f128)
6852 report_fatal_error("f128 is unimplemented on AIX.");
6853
6854 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6855 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6856 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6857 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6858 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6859 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6860
6861 static const MCPhysReg VR[] = {// Vector registers.
6862 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6863 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6864 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6865
6866 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6867
6868 if (ArgFlags.isNest()) {
6869 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6870 if (!EnvReg)
6871 report_fatal_error("More then one nest argument.");
6872 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6873 return false;
6874 }
6875
6876 if (ArgFlags.isByVal()) {
6877 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6878 if (ByValAlign > StackAlign)
6879 report_fatal_error("Pass-by-value arguments with alignment greater than "
6880 "16 are not supported.");
6881
6882 const unsigned ByValSize = ArgFlags.getByValSize();
6883 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6884
6885 // An empty aggregate parameter takes up no storage and no registers,
6886 // but needs a MemLoc for a stack slot for the formal arguments side.
6887 if (ByValSize == 0) {
6889 State.getStackSize(), RegVT, LocInfo));
6890 return false;
6891 }
6892
6893 // Shadow allocate any registers that are not properly aligned.
6894 unsigned NextReg = State.getFirstUnallocated(GPRs);
6895 while (NextReg != GPRs.size() &&
6896 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6897 // Shadow allocate next registers since its aligment is not strict enough.
6898 MCRegister Reg = State.AllocateReg(GPRs);
6899 // Allocate the stack space shadowed by said register.
6900 State.AllocateStack(PtrSize, PtrAlign);
6901 assert(Reg && "Alocating register unexpectedly failed.");
6902 (void)Reg;
6903 NextReg = State.getFirstUnallocated(GPRs);
6904 }
6905
6906 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6907 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6908 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6909 if (MCRegister Reg = State.AllocateReg(GPRs))
6910 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6911 else {
6914 LocInfo));
6915 break;
6916 }
6917 }
6918 return false;
6919 }
6920
6921 // Arguments always reserve parameter save area.
6922 switch (ValVT.SimpleTy) {
6923 default:
6924 report_fatal_error("Unhandled value type for argument.");
6925 case MVT::i64:
6926 // i64 arguments should have been split to i32 for PPC32.
6927 assert(IsPPC64 && "PPC32 should have split i64 values.");
6928 [[fallthrough]];
6929 case MVT::i1:
6930 case MVT::i32: {
6931 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6932 // AIX integer arguments are always passed in register width.
6933 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6934 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6936 if (MCRegister Reg = State.AllocateReg(GPRs))
6937 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6938 else
6939 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6940
6941 return false;
6942 }
6943 case MVT::f32:
6944 case MVT::f64: {
6945 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6946 const unsigned StoreSize = LocVT.getStoreSize();
6947 // Floats are always 4-byte aligned in the PSA on AIX.
6948 // This includes f64 in 64-bit mode for ABI compatibility.
6949 const unsigned Offset =
6950 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6951 MCRegister FReg = State.AllocateReg(FPR);
6952 if (FReg)
6953 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6954
6955 // Reserve and initialize GPRs or initialize the PSA as required.
6956 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6957 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6958 assert(FReg && "An FPR should be available when a GPR is reserved.");
6959 if (State.isVarArg()) {
6960 // Successfully reserved GPRs are only initialized for vararg calls.
6961 // Custom handling is required for:
6962 // f64 in PPC32 needs to be split into 2 GPRs.
6963 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6964 State.addLoc(
6965 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6966 }
6967 } else {
6968 // If there are insufficient GPRs, the PSA needs to be initialized.
6969 // Initialization occurs even if an FPR was initialized for
6970 // compatibility with the AIX XL compiler. The full memory for the
6971 // argument will be initialized even if a prior word is saved in GPR.
6972 // A custom memLoc is used when the argument also passes in FPR so
6973 // that the callee handling can skip over it easily.
6974 State.addLoc(
6975 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6976 LocInfo)
6977 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6978 break;
6979 }
6980 }
6981
6982 return false;
6983 }
6984 case MVT::v4f32:
6985 case MVT::v4i32:
6986 case MVT::v8i16:
6987 case MVT::v16i8:
6988 case MVT::v2i64:
6989 case MVT::v2f64:
6990 case MVT::v1i128: {
6991 const unsigned VecSize = 16;
6992 const Align VecAlign(VecSize);
6993
6994 if (!State.isVarArg()) {
6995 // If there are vector registers remaining we don't consume any stack
6996 // space.
6997 if (MCRegister VReg = State.AllocateReg(VR)) {
6998 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6999 return false;
7000 }
7001 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7002 // might be allocated in the portion of the PSA that is shadowed by the
7003 // GPRs.
7004 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7005 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7006 return false;
7007 }
7008
7009 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7010 // Burn any underaligned registers and their shadowed stack space until
7011 // we reach the required alignment.
7012 while (NextRegIndex != GPRs.size() &&
7013 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7014 // Shadow allocate register and its stack shadow.
7015 MCRegister Reg = State.AllocateReg(GPRs);
7016 State.AllocateStack(PtrSize, PtrAlign);
7017 assert(Reg && "Allocating register unexpectedly failed.");
7018 (void)Reg;
7019 NextRegIndex = State.getFirstUnallocated(GPRs);
7020 }
7021
7022 // Vectors that are passed as fixed arguments are handled differently.
7023 // They are passed in VRs if any are available (unlike arguments passed
7024 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
7025 // functions)
7026 if (!ArgFlags.isVarArg()) {
7027 if (MCRegister VReg = State.AllocateReg(VR)) {
7028 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7029 // Shadow allocate GPRs and stack space even though we pass in a VR.
7030 for (unsigned I = 0; I != VecSize; I += PtrSize)
7031 State.AllocateReg(GPRs);
7032 State.AllocateStack(VecSize, VecAlign);
7033 return false;
7034 }
7035 // No vector registers remain so pass on the stack.
7036 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7037 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7038 return false;
7039 }
7040
7041 // If all GPRS are consumed then we pass the argument fully on the stack.
7042 if (NextRegIndex == GPRs.size()) {
7043 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7044 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7045 return false;
7046 }
7047
7048 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7049 // half of the argument, and then need to pass the remaining half on the
7050 // stack.
7051 if (GPRs[NextRegIndex] == PPC::R9) {
7052 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7053 State.addLoc(
7054 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7055
7056 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7057 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7058 assert(FirstReg && SecondReg &&
7059 "Allocating R9 or R10 unexpectedly failed.");
7060 State.addLoc(
7061 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7062 State.addLoc(
7063 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7064 return false;
7065 }
7066
7067 // We have enough GPRs to fully pass the vector argument, and we have
7068 // already consumed any underaligned registers. Start with the custom
7069 // MemLoc and then the custom RegLocs.
7070 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7071 State.addLoc(
7072 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7073 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7074 const MCRegister Reg = State.AllocateReg(GPRs);
7075 assert(Reg && "Failed to allocated register for vararg vector argument");
7076 State.addLoc(
7077 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7078 }
7079 return false;
7080 }
7081 }
7082 return true;
7083}
7084
7085// So far, this function is only used by LowerFormalArguments_AIX()
7087 bool IsPPC64,
7088 bool HasP8Vector,
7089 bool HasVSX) {
7090 assert((IsPPC64 || SVT != MVT::i64) &&
7091 "i64 should have been split for 32-bit codegen.");
7092
7093 switch (SVT) {
7094 default:
7095 report_fatal_error("Unexpected value type for formal argument");
7096 case MVT::i1:
7097 case MVT::i32:
7098 case MVT::i64:
7099 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7100 case MVT::f32:
7101 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7102 case MVT::f64:
7103 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7104 case MVT::v4f32:
7105 case MVT::v4i32:
7106 case MVT::v8i16:
7107 case MVT::v16i8:
7108 case MVT::v2i64:
7109 case MVT::v2f64:
7110 case MVT::v1i128:
7111 return &PPC::VRRCRegClass;
7112 }
7113}
7114
7116 SelectionDAG &DAG, SDValue ArgValue,
7117 MVT LocVT, const SDLoc &dl) {
7118 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7119 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7120
7121 if (Flags.isSExt())
7122 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7123 DAG.getValueType(ValVT));
7124 else if (Flags.isZExt())
7125 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7126 DAG.getValueType(ValVT));
7127
7128 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7129}
7130
7131static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7132 const unsigned LASize = FL->getLinkageSize();
7133
7134 if (PPC::GPRCRegClass.contains(Reg)) {
7135 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7136 "Reg must be a valid argument register!");
7137 return LASize + 4 * (Reg - PPC::R3);
7138 }
7139
7140 if (PPC::G8RCRegClass.contains(Reg)) {
7141 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7142 "Reg must be a valid argument register!");
7143 return LASize + 8 * (Reg - PPC::X3);
7144 }
7145
7146 llvm_unreachable("Only general purpose registers expected.");
7147}
7148
7149// AIX ABI Stack Frame Layout:
7150//
7151// Low Memory +--------------------------------------------+
7152// SP +---> | Back chain | ---+
7153// | +--------------------------------------------+ |
7154// | | Saved Condition Register | |
7155// | +--------------------------------------------+ |
7156// | | Saved Linkage Register | |
7157// | +--------------------------------------------+ | Linkage Area
7158// | | Reserved for compilers | |
7159// | +--------------------------------------------+ |
7160// | | Reserved for binders | |
7161// | +--------------------------------------------+ |
7162// | | Saved TOC pointer | ---+
7163// | +--------------------------------------------+
7164// | | Parameter save area |
7165// | +--------------------------------------------+
7166// | | Alloca space |
7167// | +--------------------------------------------+
7168// | | Local variable space |
7169// | +--------------------------------------------+
7170// | | Float/int conversion temporary |
7171// | +--------------------------------------------+
7172// | | Save area for AltiVec registers |
7173// | +--------------------------------------------+
7174// | | AltiVec alignment padding |
7175// | +--------------------------------------------+
7176// | | Save area for VRSAVE register |
7177// | +--------------------------------------------+
7178// | | Save area for General Purpose registers |
7179// | +--------------------------------------------+
7180// | | Save area for Floating Point registers |
7181// | +--------------------------------------------+
7182// +---- | Back chain |
7183// High Memory +--------------------------------------------+
7184//
7185// Specifications:
7186// AIX 7.2 Assembler Language Reference
7187// Subroutine linkage convention
7188
7189SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7190 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7191 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7192 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7193
7194 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7195 CallConv == CallingConv::Fast) &&
7196 "Unexpected calling convention!");
7197
7198 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7199 report_fatal_error("Tail call support is unimplemented on AIX.");
7200
7201 if (useSoftFloat())
7202 report_fatal_error("Soft float support is unimplemented on AIX.");
7203
7204 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7205
7206 const bool IsPPC64 = Subtarget.isPPC64();
7207 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7208
7209 // Assign locations to all of the incoming arguments.
7211 MachineFunction &MF = DAG.getMachineFunction();
7212 MachineFrameInfo &MFI = MF.getFrameInfo();
7213 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7214 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7215
7216 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7217 // Reserve space for the linkage area on the stack.
7218 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7219 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7220 uint64_t SaveStackPos = CCInfo.getStackSize();
7221 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7222 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7223
7225
7226 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7227 CCValAssign &VA = ArgLocs[I++];
7228 MVT LocVT = VA.getLocVT();
7229 MVT ValVT = VA.getValVT();
7230 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7231
7232 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7233 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7234 // For compatibility with the AIX XL compiler, the float args in the
7235 // parameter save area are initialized even if the argument is available
7236 // in register. The caller is required to initialize both the register
7237 // and memory, however, the callee can choose to expect it in either.
7238 // The memloc is dismissed here because the argument is retrieved from
7239 // the register.
7240 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7241 continue;
7242
7243 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7244 const TargetRegisterClass *RegClass = getRegClassForSVT(
7245 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7246 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7247 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7248 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7249 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7250 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7251 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7252 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7253 MachinePointerInfo(), Align(PtrByteSize));
7254 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7255 MemOps.push_back(StoreReg);
7256 }
7257
7258 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7259 unsigned StoreSize =
7260 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7261 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7262 }
7263
7264 auto HandleMemLoc = [&]() {
7265 const unsigned LocSize = LocVT.getStoreSize();
7266 const unsigned ValSize = ValVT.getStoreSize();
7267 assert((ValSize <= LocSize) &&
7268 "Object size is larger than size of MemLoc");
7269 int CurArgOffset = VA.getLocMemOffset();
7270 // Objects are right-justified because AIX is big-endian.
7271 if (LocSize > ValSize)
7272 CurArgOffset += LocSize - ValSize;
7273 // Potential tail calls could cause overwriting of argument stack slots.
7274 const bool IsImmutable =
7276 (CallConv == CallingConv::Fast));
7277 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7278 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7279 SDValue ArgValue =
7280 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7281
7282 // While the ABI specifies the argument type is (sign or zero) extended
7283 // out to register width, not all code is compliant. We truncate and
7284 // re-extend to be more forgiving of these callers when the argument type
7285 // is smaller than register width.
7286 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7287 ValVT.isInteger() &&
7288 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7289 // It is possible to have either real integer values
7290 // or integers that were not originally integers.
7291 // In the latter case, these could have came from structs,
7292 // and these integers would not have an extend on the parameter.
7293 // Since these types of integers do not have an extend specified
7294 // in the first place, the type of extend that we do should not matter.
7295 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7296 ? MVT::i8
7297 : ArgVT;
7298 SDValue ArgValueTrunc =
7299 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7300 SDValue ArgValueExt =
7301 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7302 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7303 InVals.push_back(ArgValueExt);
7304 } else {
7305 InVals.push_back(ArgValue);
7306 }
7307 };
7308
7309 // Vector arguments to VaArg functions are passed both on the stack, and
7310 // in any available GPRs. Load the value from the stack and add the GPRs
7311 // as live ins.
7312 if (VA.isMemLoc() && VA.needsCustom()) {
7313 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7314 assert(isVarArg && "Only use custom memloc for vararg.");
7315 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7316 // matching custom RegLocs.
7317 const unsigned OriginalValNo = VA.getValNo();
7318 (void)OriginalValNo;
7319
7320 auto HandleCustomVecRegLoc = [&]() {
7321 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7322 "Missing custom RegLoc.");
7323 VA = ArgLocs[I++];
7324 assert(VA.getValVT().isVector() &&
7325 "Unexpected Val type for custom RegLoc.");
7326 assert(VA.getValNo() == OriginalValNo &&
7327 "ValNo mismatch between custom MemLoc and RegLoc.");
7329 MF.addLiveIn(VA.getLocReg(),
7330 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7331 Subtarget.hasVSX()));
7332 };
7333
7334 HandleMemLoc();
7335 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7336 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7337 // R10.
7338 HandleCustomVecRegLoc();
7339 HandleCustomVecRegLoc();
7340
7341 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7342 // we passed the vector in R5, R6, R7 and R8.
7343 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7344 assert(!IsPPC64 &&
7345 "Only 2 custom RegLocs expected for 64-bit codegen.");
7346 HandleCustomVecRegLoc();
7347 HandleCustomVecRegLoc();
7348 }
7349
7350 continue;
7351 }
7352
7353 if (VA.isRegLoc()) {
7354 if (VA.getValVT().isScalarInteger())
7356 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7357 switch (VA.getValVT().SimpleTy) {
7358 default:
7359 report_fatal_error("Unhandled value type for argument.");
7360 case MVT::f32:
7362 break;
7363 case MVT::f64:
7365 break;
7366 }
7367 } else if (VA.getValVT().isVector()) {
7368 switch (VA.getValVT().SimpleTy) {
7369 default:
7370 report_fatal_error("Unhandled value type for argument.");
7371 case MVT::v16i8:
7373 break;
7374 case MVT::v8i16:
7376 break;
7377 case MVT::v4i32:
7378 case MVT::v2i64:
7379 case MVT::v1i128:
7381 break;
7382 case MVT::v4f32:
7383 case MVT::v2f64:
7385 break;
7386 }
7387 }
7388 }
7389
7390 if (Flags.isByVal() && VA.isMemLoc()) {
7391 const unsigned Size =
7392 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7393 PtrByteSize);
7394 const int FI = MF.getFrameInfo().CreateFixedObject(
7395 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7396 /* IsAliased */ true);
7397 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7398 InVals.push_back(FIN);
7399
7400 continue;
7401 }
7402
7403 if (Flags.isByVal()) {
7404 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7405
7406 const MCPhysReg ArgReg = VA.getLocReg();
7407 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7408
7409 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7410 const int FI = MF.getFrameInfo().CreateFixedObject(
7411 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7412 /* IsAliased */ true);
7413 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7414 InVals.push_back(FIN);
7415
7416 // Add live ins for all the RegLocs for the same ByVal.
7417 const TargetRegisterClass *RegClass =
7418 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7419
7420 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7421 unsigned Offset) {
7422 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7423 // Since the callers side has left justified the aggregate in the
7424 // register, we can simply store the entire register into the stack
7425 // slot.
7426 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7427 // The store to the fixedstack object is needed because accessing a
7428 // field of the ByVal will use a gep and load. Ideally we will optimize
7429 // to extracting the value from the register directly, and elide the
7430 // stores when the arguments address is not taken, but that will need to
7431 // be future work.
7432 SDValue Store = DAG.getStore(
7433 CopyFrom.getValue(1), dl, CopyFrom,
7436
7437 MemOps.push_back(Store);
7438 };
7439
7440 unsigned Offset = 0;
7441 HandleRegLoc(VA.getLocReg(), Offset);
7442 Offset += PtrByteSize;
7443 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7444 Offset += PtrByteSize) {
7445 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7446 "RegLocs should be for ByVal argument.");
7447
7448 const CCValAssign RL = ArgLocs[I++];
7449 HandleRegLoc(RL.getLocReg(), Offset);
7451 }
7452
7453 if (Offset != StackSize) {
7454 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7455 "Expected MemLoc for remaining bytes.");
7456 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7457 // Consume the MemLoc. The InVal has already been emitted, so nothing
7458 // more needs to be done.
7459 ++I;
7460 }
7461
7462 continue;
7463 }
7464
7465 if (VA.isRegLoc() && !VA.needsCustom()) {
7466 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7467 Register VReg =
7468 MF.addLiveIn(VA.getLocReg(),
7469 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7470 Subtarget.hasVSX()));
7471 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7472 if (ValVT.isScalarInteger() &&
7473 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7474 ArgValue =
7475 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7476 }
7477 InVals.push_back(ArgValue);
7478 continue;
7479 }
7480 if (VA.isMemLoc()) {
7481 HandleMemLoc();
7482 continue;
7483 }
7484 }
7485
7486 // On AIX a minimum of 8 words is saved to the parameter save area.
7487 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7488 // Area that is at least reserved in the caller of this function.
7489 unsigned CallerReservedArea = std::max<unsigned>(
7490 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7491
7492 // Set the size that is at least reserved in caller of this function. Tail
7493 // call optimized function's reserved stack space needs to be aligned so
7494 // that taking the difference between two stack areas will result in an
7495 // aligned stack.
7496 CallerReservedArea =
7497 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7498 FuncInfo->setMinReservedArea(CallerReservedArea);
7499
7500 if (isVarArg) {
7501 int VAListIndex = 0;
7502 // If any of the optional arguments are passed in register then the fixed
7503 // stack object we spill into is not immutable. Create a fixed stack object
7504 // that overlaps the remainder of the parameter save area.
7505 if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7506 unsigned FixedStackSize =
7507 LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7508 VAListIndex =
7509 MFI.CreateFixedObject(FixedStackSize, CCInfo.getStackSize(),
7510 /* IsImmutable */ false, /* IsAliased */ true);
7511 } else {
7512 // All the arguments passed through ellipses are on the stack. Create a
7513 // dummy fixed stack object the same size as a pointer since we don't
7514 // know the actual size.
7515 VAListIndex =
7516 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(),
7517 /* IsImmutable */ true, /* IsAliased */ true);
7518 }
7519
7520 FuncInfo->setVarArgsFrameIndex(VAListIndex);
7521 SDValue FIN = DAG.getFrameIndex(VAListIndex, PtrVT);
7522
7523 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7524 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7525
7526 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7527 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7528 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7529
7530 // The fixed integer arguments of a variadic function are stored to the
7531 // VarArgsFrameIndex on the stack so that they may be loaded by
7532 // dereferencing the result of va_next.
7533 for (unsigned
7534 GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7535 Offset = 0;
7536 GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7537
7538 const Register VReg =
7539 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7540 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7541
7542 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7543 MachinePointerInfo MPI =
7544 MachinePointerInfo::getFixedStack(MF, VAListIndex, Offset);
7545 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MPI);
7546 MemOps.push_back(Store);
7547 // Increment the address for the next argument to store.
7548 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7549 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7550 }
7551 }
7552
7553 if (!MemOps.empty())
7554 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7555
7556 return Chain;
7557}
7558
7559SDValue PPCTargetLowering::LowerCall_AIX(
7560 SDValue Chain, SDValue Callee, CallFlags CFlags,
7562 const SmallVectorImpl<SDValue> &OutVals,
7563 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7565 const CallBase *CB) const {
7566 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7567 // AIX ABI stack frame layout.
7568
7569 assert((CFlags.CallConv == CallingConv::C ||
7570 CFlags.CallConv == CallingConv::Cold ||
7571 CFlags.CallConv == CallingConv::Fast) &&
7572 "Unexpected calling convention!");
7573
7574 if (CFlags.IsPatchPoint)
7575 report_fatal_error("This call type is unimplemented on AIX.");
7576
7577 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7578
7579 MachineFunction &MF = DAG.getMachineFunction();
7581 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7582 *DAG.getContext());
7583
7584 // Reserve space for the linkage save area (LSA) on the stack.
7585 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7586 // [SP][CR][LR][2 x reserved][TOC].
7587 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7588 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7589 const bool IsPPC64 = Subtarget.isPPC64();
7590 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7591 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7592 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7593 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7594
7595 // The prolog code of the callee may store up to 8 GPR argument registers to
7596 // the stack, allowing va_start to index over them in memory if the callee
7597 // is variadic.
7598 // Because we cannot tell if this is needed on the caller side, we have to
7599 // conservatively assume that it is needed. As such, make sure we have at
7600 // least enough stack space for the caller to store the 8 GPRs.
7601 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7602 const unsigned NumBytes = std::max<unsigned>(
7603 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7604
7605 // Adjust the stack pointer for the new arguments...
7606 // These operations are automatically eliminated by the prolog/epilog pass.
7607 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7608 SDValue CallSeqStart = Chain;
7609
7611 SmallVector<SDValue, 8> MemOpChains;
7612
7613 // Set up a copy of the stack pointer for loading and storing any
7614 // arguments that may not fit in the registers available for argument
7615 // passing.
7616 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7617 : DAG.getRegister(PPC::R1, MVT::i32);
7618
7619 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7620 const unsigned ValNo = ArgLocs[I].getValNo();
7621 SDValue Arg = OutVals[ValNo];
7622 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7623
7624 if (Flags.isByVal()) {
7625 const unsigned ByValSize = Flags.getByValSize();
7626
7627 // Nothing to do for zero-sized ByVals on the caller side.
7628 if (!ByValSize) {
7629 ++I;
7630 continue;
7631 }
7632
7633 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7634 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7635 (LoadOffset != 0)
7636 ? DAG.getObjectPtrOffset(
7637 dl, Arg, TypeSize::getFixed(LoadOffset))
7638 : Arg,
7639 MachinePointerInfo(), VT);
7640 };
7641
7642 unsigned LoadOffset = 0;
7643
7644 // Initialize registers, which are fully occupied by the by-val argument.
7645 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7646 SDValue Load = GetLoad(PtrVT, LoadOffset);
7647 MemOpChains.push_back(Load.getValue(1));
7648 LoadOffset += PtrByteSize;
7649 const CCValAssign &ByValVA = ArgLocs[I++];
7650 assert(ByValVA.getValNo() == ValNo &&
7651 "Unexpected location for pass-by-value argument.");
7652 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7653 }
7654
7655 if (LoadOffset == ByValSize)
7656 continue;
7657
7658 // There must be one more loc to handle the remainder.
7659 assert(ArgLocs[I].getValNo() == ValNo &&
7660 "Expected additional location for by-value argument.");
7661
7662 if (ArgLocs[I].isMemLoc()) {
7663 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7664 const CCValAssign &ByValVA = ArgLocs[I++];
7665 ISD::ArgFlagsTy MemcpyFlags = Flags;
7666 // Only memcpy the bytes that don't pass in register.
7667 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7668 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7669 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7670 dl, Arg, TypeSize::getFixed(LoadOffset))
7671 : Arg,
7673 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7674 CallSeqStart, MemcpyFlags, DAG, dl);
7675 continue;
7676 }
7677
7678 // Initialize the final register residue.
7679 // Any residue that occupies the final by-val arg register must be
7680 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7681 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7682 // 2 and 1 byte loads.
7683 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7684 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7685 "Unexpected register residue for by-value argument.");
7686 SDValue ResidueVal;
7687 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7688 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7689 const MVT VT =
7690 N == 1 ? MVT::i8
7691 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7692 SDValue Load = GetLoad(VT, LoadOffset);
7693 MemOpChains.push_back(Load.getValue(1));
7694 LoadOffset += N;
7695 Bytes += N;
7696
7697 // By-val arguments are passed left-justfied in register.
7698 // Every load here needs to be shifted, otherwise a full register load
7699 // should have been used.
7700 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7701 "Unexpected load emitted during handling of pass-by-value "
7702 "argument.");
7703 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7704 EVT ShiftAmountTy =
7705 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7706 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7707 SDValue ShiftedLoad =
7708 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7709 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7710 ShiftedLoad)
7711 : ShiftedLoad;
7712 }
7713
7714 const CCValAssign &ByValVA = ArgLocs[I++];
7715 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7716 continue;
7717 }
7718
7719 CCValAssign &VA = ArgLocs[I++];
7720 const MVT LocVT = VA.getLocVT();
7721 const MVT ValVT = VA.getValVT();
7722
7723 switch (VA.getLocInfo()) {
7724 default:
7725 report_fatal_error("Unexpected argument extension type.");
7726 case CCValAssign::Full:
7727 break;
7728 case CCValAssign::ZExt:
7729 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7730 break;
7731 case CCValAssign::SExt:
7732 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7733 break;
7734 }
7735
7736 if (VA.isRegLoc() && !VA.needsCustom()) {
7737 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7738 continue;
7739 }
7740
7741 // Vector arguments passed to VarArg functions need custom handling when
7742 // they are passed (at least partially) in GPRs.
7743 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7744 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7745 // Store value to its stack slot.
7746 SDValue PtrOff =
7747 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7748 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7749 SDValue Store =
7750 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7751 MemOpChains.push_back(Store);
7752 const unsigned OriginalValNo = VA.getValNo();
7753 // Then load the GPRs from the stack
7754 unsigned LoadOffset = 0;
7755 auto HandleCustomVecRegLoc = [&]() {
7756 assert(I != E && "Unexpected end of CCvalAssigns.");
7757 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7758 "Expected custom RegLoc.");
7759 CCValAssign RegVA = ArgLocs[I++];
7760 assert(RegVA.getValNo() == OriginalValNo &&
7761 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7762 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7763 DAG.getConstant(LoadOffset, dl, PtrVT));
7764 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7765 MemOpChains.push_back(Load.getValue(1));
7766 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7767 LoadOffset += PtrByteSize;
7768 };
7769
7770 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7771 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7772 // R10.
7773 HandleCustomVecRegLoc();
7774 HandleCustomVecRegLoc();
7775
7776 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7777 ArgLocs[I].getValNo() == OriginalValNo) {
7778 assert(!IsPPC64 &&
7779 "Only 2 custom RegLocs expected for 64-bit codegen.");
7780 HandleCustomVecRegLoc();
7781 HandleCustomVecRegLoc();
7782 }
7783
7784 continue;
7785 }
7786
7787 if (VA.isMemLoc()) {
7788 SDValue PtrOff =
7789 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7790 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7791 MemOpChains.push_back(
7792 DAG.getStore(Chain, dl, Arg, PtrOff,
7794 Subtarget.getFrameLowering()->getStackAlign()));
7795
7796 continue;
7797 }
7798
7799 if (!ValVT.isFloatingPoint())
7801 "Unexpected register handling for calling convention.");
7802
7803 // Custom handling is used for GPR initializations for vararg float
7804 // arguments.
7805 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7806 LocVT.isInteger() &&
7807 "Custom register handling only expected for VarArg.");
7808
7809 SDValue ArgAsInt =
7810 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7811
7812 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7813 // f32 in 32-bit GPR
7814 // f64 in 64-bit GPR
7815 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7816 else if (Arg.getValueType().getFixedSizeInBits() <
7817 LocVT.getFixedSizeInBits())
7818 // f32 in 64-bit GPR.
7819 RegsToPass.push_back(std::make_pair(
7820 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7821 else {
7822 // f64 in two 32-bit GPRs
7823 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7824 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7825 "Unexpected custom register for argument!");
7826 CCValAssign &GPR1 = VA;
7827 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7828 DAG.getConstant(32, dl, MVT::i8));
7829 RegsToPass.push_back(std::make_pair(
7830 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7831
7832 if (I != E) {
7833 // If only 1 GPR was available, there will only be one custom GPR and
7834 // the argument will also pass in memory.
7835 CCValAssign &PeekArg = ArgLocs[I];
7836 if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7837 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7838 CCValAssign &GPR2 = ArgLocs[I++];
7839 RegsToPass.push_back(std::make_pair(
7840 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7841 }
7842 }
7843 }
7844 }
7845
7846 if (!MemOpChains.empty())
7847 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7848
7849 // For indirect calls, we need to save the TOC base to the stack for
7850 // restoration after the call.
7851 if (CFlags.IsIndirect && !Subtarget.usePointerGlueHelper()) {
7852 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7853 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7854 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7855 const MVT PtrVT = Subtarget.getScalarIntVT();
7856 const unsigned TOCSaveOffset =
7857 Subtarget.getFrameLowering()->getTOCSaveOffset();
7858
7859 setUsesTOCBasePtr(DAG);
7860 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7861 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7862 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7863 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7864 Chain = DAG.getStore(
7865 Val.getValue(1), dl, Val, AddPtr,
7866 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7867 }
7868
7869 // Build a sequence of copy-to-reg nodes chained together with token chain
7870 // and flag operands which copy the outgoing args into the appropriate regs.
7871 SDValue InGlue;
7872 for (auto Reg : RegsToPass) {
7873 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7874 InGlue = Chain.getValue(1);
7875 }
7876
7877 const int SPDiff = 0;
7878 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7879 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7880}
7881
7882bool
7883PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7884 MachineFunction &MF, bool isVarArg,
7887 const Type *RetTy) const {
7889 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7890 return CCInfo.CheckReturn(
7891 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7893 : RetCC_PPC);
7894}
7895
7896SDValue
7897PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7898 bool isVarArg,
7900 const SmallVectorImpl<SDValue> &OutVals,
7901 const SDLoc &dl, SelectionDAG &DAG) const {
7903 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7904 *DAG.getContext());
7905 CCInfo.AnalyzeReturn(Outs,
7906 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7908 : RetCC_PPC);
7909
7910 SDValue Glue;
7911 SmallVector<SDValue, 4> RetOps(1, Chain);
7912
7913 // Copy the result values into the output registers.
7914 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7915 CCValAssign &VA = RVLocs[i];
7916 assert(VA.isRegLoc() && "Can only return in registers!");
7917
7918 SDValue Arg = OutVals[RealResIdx];
7919
7920 switch (VA.getLocInfo()) {
7921 default: llvm_unreachable("Unknown loc info!");
7922 case CCValAssign::Full: break;
7923 case CCValAssign::AExt:
7924 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7925 break;
7926 case CCValAssign::ZExt:
7927 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7928 break;
7929 case CCValAssign::SExt:
7930 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7931 break;
7932 }
7933 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7934 bool isLittleEndian = Subtarget.isLittleEndian();
7935 // Legalize ret f64 -> ret 2 x i32.
7936 SDValue SVal =
7937 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7938 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7939 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7940 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7941 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7942 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7943 Glue = Chain.getValue(1);
7944 VA = RVLocs[++i]; // skip ahead to next loc
7945 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7946 } else
7947 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7948 Glue = Chain.getValue(1);
7949 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7950 }
7951
7952 RetOps[0] = Chain; // Update chain.
7953
7954 // Add the glue if we have it.
7955 if (Glue.getNode())
7956 RetOps.push_back(Glue);
7957
7958 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7959}
7960
7961SDValue
7962PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7963 SelectionDAG &DAG) const {
7964 SDLoc dl(Op);
7965
7966 // Get the correct type for integers.
7967 EVT IntVT = Op.getValueType();
7968
7969 // Get the inputs.
7970 SDValue Chain = Op.getOperand(0);
7971 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7972 // Build a DYNAREAOFFSET node.
7973 SDValue Ops[2] = {Chain, FPSIdx};
7974 SDVTList VTs = DAG.getVTList(IntVT);
7975 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7976}
7977
7978SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7979 SelectionDAG &DAG) const {
7980 // When we pop the dynamic allocation we need to restore the SP link.
7981 SDLoc dl(Op);
7982
7983 // Get the correct type for pointers.
7984 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7985
7986 // Construct the stack pointer operand.
7987 bool isPPC64 = Subtarget.isPPC64();
7988 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7989 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7990
7991 // Get the operands for the STACKRESTORE.
7992 SDValue Chain = Op.getOperand(0);
7993 SDValue SaveSP = Op.getOperand(1);
7994
7995 // Load the old link SP.
7996 SDValue LoadLinkSP =
7997 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7998
7999 // Restore the stack pointer.
8000 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
8001
8002 // Store the old link SP.
8003 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
8004}
8005
8006SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
8007 MachineFunction &MF = DAG.getMachineFunction();
8008 bool isPPC64 = Subtarget.isPPC64();
8009 EVT PtrVT = getPointerTy(MF.getDataLayout());
8010
8011 // Get current frame pointer save index. The users of this index will be
8012 // primarily DYNALLOC instructions.
8013 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8014 int RASI = FI->getReturnAddrSaveIndex();
8015
8016 // If the frame pointer save index hasn't been defined yet.
8017 if (!RASI) {
8018 // Find out what the fix offset of the frame pointer save area.
8019 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8020 // Allocate the frame index for frame pointer save area.
8021 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
8022 // Save the result.
8023 FI->setReturnAddrSaveIndex(RASI);
8024 }
8025 return DAG.getFrameIndex(RASI, PtrVT);
8026}
8027
8028SDValue
8029PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8030 MachineFunction &MF = DAG.getMachineFunction();
8031 bool isPPC64 = Subtarget.isPPC64();
8032 EVT PtrVT = getPointerTy(MF.getDataLayout());
8033
8034 // Get current frame pointer save index. The users of this index will be
8035 // primarily DYNALLOC instructions.
8036 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8037 int FPSI = FI->getFramePointerSaveIndex();
8038
8039 // If the frame pointer save index hasn't been defined yet.
8040 if (!FPSI) {
8041 // Find out what the fix offset of the frame pointer save area.
8042 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8043 // Allocate the frame index for frame pointer save area.
8044 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8045 // Save the result.
8046 FI->setFramePointerSaveIndex(FPSI);
8047 }
8048 return DAG.getFrameIndex(FPSI, PtrVT);
8049}
8050
8051SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8052 SelectionDAG &DAG) const {
8053 MachineFunction &MF = DAG.getMachineFunction();
8054 // Get the inputs.
8055 SDValue Chain = Op.getOperand(0);
8056 SDValue Size = Op.getOperand(1);
8057 SDLoc dl(Op);
8058
8059 // Get the correct type for pointers.
8060 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8061 // Negate the size.
8062 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8063 DAG.getConstant(0, dl, PtrVT), Size);
8064 // Construct a node for the frame pointer save index.
8065 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8066 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8067 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8068 if (hasInlineStackProbe(MF))
8069 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8070 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8071}
8072
8073SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8074 SelectionDAG &DAG) const {
8075 MachineFunction &MF = DAG.getMachineFunction();
8076
8077 bool isPPC64 = Subtarget.isPPC64();
8078 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8079
8080 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8081 return DAG.getFrameIndex(FI, PtrVT);
8082}
8083
8084SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8085 SelectionDAG &DAG) const {
8086 SDLoc DL(Op);
8087 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8088 DAG.getVTList(MVT::i32, MVT::Other),
8089 Op.getOperand(0), Op.getOperand(1));
8090}
8091
8092SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8093 SelectionDAG &DAG) const {
8094 SDLoc DL(Op);
8095 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8096 Op.getOperand(0), Op.getOperand(1));
8097}
8098
8099SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8100 if (Op.getValueType().isVector())
8101 return LowerVectorLoad(Op, DAG);
8102
8103 assert(Op.getValueType() == MVT::i1 &&
8104 "Custom lowering only for i1 loads");
8105
8106 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8107
8108 SDLoc dl(Op);
8109 LoadSDNode *LD = cast<LoadSDNode>(Op);
8110
8111 SDValue Chain = LD->getChain();
8112 SDValue BasePtr = LD->getBasePtr();
8113 MachineMemOperand *MMO = LD->getMemOperand();
8114
8115 SDValue NewLD =
8116 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8117 BasePtr, MVT::i8, MMO);
8118 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8119
8120 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8121 return DAG.getMergeValues(Ops, dl);
8122}
8123
8124SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8125 if (Op.getOperand(1).getValueType().isVector())
8126 return LowerVectorStore(Op, DAG);
8127
8128 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8129 "Custom lowering only for i1 stores");
8130
8131 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8132
8133 SDLoc dl(Op);
8134 StoreSDNode *ST = cast<StoreSDNode>(Op);
8135
8136 SDValue Chain = ST->getChain();
8137 SDValue BasePtr = ST->getBasePtr();
8138 SDValue Value = ST->getValue();
8139 MachineMemOperand *MMO = ST->getMemOperand();
8140
8142 Value);
8143 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8144}
8145
8146// FIXME: Remove this once the ANDI glue bug is fixed:
8147SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8148 assert(Op.getValueType() == MVT::i1 &&
8149 "Custom lowering only for i1 results");
8150
8151 SDLoc DL(Op);
8152 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8153}
8154
8155SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8156 SelectionDAG &DAG) const {
8157
8158 // Implements a vector truncate that fits in a vector register as a shuffle.
8159 // We want to legalize vector truncates down to where the source fits in
8160 // a vector register (and target is therefore smaller than vector register
8161 // size). At that point legalization will try to custom lower the sub-legal
8162 // result and get here - where we can contain the truncate as a single target
8163 // operation.
8164
8165 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8166 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8167 //
8168 // We will implement it for big-endian ordering as this (where x denotes
8169 // undefined):
8170 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8171 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8172 //
8173 // The same operation in little-endian ordering will be:
8174 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8175 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8176
8177 EVT TrgVT = Op.getValueType();
8178 assert(TrgVT.isVector() && "Vector type expected.");
8179 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8180 EVT EltVT = TrgVT.getVectorElementType();
8181 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8182 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8184 return SDValue();
8185
8186 SDValue N1 = Op.getOperand(0);
8187 EVT SrcVT = N1.getValueType();
8188 unsigned SrcSize = SrcVT.getSizeInBits();
8189 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8192 return SDValue();
8193 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8194 return SDValue();
8195
8196 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8197 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8198
8199 SDLoc DL(Op);
8200 SDValue Op1, Op2;
8201 if (SrcSize == 256) {
8202 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8203 EVT SplitVT =
8205 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8206 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8207 DAG.getConstant(0, DL, VecIdxTy));
8208 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8209 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8210 }
8211 else {
8212 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8213 Op2 = DAG.getUNDEF(WideVT);
8214 }
8215
8216 // First list the elements we want to keep.
8217 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8218 SmallVector<int, 16> ShuffV;
8219 if (Subtarget.isLittleEndian())
8220 for (unsigned i = 0; i < TrgNumElts; ++i)
8221 ShuffV.push_back(i * SizeMult);
8222 else
8223 for (unsigned i = 1; i <= TrgNumElts; ++i)
8224 ShuffV.push_back(i * SizeMult - 1);
8225
8226 // Populate the remaining elements with undefs.
8227 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8228 // ShuffV.push_back(i + WideNumElts);
8229 ShuffV.push_back(WideNumElts + 1);
8230
8231 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8232 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8233 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8234}
8235
8236/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8237/// possible.
8238SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8239 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8240 EVT ResVT = Op.getValueType();
8241 EVT CmpVT = Op.getOperand(0).getValueType();
8242 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8243 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8244 SDLoc dl(Op);
8245
8246 // Without power9-vector, we don't have native instruction for f128 comparison.
8247 // Following transformation to libcall is needed for setcc:
8248 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8249 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8250 SDValue Z = DAG.getSetCC(
8251 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8252 LHS, RHS, CC);
8253 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8254 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8255 }
8256
8257 // Not FP, or using SPE? Not a fsel.
8258 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8259 Subtarget.hasSPE())
8260 return Op;
8261
8262 SDNodeFlags Flags = Op.getNode()->getFlags();
8263
8264 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8265 // presence of infinities.
8266 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8267 switch (CC) {
8268 default:
8269 break;
8270 case ISD::SETOGT:
8271 case ISD::SETGT:
8272 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8273 case ISD::SETOLT:
8274 case ISD::SETLT:
8275 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8276 }
8277 }
8278
8279 // We might be able to do better than this under some circumstances, but in
8280 // general, fsel-based lowering of select is a finite-math-only optimization.
8281 // For more information, see section F.3 of the 2.06 ISA specification.
8282 // With ISA 3.0
8283 if (!Flags.hasNoInfs() || !Flags.hasNoNaNs() || ResVT == MVT::f128)
8284 return Op;
8285
8286 // If the RHS of the comparison is a 0.0, we don't need to do the
8287 // subtraction at all.
8288 SDValue Sel1;
8290 switch (CC) {
8291 default: break; // SETUO etc aren't handled by fsel.
8292 case ISD::SETNE:
8293 std::swap(TV, FV);
8294 [[fallthrough]];
8295 case ISD::SETEQ:
8296 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8297 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8298 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8299 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8300 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8301 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8302 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8303 case ISD::SETULT:
8304 case ISD::SETLT:
8305 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8306 [[fallthrough]];
8307 case ISD::SETOGE:
8308 case ISD::SETGE:
8309 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8310 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8311 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8312 case ISD::SETUGT:
8313 case ISD::SETGT:
8314 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8315 [[fallthrough]];
8316 case ISD::SETOLE:
8317 case ISD::SETLE:
8318 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8319 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8320 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8321 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8322 }
8323
8324 SDValue Cmp;
8325 switch (CC) {
8326 default: break; // SETUO etc aren't handled by fsel.
8327 case ISD::SETNE:
8328 std::swap(TV, FV);
8329 [[fallthrough]];
8330 case ISD::SETEQ:
8331 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8332 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8333 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8334 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8335 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8336 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8337 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8338 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8339 case ISD::SETULT:
8340 case ISD::SETLT:
8341 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8342 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8343 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8344 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8345 case ISD::SETOGE:
8346 case ISD::SETGE:
8347 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8348 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8349 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8350 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8351 case ISD::SETUGT:
8352 case ISD::SETGT:
8353 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8354 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8355 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8356 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8357 case ISD::SETOLE:
8358 case ISD::SETLE:
8359 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8360 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8361 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8362 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8363 }
8364 return Op;
8365}
8366
8367static unsigned getPPCStrictOpcode(unsigned Opc) {
8368 switch (Opc) {
8369 default:
8370 llvm_unreachable("No strict version of this opcode!");
8371 case PPCISD::FCTIDZ:
8372 return PPCISD::STRICT_FCTIDZ;
8373 case PPCISD::FCTIWZ:
8374 return PPCISD::STRICT_FCTIWZ;
8375 case PPCISD::FCTIDUZ:
8376 return PPCISD::STRICT_FCTIDUZ;
8377 case PPCISD::FCTIWUZ:
8378 return PPCISD::STRICT_FCTIWUZ;
8379 case PPCISD::FCFID:
8380 return PPCISD::STRICT_FCFID;
8381 case PPCISD::FCFIDU:
8382 return PPCISD::STRICT_FCFIDU;
8383 case PPCISD::FCFIDS:
8384 return PPCISD::STRICT_FCFIDS;
8385 case PPCISD::FCFIDUS:
8386 return PPCISD::STRICT_FCFIDUS;
8387 }
8388}
8389
8391 const PPCSubtarget &Subtarget) {
8392 SDLoc dl(Op);
8393 bool IsStrict = Op->isStrictFPOpcode();
8394 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8395 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8396
8397 // TODO: Any other flags to propagate?
8398 SDNodeFlags Flags;
8399 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8400
8401 // For strict nodes, source is the second operand.
8402 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8403 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8404 MVT DestTy = Op.getSimpleValueType();
8405 assert(Src.getValueType().isFloatingPoint() &&
8406 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8407 DestTy == MVT::i64) &&
8408 "Invalid FP_TO_INT types");
8409 if (Src.getValueType() == MVT::f32) {
8410 if (IsStrict) {
8411 Src =
8413 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8414 Chain = Src.getValue(1);
8415 } else
8416 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8417 }
8418 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8419 DestTy = Subtarget.getScalarIntVT();
8420 unsigned Opc = ISD::DELETED_NODE;
8421 switch (DestTy.SimpleTy) {
8422 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8423 case MVT::i32:
8424 Opc = IsSigned ? PPCISD::FCTIWZ
8425 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8426 break;
8427 case MVT::i64:
8428 assert((IsSigned || Subtarget.hasFPCVT()) &&
8429 "i64 FP_TO_UINT is supported only with FPCVT");
8430 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8431 }
8432 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8433 SDValue Conv;
8434 if (IsStrict) {
8436 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8437 Flags);
8438 } else {
8439 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8440 }
8441 return Conv;
8442}
8443
8444void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8445 SelectionDAG &DAG,
8446 const SDLoc &dl) const {
8447 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8448 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8449 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8450 bool IsStrict = Op->isStrictFPOpcode();
8451
8452 // Convert the FP value to an int value through memory.
8453 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8454 (IsSigned || Subtarget.hasFPCVT());
8455 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8456 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8457 MachinePointerInfo MPI =
8459
8460 // Emit a store to the stack slot.
8461 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8462 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8463 if (i32Stack) {
8464 MachineFunction &MF = DAG.getMachineFunction();
8465 Alignment = Align(4);
8466 MachineMemOperand *MMO =
8467 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8468 SDValue Ops[] = { Chain, Tmp, FIPtr };
8469 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8470 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8471 } else
8472 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8473
8474 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8475 // add in a bias on big endian.
8476 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8477 !Subtarget.isLittleEndian()) {
8478 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8479 DAG.getConstant(4, dl, FIPtr.getValueType()));
8480 MPI = MPI.getWithOffset(4);
8481 }
8482
8483 RLI.Chain = Chain;
8484 RLI.Ptr = FIPtr;
8485 RLI.MPI = MPI;
8486 RLI.Alignment = Alignment;
8487}
8488
8489/// Custom lowers floating point to integer conversions to use
8490/// the direct move instructions available in ISA 2.07 to avoid the
8491/// need for load/store combinations.
8492SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8493 SelectionDAG &DAG,
8494 const SDLoc &dl) const {
8495 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8496 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8497 if (Op->isStrictFPOpcode())
8498 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8499 else
8500 return Mov;
8501}
8502
8503SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8504 const SDLoc &dl) const {
8505 bool IsStrict = Op->isStrictFPOpcode();
8506 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8507 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8508 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8509 EVT SrcVT = Src.getValueType();
8510 EVT DstVT = Op.getValueType();
8511
8512 // FP to INT conversions are legal for f128.
8513 if (SrcVT == MVT::f128)
8514 return Subtarget.hasP9Vector() ? Op : SDValue();
8515
8516 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8517 // PPC (the libcall is not available).
8518 if (SrcVT == MVT::ppcf128) {
8519 if (DstVT == MVT::i32) {
8520 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8521 // set other fast-math flags to FP operations in both strict and
8522 // non-strict cases. (FP_TO_SINT, FSUB)
8523 SDNodeFlags Flags;
8524 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8525
8526 if (IsSigned) {
8527 SDValue Lo, Hi;
8528 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8529
8530 // Add the two halves of the long double in round-to-zero mode, and use
8531 // a smaller FP_TO_SINT.
8532 if (IsStrict) {
8533 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8534 DAG.getVTList(MVT::f64, MVT::Other),
8535 {Op.getOperand(0), Lo, Hi}, Flags);
8536 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8537 DAG.getVTList(MVT::i32, MVT::Other),
8538 {Res.getValue(1), Res}, Flags);
8539 } else {
8540 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8541 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8542 }
8543 } else {
8544 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8545 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8546 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8547 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8548 if (IsStrict) {
8549 // Sel = Src < 0x80000000
8550 // FltOfs = select Sel, 0.0, 0x80000000
8551 // IntOfs = select Sel, 0, 0x80000000
8552 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8553 SDValue Chain = Op.getOperand(0);
8554 EVT SetCCVT =
8555 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8556 EVT DstSetCCVT =
8557 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8558 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8559 Chain, true);
8560 Chain = Sel.getValue(1);
8561
8562 SDValue FltOfs = DAG.getSelect(
8563 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8564 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8565
8566 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8567 DAG.getVTList(SrcVT, MVT::Other),
8568 {Chain, Src, FltOfs}, Flags);
8569 Chain = Val.getValue(1);
8570 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8571 DAG.getVTList(DstVT, MVT::Other),
8572 {Chain, Val}, Flags);
8573 Chain = SInt.getValue(1);
8574 SDValue IntOfs = DAG.getSelect(
8575 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8576 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8577 return DAG.getMergeValues({Result, Chain}, dl);
8578 } else {
8579 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8580 // FIXME: generated code sucks.
8581 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8582 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8583 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8584 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8585 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8586 }
8587 }
8588 }
8589
8590 return SDValue();
8591 }
8592
8593 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8594 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8595
8596 ReuseLoadInfo RLI;
8597 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8598
8599 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8600 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8601}
8602
8603// We're trying to insert a regular store, S, and then a load, L. If the
8604// incoming value, O, is a load, we might just be able to have our load use the
8605// address used by O. However, we don't know if anything else will store to
8606// that address before we can load from it. To prevent this situation, we need
8607// to insert our load, L, into the chain as a peer of O. To do this, we give L
8608// the same chain operand as O, we create a token factor from the chain results
8609// of O and L, and we replace all uses of O's chain result with that token
8610// factor (this last part is handled by makeEquivalentMemoryOrdering).
8611bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8612 ReuseLoadInfo &RLI,
8613 SelectionDAG &DAG,
8614 ISD::LoadExtType ET) const {
8615 // Conservatively skip reusing for constrained FP nodes.
8616 if (Op->isStrictFPOpcode())
8617 return false;
8618
8619 SDLoc dl(Op);
8620 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8621 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8622 if (ET == ISD::NON_EXTLOAD &&
8623 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8624 isOperationLegalOrCustom(Op.getOpcode(),
8625 Op.getOperand(0).getValueType())) {
8626
8627 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8628 return true;
8629 }
8630
8631 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8632 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8633 LD->isNonTemporal())
8634 return false;
8635 if (LD->getMemoryVT() != MemVT)
8636 return false;
8637
8638 // If the result of the load is an illegal type, then we can't build a
8639 // valid chain for reuse since the legalised loads and token factor node that
8640 // ties the legalised loads together uses a different output chain then the
8641 // illegal load.
8642 if (!isTypeLegal(LD->getValueType(0)))
8643 return false;
8644
8645 RLI.Ptr = LD->getBasePtr();
8646 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8647 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8648 "Non-pre-inc AM on PPC?");
8649 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8650 LD->getOffset());
8651 }
8652
8653 RLI.Chain = LD->getChain();
8654 RLI.MPI = LD->getPointerInfo();
8655 RLI.IsDereferenceable = LD->isDereferenceable();
8656 RLI.IsInvariant = LD->isInvariant();
8657 RLI.Alignment = LD->getAlign();
8658 RLI.AAInfo = LD->getAAInfo();
8659 RLI.Ranges = LD->getRanges();
8660
8661 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8662 return true;
8663}
8664
8665/// Analyze profitability of direct move
8666/// prefer float load to int load plus direct move
8667/// when there is no integer use of int load
8668bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8669 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8670 if (Origin->getOpcode() != ISD::LOAD)
8671 return true;
8672
8673 // If there is no LXSIBZX/LXSIHZX, like Power8,
8674 // prefer direct move if the memory size is 1 or 2 bytes.
8675 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8676 if (!Subtarget.hasP9Vector() &&
8677 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8678 return true;
8679
8680 for (SDUse &Use : Origin->uses()) {
8681
8682 // Only look at the users of the loaded value.
8683 if (Use.getResNo() != 0)
8684 continue;
8685
8686 SDNode *User = Use.getUser();
8687 if (User->getOpcode() != ISD::SINT_TO_FP &&
8688 User->getOpcode() != ISD::UINT_TO_FP &&
8689 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8690 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8691 return true;
8692 }
8693
8694 return false;
8695}
8696
8698 const PPCSubtarget &Subtarget,
8699 SDValue Chain = SDValue()) {
8700 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8701 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8702 SDLoc dl(Op);
8703
8704 // TODO: Any other flags to propagate?
8705 SDNodeFlags Flags;
8706 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8707
8708 // If we have FCFIDS, then use it when converting to single-precision.
8709 // Otherwise, convert to double-precision and then round.
8710 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8711 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8712 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8713 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8714 if (Op->isStrictFPOpcode()) {
8715 if (!Chain)
8716 Chain = Op.getOperand(0);
8717 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8718 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8719 } else
8720 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8721}
8722
8723/// Custom lowers integer to floating point conversions to use
8724/// the direct move instructions available in ISA 2.07 to avoid the
8725/// need for load/store combinations.
8726SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8727 SelectionDAG &DAG,
8728 const SDLoc &dl) const {
8729 assert((Op.getValueType() == MVT::f32 ||
8730 Op.getValueType() == MVT::f64) &&
8731 "Invalid floating point type as target of conversion");
8732 assert(Subtarget.hasFPCVT() &&
8733 "Int to FP conversions with direct moves require FPCVT");
8734 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8735 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8736 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8737 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8738 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8739 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8740 return convertIntToFP(Op, Mov, DAG, Subtarget);
8741}
8742
8743static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8744
8745 EVT VecVT = Vec.getValueType();
8746 assert(VecVT.isVector() && "Expected a vector type.");
8747 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8748
8749 EVT EltVT = VecVT.getVectorElementType();
8750 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8751 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8752
8753 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8754 SmallVector<SDValue, 16> Ops(NumConcat);
8755 Ops[0] = Vec;
8756 SDValue UndefVec = DAG.getUNDEF(VecVT);
8757 for (unsigned i = 1; i < NumConcat; ++i)
8758 Ops[i] = UndefVec;
8759
8760 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8761}
8762
8763SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8764 const SDLoc &dl) const {
8765 bool IsStrict = Op->isStrictFPOpcode();
8766 unsigned Opc = Op.getOpcode();
8767 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8770 "Unexpected conversion type");
8771 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8772 "Supports conversions to v2f64/v4f32 only.");
8773
8774 // TODO: Any other flags to propagate?
8775 SDNodeFlags Flags;
8776 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8777
8778 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8779 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8780
8781 SDValue Wide = widenVec(DAG, Src, dl);
8782 EVT WideVT = Wide.getValueType();
8783 unsigned WideNumElts = WideVT.getVectorNumElements();
8784 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8785
8786 SmallVector<int, 16> ShuffV;
8787 for (unsigned i = 0; i < WideNumElts; ++i)
8788 ShuffV.push_back(i + WideNumElts);
8789
8790 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8791 int SaveElts = FourEltRes ? 4 : 2;
8792 if (Subtarget.isLittleEndian())
8793 for (int i = 0; i < SaveElts; i++)
8794 ShuffV[i * Stride] = i;
8795 else
8796 for (int i = 1; i <= SaveElts; i++)
8797 ShuffV[i * Stride - 1] = i - 1;
8798
8799 SDValue ShuffleSrc2 =
8800 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8801 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8802
8803 SDValue Extend;
8804 if (SignedConv) {
8805 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8806 EVT ExtVT = Src.getValueType();
8807 if (Subtarget.hasP9Altivec())
8808 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8809 IntermediateVT.getVectorNumElements());
8810
8811 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8812 DAG.getValueType(ExtVT));
8813 } else
8814 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8815
8816 if (IsStrict)
8817 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8818 {Op.getOperand(0), Extend}, Flags);
8819
8820 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8821}
8822
8823SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8824 SelectionDAG &DAG) const {
8825 SDLoc dl(Op);
8826 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8827 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8828 bool IsStrict = Op->isStrictFPOpcode();
8829 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8830 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8831
8832 // TODO: Any other flags to propagate?
8833 SDNodeFlags Flags;
8834 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8835
8836 EVT InVT = Src.getValueType();
8837 EVT OutVT = Op.getValueType();
8838 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8839 isOperationCustom(Op.getOpcode(), InVT))
8840 return LowerINT_TO_FPVector(Op, DAG, dl);
8841
8842 // Conversions to f128 are legal.
8843 if (Op.getValueType() == MVT::f128)
8844 return Subtarget.hasP9Vector() ? Op : SDValue();
8845
8846 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8847 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8848 return SDValue();
8849
8850 if (Src.getValueType() == MVT::i1) {
8851 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8852 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8853 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8854 if (IsStrict)
8855 return DAG.getMergeValues({Sel, Chain}, dl);
8856 else
8857 return Sel;
8858 }
8859
8860 // If we have direct moves, we can do all the conversion, skip the store/load
8861 // however, without FPCVT we can't do most conversions.
8862 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8863 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8864 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8865
8866 assert((IsSigned || Subtarget.hasFPCVT()) &&
8867 "UINT_TO_FP is supported only with FPCVT");
8868
8869 if (Src.getValueType() == MVT::i64) {
8870 SDValue SINT = Src;
8871 // When converting to single-precision, we actually need to convert
8872 // to double-precision first and then round to single-precision.
8873 // To avoid double-rounding effects during that operation, we have
8874 // to prepare the input operand. Bits that might be truncated when
8875 // converting to double-precision are replaced by a bit that won't
8876 // be lost at this stage, but is below the single-precision rounding
8877 // position.
8878 //
8879 // However, if afn is in effect, accept double
8880 // rounding to avoid the extra overhead.
8881 // FIXME: Currently INT_TO_FP can't support fast math flags because
8882 // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8883 // false.
8884 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8885 !Op->getFlags().hasApproximateFuncs()) {
8886
8887 // Twiddle input to make sure the low 11 bits are zero. (If this
8888 // is the case, we are guaranteed the value will fit into the 53 bit
8889 // mantissa of an IEEE double-precision value without rounding.)
8890 // If any of those low 11 bits were not zero originally, make sure
8891 // bit 12 (value 2048) is set instead, so that the final rounding
8892 // to single-precision gets the correct result.
8893 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8894 SINT, DAG.getConstant(2047, dl, MVT::i64));
8895 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8896 Round, DAG.getConstant(2047, dl, MVT::i64));
8897 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8898 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8899 DAG.getSignedConstant(-2048, dl, MVT::i64));
8900
8901 // However, we cannot use that value unconditionally: if the magnitude
8902 // of the input value is small, the bit-twiddling we did above might
8903 // end up visibly changing the output. Fortunately, in that case, we
8904 // don't need to twiddle bits since the original input will convert
8905 // exactly to double-precision floating-point already. Therefore,
8906 // construct a conditional to use the original value if the top 11
8907 // bits are all sign-bit copies, and use the rounded value computed
8908 // above otherwise.
8909 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8910 SINT, DAG.getConstant(53, dl, MVT::i32));
8911 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8912 Cond, DAG.getConstant(1, dl, MVT::i64));
8913 Cond = DAG.getSetCC(
8914 dl,
8915 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8916 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8917
8918 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8919 }
8920
8921 ReuseLoadInfo RLI;
8922 SDValue Bits;
8923
8924 MachineFunction &MF = DAG.getMachineFunction();
8925 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8926 // Drop range metadata, as this metadata becomes invalid for f64 bit
8927 // reinterpretation of i64 values.
8928 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8929 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, nullptr);
8930 if (RLI.ResChain)
8931 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8932 } else if (Subtarget.hasLFIWAX() &&
8933 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8934 MachineMemOperand *MMO =
8936 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8937 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8938 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8939 DAG.getVTList(MVT::f64, MVT::Other),
8940 Ops, MVT::i32, MMO);
8941 if (RLI.ResChain)
8942 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8943 } else if (Subtarget.hasFPCVT() &&
8944 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8945 MachineMemOperand *MMO =
8947 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8948 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8949 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8950 DAG.getVTList(MVT::f64, MVT::Other),
8951 Ops, MVT::i32, MMO);
8952 if (RLI.ResChain)
8953 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8954 } else if (((Subtarget.hasLFIWAX() &&
8955 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8956 (Subtarget.hasFPCVT() &&
8957 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8958 SINT.getOperand(0).getValueType() == MVT::i32) {
8959 MachineFrameInfo &MFI = MF.getFrameInfo();
8960 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8961
8962 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8963 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8964
8965 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8967 DAG.getMachineFunction(), FrameIdx));
8968 Chain = Store;
8969
8970 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8971 "Expected an i32 store");
8972
8973 RLI.Ptr = FIdx;
8974 RLI.Chain = Chain;
8975 RLI.MPI =
8977 RLI.Alignment = Align(4);
8978
8979 MachineMemOperand *MMO =
8981 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8982 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8984 PPCISD::LFIWZX : PPCISD::LFIWAX,
8985 dl, DAG.getVTList(MVT::f64, MVT::Other),
8986 Ops, MVT::i32, MMO);
8987 Chain = Bits.getValue(1);
8988 } else
8989 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8990
8991 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8992 if (IsStrict)
8993 Chain = FP.getValue(1);
8994
8995 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8996 if (IsStrict)
8997 FP = DAG.getNode(
8998 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8999 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
9000 Flags);
9001 else
9002 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9003 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9004 }
9005 return FP;
9006 }
9007
9008 assert(Src.getValueType() == MVT::i32 &&
9009 "Unhandled INT_TO_FP type in custom expander!");
9010 // Since we only generate this in 64-bit mode, we can take advantage of
9011 // 64-bit registers. In particular, sign extend the input value into the
9012 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
9013 // then lfd it and fcfid it.
9014 MachineFunction &MF = DAG.getMachineFunction();
9015 MachineFrameInfo &MFI = MF.getFrameInfo();
9016 EVT PtrVT = getPointerTy(MF.getDataLayout());
9017
9018 SDValue Ld;
9019 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
9020 ReuseLoadInfo RLI;
9021 bool ReusingLoad;
9022 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
9023 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9024 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9025
9026 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9028 DAG.getMachineFunction(), FrameIdx));
9029 Chain = Store;
9030
9031 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9032 "Expected an i32 store");
9033
9034 RLI.Ptr = FIdx;
9035 RLI.Chain = Chain;
9036 RLI.MPI =
9038 RLI.Alignment = Align(4);
9039 }
9040
9041 MachineMemOperand *MMO =
9043 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9044 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9045 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9046 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9047 MVT::i32, MMO);
9048 Chain = Ld.getValue(1);
9049 if (ReusingLoad && RLI.ResChain) {
9050 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
9051 }
9052 } else {
9053 assert(Subtarget.isPPC64() &&
9054 "i32->FP without LFIWAX supported only on PPC64");
9055
9056 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9057 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9058
9059 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9060
9061 // STD the extended value into the stack slot.
9062 SDValue Store = DAG.getStore(
9063 Chain, dl, Ext64, FIdx,
9065 Chain = Store;
9066
9067 // Load the value as a double.
9068 Ld = DAG.getLoad(
9069 MVT::f64, dl, Chain, FIdx,
9071 Chain = Ld.getValue(1);
9072 }
9073
9074 // FCFID it and return it.
9075 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9076 if (IsStrict)
9077 Chain = FP.getValue(1);
9078 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9079 if (IsStrict)
9080 FP = DAG.getNode(
9081 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9082 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9083 else
9084 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9085 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9086 }
9087 return FP;
9088}
9089
9090SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9091 SelectionDAG &DAG) const {
9092 SDLoc Dl(Op);
9093 MachineFunction &MF = DAG.getMachineFunction();
9094 EVT PtrVT = getPointerTy(MF.getDataLayout());
9095 SDValue Chain = Op.getOperand(0);
9096
9097 // If requested mode is constant, just use simpler mtfsb/mffscrni
9098 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9099 uint64_t Mode = CVal->getZExtValue();
9100 assert(Mode < 4 && "Unsupported rounding mode!");
9101 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9102 if (Subtarget.isISA3_0())
9103 return SDValue(
9104 DAG.getMachineNode(
9105 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9106 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9107 1);
9108 SDNode *SetHi = DAG.getMachineNode(
9109 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9110 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9111 SDNode *SetLo = DAG.getMachineNode(
9112 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9113 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9114 return SDValue(SetLo, 0);
9115 }
9116
9117 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
9118 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9119 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9120 DAG.getConstant(3, Dl, MVT::i32));
9121 SDValue DstFlag = DAG.getNode(
9122 ISD::XOR, Dl, MVT::i32, SrcFlag,
9123 DAG.getNode(ISD::AND, Dl, MVT::i32,
9124 DAG.getNOT(Dl,
9125 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9126 MVT::i32),
9127 One));
9128 // For Power9, there's faster mffscrn, and we don't need to read FPSCR
9129 SDValue MFFS;
9130 if (!Subtarget.isISA3_0()) {
9131 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9132 Chain = MFFS.getValue(1);
9133 }
9134 SDValue NewFPSCR;
9135 if (Subtarget.isPPC64()) {
9136 if (Subtarget.isISA3_0()) {
9137 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9138 } else {
9139 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9140 SDNode *InsertRN = DAG.getMachineNode(
9141 PPC::RLDIMI, Dl, MVT::i64,
9142 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9143 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9144 DAG.getTargetConstant(0, Dl, MVT::i32),
9145 DAG.getTargetConstant(62, Dl, MVT::i32)});
9146 NewFPSCR = SDValue(InsertRN, 0);
9147 }
9148 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9149 } else {
9150 // In 32-bit mode, store f64, load and update the lower half.
9151 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9152 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9153 SDValue Addr = Subtarget.isLittleEndian()
9154 ? StackSlot
9155 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9156 DAG.getConstant(4, Dl, PtrVT));
9157 if (Subtarget.isISA3_0()) {
9158 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9159 } else {
9160 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9161 SDValue Tmp =
9162 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9163 Chain = Tmp.getValue(1);
9164 Tmp = SDValue(DAG.getMachineNode(
9165 PPC::RLWIMI, Dl, MVT::i32,
9166 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9167 DAG.getTargetConstant(30, Dl, MVT::i32),
9168 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9169 0);
9170 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9171 }
9172 NewFPSCR =
9173 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9174 Chain = NewFPSCR.getValue(1);
9175 }
9176 if (Subtarget.isISA3_0())
9177 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9178 {NewFPSCR, Chain}),
9179 1);
9180 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9181 SDNode *MTFSF = DAG.getMachineNode(
9182 PPC::MTFSF, Dl, MVT::Other,
9183 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9184 return SDValue(MTFSF, 0);
9185}
9186
9187SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9188 SelectionDAG &DAG) const {
9189 SDLoc dl(Op);
9190 /*
9191 The rounding mode is in bits 30:31 of FPSR, and has the following
9192 settings:
9193 00 Round to nearest
9194 01 Round to 0
9195 10 Round to +inf
9196 11 Round to -inf
9197
9198 GET_ROUNDING, on the other hand, expects the following:
9199 -1 Undefined
9200 0 Round to 0
9201 1 Round to nearest
9202 2 Round to +inf
9203 3 Round to -inf
9204
9205 To perform the conversion, we do:
9206 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9207 */
9208
9209 MachineFunction &MF = DAG.getMachineFunction();
9210 EVT VT = Op.getValueType();
9211 EVT PtrVT = getPointerTy(MF.getDataLayout());
9212
9213 // Save FP Control Word to register
9214 SDValue Chain = Op.getOperand(0);
9215 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9216 Chain = MFFS.getValue(1);
9217
9218 SDValue CWD;
9219 if (isTypeLegal(MVT::i64)) {
9220 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9221 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9222 } else {
9223 // Save FP register to stack slot
9224 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9225 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9226 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9227
9228 // Load FP Control Word from low 32 bits of stack slot.
9230 "Stack slot adjustment is valid only on big endian subtargets!");
9231 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9232 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9233 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9234 Chain = CWD.getValue(1);
9235 }
9236
9237 // Transform as necessary
9238 SDValue CWD1 =
9239 DAG.getNode(ISD::AND, dl, MVT::i32,
9240 CWD, DAG.getConstant(3, dl, MVT::i32));
9241 SDValue CWD2 =
9242 DAG.getNode(ISD::SRL, dl, MVT::i32,
9243 DAG.getNode(ISD::AND, dl, MVT::i32,
9244 DAG.getNode(ISD::XOR, dl, MVT::i32,
9245 CWD, DAG.getConstant(3, dl, MVT::i32)),
9246 DAG.getConstant(3, dl, MVT::i32)),
9247 DAG.getConstant(1, dl, MVT::i32));
9248
9249 SDValue RetVal =
9250 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9251
9252 RetVal =
9254 dl, VT, RetVal);
9255
9256 return DAG.getMergeValues({RetVal, Chain}, dl);
9257}
9258
9259SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9260 EVT VT = Op.getValueType();
9261 uint64_t BitWidth = VT.getSizeInBits();
9262 SDLoc dl(Op);
9263 assert(Op.getNumOperands() == 3 &&
9264 VT == Op.getOperand(1).getValueType() &&
9265 "Unexpected SHL!");
9266
9267 // Expand into a bunch of logical ops. Note that these ops
9268 // depend on the PPC behavior for oversized shift amounts.
9269 SDValue Lo = Op.getOperand(0);
9270 SDValue Hi = Op.getOperand(1);
9271 SDValue Amt = Op.getOperand(2);
9272 EVT AmtVT = Amt.getValueType();
9273
9274 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9275 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9276 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9277 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9278 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9279 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9280 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9281 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9282 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9283 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9284 SDValue OutOps[] = { OutLo, OutHi };
9285 return DAG.getMergeValues(OutOps, dl);
9286}
9287
9288SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9289 EVT VT = Op.getValueType();
9290 SDLoc dl(Op);
9291 uint64_t BitWidth = VT.getSizeInBits();
9292 assert(Op.getNumOperands() == 3 &&
9293 VT == Op.getOperand(1).getValueType() &&
9294 "Unexpected SRL!");
9295
9296 // Expand into a bunch of logical ops. Note that these ops
9297 // depend on the PPC behavior for oversized shift amounts.
9298 SDValue Lo = Op.getOperand(0);
9299 SDValue Hi = Op.getOperand(1);
9300 SDValue Amt = Op.getOperand(2);
9301 EVT AmtVT = Amt.getValueType();
9302
9303 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9304 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9305 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9306 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9307 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9308 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9309 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9310 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9311 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9312 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9313 SDValue OutOps[] = { OutLo, OutHi };
9314 return DAG.getMergeValues(OutOps, dl);
9315}
9316
9317SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9318 SDLoc dl(Op);
9319 EVT VT = Op.getValueType();
9320 uint64_t BitWidth = VT.getSizeInBits();
9321 assert(Op.getNumOperands() == 3 &&
9322 VT == Op.getOperand(1).getValueType() &&
9323 "Unexpected SRA!");
9324
9325 // Expand into a bunch of logical ops, followed by a select_cc.
9326 SDValue Lo = Op.getOperand(0);
9327 SDValue Hi = Op.getOperand(1);
9328 SDValue Amt = Op.getOperand(2);
9329 EVT AmtVT = Amt.getValueType();
9330
9331 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9332 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9333 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9334 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9335 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9336 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9337 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9338 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9339 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9340 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9341 Tmp4, Tmp6, ISD::SETLE);
9342 SDValue OutOps[] = { OutLo, OutHi };
9343 return DAG.getMergeValues(OutOps, dl);
9344}
9345
9346SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9347 SelectionDAG &DAG) const {
9348 SDLoc dl(Op);
9349 EVT VT = Op.getValueType();
9350 unsigned BitWidth = VT.getSizeInBits();
9351
9352 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9353 SDValue X = Op.getOperand(0);
9354 SDValue Y = Op.getOperand(1);
9355 SDValue Z = Op.getOperand(2);
9356 EVT AmtVT = Z.getValueType();
9357
9358 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9359 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9360 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9361 // on PowerPC shift by BW being well defined.
9362 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9363 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9364 SDValue SubZ =
9365 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9366 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9367 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9368 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9369}
9370
9371//===----------------------------------------------------------------------===//
9372// Vector related lowering.
9373//
9374
9375/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9376/// element size of SplatSize. Cast the result to VT.
9377static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9378 SelectionDAG &DAG, const SDLoc &dl) {
9379 static const MVT VTys[] = { // canonical VT to use for each size.
9380 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9381 };
9382
9383 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9384
9385 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9386 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9387 SplatSize = 1;
9388 Val = 0xFF;
9389 }
9390
9391 EVT CanonicalVT = VTys[SplatSize-1];
9392
9393 // Build a canonical splat for this value.
9394 // Explicitly truncate APInt here, as this API is used with a mix of
9395 // signed and unsigned values.
9396 return DAG.getBitcast(
9397 ReqVT,
9398 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9399}
9400
9401/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9402/// specified intrinsic ID.
9404 const SDLoc &dl, EVT DestVT = MVT::Other) {
9405 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9406 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9407 DAG.getConstant(IID, dl, MVT::i32), Op);
9408}
9409
9410/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9411/// specified intrinsic ID.
9413 SelectionDAG &DAG, const SDLoc &dl,
9414 EVT DestVT = MVT::Other) {
9415 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9416 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9417 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9418}
9419
9420/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9421/// specified intrinsic ID.
9422static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9423 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9424 EVT DestVT = MVT::Other) {
9425 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9426 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9427 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9428}
9429
9430/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9431/// amount. The result has the specified value type.
9432static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9433 SelectionDAG &DAG, const SDLoc &dl) {
9434 // Force LHS/RHS to be the right type.
9435 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9436 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9437
9438 int Ops[16];
9439 for (unsigned i = 0; i != 16; ++i)
9440 Ops[i] = i + Amt;
9441 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9442 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9443}
9444
9445/// Do we have an efficient pattern in a .td file for this node?
9446///
9447/// \param V - pointer to the BuildVectorSDNode being matched
9448/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9449///
9450/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9451/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9452/// the opposite is true (expansion is beneficial) are:
9453/// - The node builds a vector out of integers that are not 32 or 64-bits
9454/// - The node builds a vector out of constants
9455/// - The node is a "load-and-splat"
9456/// In all other cases, we will choose to keep the BUILD_VECTOR.
9458 bool HasDirectMove,
9459 bool HasP8Vector) {
9460 EVT VecVT = V->getValueType(0);
9461 bool RightType = VecVT == MVT::v2f64 ||
9462 (HasP8Vector && VecVT == MVT::v4f32) ||
9463 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9464 if (!RightType)
9465 return false;
9466
9467 bool IsSplat = true;
9468 bool IsLoad = false;
9469 SDValue Op0 = V->getOperand(0);
9470
9471 // This function is called in a block that confirms the node is not a constant
9472 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9473 // different constants.
9474 if (V->isConstant())
9475 return false;
9476 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9477 if (V->getOperand(i).isUndef())
9478 return false;
9479 // We want to expand nodes that represent load-and-splat even if the
9480 // loaded value is a floating point truncation or conversion to int.
9481 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9482 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9483 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9484 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9485 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9486 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9487 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9488 IsLoad = true;
9489 // If the operands are different or the input is not a load and has more
9490 // uses than just this BV node, then it isn't a splat.
9491 if (V->getOperand(i) != Op0 ||
9492 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9493 IsSplat = false;
9494 }
9495 return !(IsSplat && IsLoad);
9496}
9497
9498// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9499SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9500
9501 SDLoc dl(Op);
9502 SDValue Op0 = Op->getOperand(0);
9503
9504 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9505 (Op.getValueType() != MVT::f128))
9506 return SDValue();
9507
9508 SDValue Lo = Op0.getOperand(0);
9509 SDValue Hi = Op0.getOperand(1);
9510 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9511 return SDValue();
9512
9513 if (!Subtarget.isLittleEndian())
9514 std::swap(Lo, Hi);
9515
9516 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9517}
9518
9519static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9520 const SDValue *InputLoad = &Op;
9521 while (InputLoad->getOpcode() == ISD::BITCAST)
9522 InputLoad = &InputLoad->getOperand(0);
9523 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9524 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9525 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9526 InputLoad = &InputLoad->getOperand(0);
9527 }
9528 if (InputLoad->getOpcode() != ISD::LOAD)
9529 return nullptr;
9530 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9531 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9532}
9533
9534// Convert the argument APFloat to a single precision APFloat if there is no
9535// loss in information during the conversion to single precision APFloat and the
9536// resulting number is not a denormal number. Return true if successful.
9538 APFloat APFloatToConvert = ArgAPFloat;
9539 bool LosesInfo = true;
9541 &LosesInfo);
9542 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9543 if (Success)
9544 ArgAPFloat = APFloatToConvert;
9545 return Success;
9546}
9547
9548// Bitcast the argument APInt to a double and convert it to a single precision
9549// APFloat, bitcast the APFloat to an APInt and assign it to the original
9550// argument if there is no loss in information during the conversion from
9551// double to single precision APFloat and the resulting number is not a denormal
9552// number. Return true if successful.
9554 double DpValue = ArgAPInt.bitsToDouble();
9555 APFloat APFloatDp(DpValue);
9556 bool Success = convertToNonDenormSingle(APFloatDp);
9557 if (Success)
9558 ArgAPInt = APFloatDp.bitcastToAPInt();
9559 return Success;
9560}
9561
9562// Nondestructive check for convertTonNonDenormSingle.
9564 // Only convert if it loses info, since XXSPLTIDP should
9565 // handle the other case.
9566 APFloat APFloatToConvert = ArgAPFloat;
9567 bool LosesInfo = true;
9569 &LosesInfo);
9570
9571 return (!LosesInfo && !APFloatToConvert.isDenormal());
9572}
9573
9574static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9575 unsigned &Opcode) {
9576 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9577 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9578 return false;
9579
9580 EVT Ty = Op->getValueType(0);
9581 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9582 // as we cannot handle extending loads for these types.
9583 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9584 ISD::isNON_EXTLoad(InputNode))
9585 return true;
9586
9587 EVT MemVT = InputNode->getMemoryVT();
9588 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9589 // memory VT is the same vector element VT type.
9590 // The loads feeding into the v8i16 and v16i8 types will be extending because
9591 // scalar i8/i16 are not legal types.
9592 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9593 (MemVT == Ty.getVectorElementType()))
9594 return true;
9595
9596 if (Ty == MVT::v2i64) {
9597 // Check the extend type, when the input type is i32, and the output vector
9598 // type is v2i64.
9599 if (MemVT == MVT::i32) {
9600 if (ISD::isZEXTLoad(InputNode))
9601 Opcode = PPCISD::ZEXT_LD_SPLAT;
9602 if (ISD::isSEXTLoad(InputNode))
9603 Opcode = PPCISD::SEXT_LD_SPLAT;
9604 }
9605 return true;
9606 }
9607 return false;
9608}
9609
9611 bool IsLittleEndian) {
9612 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9613
9614 BitMask.clearAllBits();
9615 EVT VT = BVN.getValueType(0);
9616 unsigned VTSize = VT.getSizeInBits();
9617 APInt ConstValue(VTSize, 0);
9618
9619 unsigned EltWidth = VT.getScalarSizeInBits();
9620
9621 unsigned BitPos = 0;
9622 for (auto OpVal : BVN.op_values()) {
9623 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9624
9625 if (!CN)
9626 return false;
9627 // The elements in a vector register are ordered in reverse byte order
9628 // between little-endian and big-endian modes.
9629 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9630 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9631 BitPos += EltWidth;
9632 }
9633
9634 for (unsigned J = 0; J < 16; ++J) {
9635 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9636 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9637 return false;
9638 if (ExtractValue == 0xFF)
9639 BitMask.setBit(J);
9640 }
9641 return true;
9642}
9643
9644// If this is a case we can't handle, return null and let the default
9645// expansion code take care of it. If we CAN select this case, and if it
9646// selects to a single instruction, return Op. Otherwise, if we can codegen
9647// this case more efficiently than a constant pool load, lower it to the
9648// sequence of ops that should be used.
9649SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9650 SelectionDAG &DAG) const {
9651 SDLoc dl(Op);
9652 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9653 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9654
9655 if (Subtarget.hasP10Vector()) {
9656 APInt BitMask(32, 0);
9657 // If the value of the vector is all zeros or all ones,
9658 // we do not convert it to MTVSRBMI.
9659 // The xxleqv instruction sets a vector with all ones.
9660 // The xxlxor instruction sets a vector with all zeros.
9661 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9662 BitMask != 0 && BitMask != 0xffff) {
9663 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9664 MachineSDNode *MSDNode =
9665 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9666 SDValue SDV = SDValue(MSDNode, 0);
9667 EVT DVT = BVN->getValueType(0);
9668 EVT SVT = SDV.getValueType();
9669 if (SVT != DVT) {
9670 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9671 }
9672 return SDV;
9673 }
9674 // Recognize build vector patterns to emit VSX vector instructions
9675 // instead of loading value from memory.
9676 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9677 return VecPat;
9678 }
9679 // Check if this is a splat of a constant value.
9680 APInt APSplatBits, APSplatUndef;
9681 unsigned SplatBitSize = 0;
9682 bool HasAnyUndefs;
9683 bool BVNIsConstantSplat =
9684 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9685 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9686
9687 // If it is a splat of a double, check if we can shrink it to a 32 bit
9688 // non-denormal float which when converted back to double gives us the same
9689 // double. This is to exploit the XXSPLTIDP instruction.
9690 // If we lose precision, we use XXSPLTI32DX.
9691 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9692 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9693 // Check the type first to short-circuit so we don't modify APSplatBits if
9694 // this block isn't executed.
9695 if ((Op->getValueType(0) == MVT::v2f64) &&
9696 convertToNonDenormSingle(APSplatBits)) {
9697 SDValue SplatNode = DAG.getNode(
9698 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9699 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9700 return DAG.getBitcast(Op.getValueType(), SplatNode);
9701 } else {
9702 // We may lose precision, so we have to use XXSPLTI32DX.
9703
9704 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9705 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9706 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9707
9708 if (!Hi || !Lo)
9709 // If either load is 0, then we should generate XXLXOR to set to 0.
9710 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9711
9712 if (Hi)
9713 SplatNode = DAG.getNode(
9714 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9715 DAG.getTargetConstant(0, dl, MVT::i32),
9716 DAG.getTargetConstant(Hi, dl, MVT::i32));
9717
9718 if (Lo)
9719 SplatNode =
9720 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9721 DAG.getTargetConstant(1, dl, MVT::i32),
9722 DAG.getTargetConstant(Lo, dl, MVT::i32));
9723
9724 return DAG.getBitcast(Op.getValueType(), SplatNode);
9725 }
9726 }
9727
9728 if (SDValue V =
9729 LowerVecSplatSmallFP(Op, DAG, BVNIsConstantSplat, SplatBitSize))
9730 return V;
9731
9732 bool IsSplat64 = false;
9733 uint64_t SplatBits = 0;
9734 int32_t SextVal = 0;
9735 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9736 SplatBits = APSplatBits.getZExtValue();
9737 if (SplatBitSize <= 32) {
9738 SextVal = SignExtend32(SplatBits, SplatBitSize);
9739 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9740 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9741 bool P9Vector = Subtarget.hasP9Vector();
9742 int32_t Hi = P9Vector ? 127 : 15;
9743 int32_t Lo = P9Vector ? -128 : -16;
9744 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9745 SextVal = static_cast<int32_t>(SplatBits);
9746 }
9747 }
9748
9749 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9750 unsigned NewOpcode = PPCISD::LD_SPLAT;
9751
9752 // Handle load-and-splat patterns as we have instructions that will do this
9753 // in one go.
9754 if (DAG.isSplatValue(Op, true) &&
9755 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9756 const SDValue *InputLoad = &Op.getOperand(0);
9757 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9758
9759 // If the input load is an extending load, it will be an i32 -> i64
9760 // extending load and isValidSplatLoad() will update NewOpcode.
9761 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9762 unsigned ElementSize =
9763 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9764
9765 assert(((ElementSize == 2 * MemorySize)
9766 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9767 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9768 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9769 "Unmatched element size and opcode!\n");
9770
9771 // Checking for a single use of this load, we have to check for vector
9772 // width (128 bits) / ElementSize uses (since each operand of the
9773 // BUILD_VECTOR is a separate use of the value.
9774 unsigned NumUsesOfInputLD = 128 / ElementSize;
9775 for (SDValue BVInOp : Op->ops())
9776 if (BVInOp.isUndef())
9777 NumUsesOfInputLD--;
9778
9779 // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9780 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9781 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9782 // 15", but function IsValidSplatLoad() now will only return true when
9783 // the data at index 0 is not nullptr. So we will not get into trouble for
9784 // these cases.
9785 //
9786 // case 1 - lfiwzx/lfiwax
9787 // 1.1: load result is i32 and is sign/zero extend to i64;
9788 // 1.2: build a v2i64 vector type with above loaded value;
9789 // 1.3: the vector has only one value at index 0, others are all undef;
9790 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9791 if (NumUsesOfInputLD == 1 &&
9792 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9793 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9794 Subtarget.hasLFIWAX()))
9795 return SDValue();
9796
9797 // case 2 - lxvr[hb]x
9798 // 2.1: load result is at most i16;
9799 // 2.2: build a vector with above loaded value;
9800 // 2.3: the vector has only one value at index 0, others are all undef;
9801 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9802 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9803 Subtarget.isISA3_1() && ElementSize <= 16)
9804 return SDValue();
9805
9806 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9807 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9808 Subtarget.hasVSX()) {
9809 SDValue Ops[] = {
9810 LD->getChain(), // Chain
9811 LD->getBasePtr(), // Ptr
9812 DAG.getValueType(Op.getValueType()) // VT
9813 };
9814 SDValue LdSplt = DAG.getMemIntrinsicNode(
9815 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9816 LD->getMemoryVT(), LD->getMemOperand());
9817 // Replace all uses of the output chain of the original load with the
9818 // output chain of the new load.
9819 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9820 LdSplt.getValue(1));
9821 return LdSplt;
9822 }
9823 }
9824
9825 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9826 // 32-bits can be lowered to VSX instructions under certain conditions.
9827 // Without VSX, there is no pattern more efficient than expanding the node.
9828 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9829 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9830 Subtarget.hasP8Vector()))
9831 return Op;
9832 return SDValue();
9833 }
9834
9835 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9836 unsigned SplatSize = SplatBitSize / 8;
9837
9838 // First, handle single instruction cases.
9839
9840 // All zeros?
9841 if (SplatBits == 0) {
9842 // Canonicalize all zero vectors to be v4i32.
9843 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9844 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9845 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9846 }
9847 return Op;
9848 }
9849
9850 // We have XXSPLTIW for constant splats four bytes wide.
9851 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9852 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9853 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9854 // turned into a 4-byte splat of 0xABABABAB.
9855 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9856 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9857 Op.getValueType(), DAG, dl);
9858
9859 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9860 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9861 dl);
9862
9863 // We have XXSPLTIB for constant splats one byte wide.
9864 if (Subtarget.hasP9Vector() && SplatSize == 1)
9865 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9866 dl);
9867
9868 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9869 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9870 if (SextVal >= -16 && SextVal <= 15) {
9871 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9872 // generate a splat word with extend for size 8.
9873 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9874 SDValue Res =
9875 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9876 if (SplatSize != 8)
9877 return Res;
9878 SDValue IntrinsicOp =
9879 BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw,
9880 DAG.getBitcast(MVT::v4i32, Res), DAG, dl, MVT::v2i64);
9881 return DAG.getBitcast(Op.getValueType(), IntrinsicOp);
9882 }
9883
9884 // Two instruction sequences.
9885
9886 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9887 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9889 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9890 unsigned IID;
9891 EVT VT;
9892 switch (SplatSize) {
9893 default:
9894 llvm_unreachable("Unexpected type for vector constant.");
9895 case 2:
9896 IID = Intrinsic::ppc_altivec_vupklsb;
9897 VT = MVT::v8i16;
9898 break;
9899 case 4:
9900 IID = Intrinsic::ppc_altivec_vextsb2w;
9901 VT = MVT::v4i32;
9902 break;
9903 case 8:
9904 IID = Intrinsic::ppc_altivec_vextsb2d;
9905 VT = MVT::v2i64;
9906 break;
9907 }
9908 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9909 return DAG.getBitcast(Op->getValueType(0), Extend);
9910 }
9911 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9912
9913 // If this value is in the range [-32,30] and is even, use:
9914 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9915 // If this value is in the range [17,31] and is odd, use:
9916 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9917 // If this value is in the range [-31,-17] and is odd, use:
9918 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9919 // Note the last two are three-instruction sequences.
9920 if (SextVal >= -32 && SextVal <= 31) {
9921 // To avoid having these optimizations undone by constant folding,
9922 // we convert to a pseudo that will be expanded later into one of
9923 // the above forms.
9924 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9925 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9926 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9927 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9928 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9929 if (VT == Op.getValueType())
9930 return RetVal;
9931 else
9932 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9933 }
9934
9935 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9936 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9937 // for fneg/fabs.
9938 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9939 // Make -1 and vspltisw -1:
9940 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9941
9942 // Make the VSLW intrinsic, computing 0x8000_0000.
9943 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9944 OnesV, DAG, dl);
9945
9946 // xor by OnesV to invert it.
9947 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9948 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9949 }
9950
9951 // Check to see if this is a wide variety of vsplti*, binop self cases.
9952 static const signed char SplatCsts[] = {
9953 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9954 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9955 };
9956
9957 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9958 // Indirect through the SplatCsts array, which lists -1 first, so that we
9959 // favor 'vsplti -1' for ambiguous cases (e.g. formation of 0x8000_0000).
9960 int i = SplatCsts[idx];
9961
9962 // Figure out what shift amount will be used by altivec if shifted by i in
9963 // this splat size.
9964 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9965
9966 // vsplti + shl self.
9967 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9968 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9969 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9970 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9971 Intrinsic::ppc_altivec_vslw
9972 };
9973 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9974 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9975 }
9976
9977 // vsplti + srl self.
9978 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9979 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9980 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9981 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9982 Intrinsic::ppc_altivec_vsrw
9983 };
9984 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9985 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9986 }
9987
9988 // vsplti + rol self.
9989 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9990 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9991 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9992 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9993 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9994 Intrinsic::ppc_altivec_vrlw
9995 };
9996 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9997 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9998 }
9999
10000 // t = vsplti c, result = vsldoi t, t, 1
10001 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
10002 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10003 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
10004 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10005 }
10006 // t = vsplti c, result = vsldoi t, t, 2
10007 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
10008 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10009 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
10010 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10011 }
10012 // t = vsplti c, result = vsldoi t, t, 3
10013 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
10014 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10015 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
10016 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10017 }
10018 }
10019
10020 return SDValue();
10021}
10022
10023/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
10024/// the specified operations to build the shuffle.
///
/// The entry is decoded into an operation number and two operand ids. Unless
/// the operation is OP_COPY, each id is looked up in PerfectShuffleTable and
/// expanded recursively, and the two results are then combined by the
/// operation. Merge and splat operations are emitted as a v16i8 vector
/// shuffle; the VSLDOI operations are delegated to BuildVSLDOI.
// NOTE(review): the line carrying the return type, the function name and the
// leading parameters (PFEntry and LHS are used below) is not present in this
// listing; the parameter list resumes with RHS.
10026 SDValue RHS, SelectionDAG &DAG,
10027 const SDLoc &dl) {
// Entry layout: bits 26-29 hold the operation, bits 13-25 the id of the
// left operand and bits 0-12 the id of the right operand.
10028 unsigned OpNum = (PFEntry >> 26) & 0x0F;
10029 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
10030 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
10031
// Operation numbers; the order must match the encoding stored in the table.
10032 enum {
10033 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
10034 OP_VMRGHW,
10035 OP_VMRGLW,
10036 OP_VSPLTISW0,
10037 OP_VSPLTISW1,
10038 OP_VSPLTISW2,
10039 OP_VSPLTISW3,
10040 OP_VSLDOI4,
10041 OP_VSLDOI8,
10042 OP_VSLDOI12
10043 };
10044
// Recursion leaf: a copy names one of the two inputs directly. The ids read
// as the base-9 digit strings <0,1,2,3> (LHS) and <4,5,6,7> (RHS); any other
// id on an OP_COPY entry is rejected by the assert.
10045 if (OpNum == OP_COPY) {
10046 if (LHSID == (1*9+2)*9+3) return LHS;
10047 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
10048 return RHS;
10049 }
10050
// Build both operands of this operation from their own table entries.
10051 SDValue OpLHS, OpRHS;
10052 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
10053 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
10054
// Byte-level shuffle mask for the merge/splat operations. It is consumed by
// the v16i8 shuffle of (OpLHS, OpRHS) built at the end, so indices 0-15
// select bytes of OpLHS and 16-31 select bytes of OpRHS.
10055 int ShufIdxs[16];
10056 switch (OpNum) {
10057 default: llvm_unreachable("Unknown i32 permute!");
10058 case OP_VMRGHW:
// Interleave words 0 and 1 of both operands: L0, R0, L1, R1.
10059 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
10060 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
10061 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
10062 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
10063 break;
10064 case OP_VMRGLW:
// Interleave words 2 and 3 of both operands: L2, R2, L3, R3.
10065 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
10066 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
10067 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
10068 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
10069 break;
// The four splat cases replicate word 0, 1, 2 or 3 of OpLHS into every
// word of the result: (i&3) is the byte within the word, the added constant
// is the byte offset of the source word.
10070 case OP_VSPLTISW0:
10071 for (unsigned i = 0; i != 16; ++i)
10072 ShufIdxs[i] = (i&3)+0;
10073 break;
10074 case OP_VSPLTISW1:
10075 for (unsigned i = 0; i != 16; ++i)
10076 ShufIdxs[i] = (i&3)+4;
10077 break;
10078 case OP_VSPLTISW2:
10079 for (unsigned i = 0; i != 16; ++i)
10080 ShufIdxs[i] = (i&3)+8;
10081 break;
10082 case OP_VSPLTISW3:
10083 for (unsigned i = 0; i != 16; ++i)
10084 ShufIdxs[i] = (i&3)+12;
10085 break;
// The VSLDOI cases return immediately with an amount of 4, 8 or 12 and
// never reach the generic shuffle below.
10086 case OP_VSLDOI4:
10087 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
10088 case OP_VSLDOI8:
10089 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
10090 case OP_VSLDOI12:
10091 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
10092 }
// Emit the shuffle on v16i8 and cast the result back to the type the left
// operand had before it was cast.
10093 EVT VT = OpLHS.getValueType();
10094 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
10095 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
10096 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
10097 return DAG.getNode(ISD::BITCAST, dl, VT, T);
10098}
10099
10100/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10101/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10102/// SDValue.
10103SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10104 SelectionDAG &DAG) const {
10105 const unsigned BytesInVector = 16;
10106 bool IsLE = Subtarget.isLittleEndian();
10107 SDLoc dl(N);
10108 SDValue V1 = N->getOperand(0);
10109 SDValue V2 = N->getOperand(1);
10110 unsigned ShiftElts = 0, InsertAtByte = 0;
10111 bool Swap = false;
10112
10113 // Shifts required to get the byte we want at element 7.
10114 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10115 0, 15, 14, 13, 12, 11, 10, 9};
10116 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10117 1, 2, 3, 4, 5, 6, 7, 8};
10118
10119 ArrayRef<int> Mask = N->getMask();
10120 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10121
10122 // For each mask element, find out if we're just inserting something
10123 // from V2 into V1 or vice versa.
10124 // Possible permutations inserting an element from V2 into V1:
10125 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10126 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10127 // ...
10128 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10129 // Inserting from V1 into V2 will be similar, except mask range will be
10130 // [16,31].
10131
10132 bool FoundCandidate = false;
10133 // If both vector operands for the shuffle are the same vector, the mask
10134 // will contain only elements from the first one and the second one will be
10135 // undef.
10136 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10137 // Go through the mask of half-words to find an element that's being moved
10138 // from one vector to the other.
10139 for (unsigned i = 0; i < BytesInVector; ++i) {
10140 unsigned CurrentElement = Mask[i];
10141 // If 2nd operand is undefined, we should only look for element 7 in the
10142 // Mask.
10143 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10144 continue;
10145
10146 bool OtherElementsInOrder = true;
10147 // Examine the other elements in the Mask to see if they're in original
10148 // order.
10149 for (unsigned j = 0; j < BytesInVector; ++j) {
10150 if (j == i)
10151 continue;
10152 // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
10153 // from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
10154 // in which we always assume we're always picking from the 1st operand.
10155 int MaskOffset =
10156 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10157 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10158 OtherElementsInOrder = false;
10159 break;
10160 }
10161 }
10162 // If other elements are in original order, we record the number of shifts
10163 // we need to get the element we want into element 7. Also record which byte
10164 // in the vector we should insert into.
10165 if (OtherElementsInOrder) {
10166 // If 2nd operand is undefined, we assume no shifts and no swapping.
10167 if (V2.isUndef()) {
10168 ShiftElts = 0;
10169 Swap = false;
10170 } else {
10171 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10172 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10173 : BigEndianShifts[CurrentElement & 0xF];
10174 Swap = CurrentElement < BytesInVector;
10175 }
10176 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10177 FoundCandidate = true;
10178 break;
10179 }
10180 }
10181
10182 if (!FoundCandidate)
10183 return SDValue();
10184
10185 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10186 // optionally with VECSHL if shift is required.
10187 if (Swap)
10188 std::swap(V1, V2);
10189 if (V2.isUndef())
10190 V2 = V1;
10191 if (ShiftElts) {
10192 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10193 DAG.getConstant(ShiftElts, dl, MVT::i32));
10194 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10195 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10196 }
10197 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10198 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10199}
10200
10201/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10202/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10203/// SDValue.
10204SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10205 SelectionDAG &DAG) const {
10206 const unsigned NumHalfWords = 8;
10207 const unsigned BytesInVector = NumHalfWords * 2;
10208 // Check that the shuffle is on half-words.
10209 if (!isNByteElemShuffleMask(N, 2, 1))
10210 return SDValue();
10211
10212 bool IsLE = Subtarget.isLittleEndian();
10213 SDLoc dl(N);
10214 SDValue V1 = N->getOperand(0);
10215 SDValue V2 = N->getOperand(1);
10216 unsigned ShiftElts = 0, InsertAtByte = 0;
10217 bool Swap = false;
10218
10219 // Shifts required to get the half-word we want at element 3.
10220 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10221 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10222
10223 uint32_t Mask = 0;
10224 uint32_t OriginalOrderLow = 0x1234567;
10225 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10226 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10227 // 32-bit space, only need 4-bit nibbles per element.
10228 for (unsigned i = 0; i < NumHalfWords; ++i) {
10229 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10230 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10231 }
10232
10233 // For each mask element, find out if we're just inserting something
10234 // from V2 into V1 or vice versa. Possible permutations inserting an element
10235 // from V2 into V1:
10236 // X, 1, 2, 3, 4, 5, 6, 7
10237 // 0, X, 2, 3, 4, 5, 6, 7
10238 // 0, 1, X, 3, 4, 5, 6, 7
10239 // 0, 1, 2, X, 4, 5, 6, 7
10240 // 0, 1, 2, 3, X, 5, 6, 7
10241 // 0, 1, 2, 3, 4, X, 6, 7
10242 // 0, 1, 2, 3, 4, 5, X, 7
10243 // 0, 1, 2, 3, 4, 5, 6, X
10244 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10245
10246 bool FoundCandidate = false;
10247 // Go through the mask of half-words to find an element that's being moved
10248 // from one vector to the other.
10249 for (unsigned i = 0; i < NumHalfWords; ++i) {
10250 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10251 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10252 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10253 uint32_t TargetOrder = 0x0;
10254
10255 // If both vector operands for the shuffle are the same vector, the mask
10256 // will contain only elements from the first one and the second one will be
10257 // undef.
10258 if (V2.isUndef()) {
10259 ShiftElts = 0;
10260 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10261 TargetOrder = OriginalOrderLow;
10262 Swap = false;
10263 // Skip if not the correct element or mask of other elements don't equal
10264 // to our expected order.
10265 if (MaskOneElt == VINSERTHSrcElem &&
10266 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10267 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10268 FoundCandidate = true;
10269 break;
10270 }
10271 } else { // If both operands are defined.
10272 // Target order is [8,15] if the current mask is between [0,7].
10273 TargetOrder =
10274 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10275 // Skip if mask of other elements don't equal our expected order.
10276 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10277 // We only need the last 3 bits for the number of shifts.
10278 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10279 : BigEndianShifts[MaskOneElt & 0x7];
10280 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10281 Swap = MaskOneElt < NumHalfWords;
10282 FoundCandidate = true;
10283 break;
10284 }
10285 }
10286 }
10287
10288 if (!FoundCandidate)
10289 return SDValue();
10290
10291 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10292 // optionally with VECSHL if shift is required.
10293 if (Swap)
10294 std::swap(V1, V2);
10295 if (V2.isUndef())
10296 V2 = V1;
10297 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10298 if (ShiftElts) {
10299 // Double ShiftElts because we're left shifting on v16i8 type.
10300 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10301 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10302 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10303 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10304 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10305 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10306 }
10307 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10308 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10309 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10310 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10311}
10312
10313/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10314/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10315/// return the default SDValue.
///
/// The shuffle must be word-granular, one operand must be a BUILD_VECTOR that
/// is a constant splat of at most 32 bits, and the mask must keep words 0 and
/// 2 (or words 1 and 3) of the other operand while taking the remaining two
/// words from the splat. On success the result is a PPCISD::XXSPLTI32DX node
/// built as v2i64 and bitcast to v16i8.
10316SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10317 SelectionDAG &DAG) const {
10318 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10319 // to v16i8. Peek through the bitcasts to get the actual operands.
// NOTE(review): the declarations of LHS and RHS that this comment describes
// are not present in this listing; both names are used below.
10322
10323 auto ShuffleMask = SVN->getMask();
10324 SDValue VecShuffle(SVN, 0);
10325 SDLoc DL(SVN);
10326
10327 // Check that we have a four byte shuffle.
10328 if (!isNByteElemShuffleMask(SVN, 4, 1))
10329 return SDValue();
10330
10331 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10332 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10333 std::swap(LHS, RHS);
// NOTE(review): nothing visible here reassigns VecShuffle before it is cast;
// presumably a statement producing the commuted shuffle is missing from this
// listing. Confirm against the real file.
10335 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10336 if (!CommutedSV)
10337 return SDValue();
// From here on the mask of the commuted shuffle is the one being matched.
10338 ShuffleMask = CommutedSV->getMask();
10339 }
10340
10341 // Ensure that the RHS is a vector of constants.
10342 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10343 if (!BVN)
10344 return SDValue();
10345
10346 // Check if RHS is a splat of 4-bytes (or smaller).
10347 APInt APSplatValue, APSplatUndef;
10348 unsigned SplatBitSize;
10349 bool HasAnyUndefs;
// The last argument is true on big-endian subtargets; presumably it selects
// the element order used when the splat pattern is assembled.
10350 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10351 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10352 SplatBitSize > 32)
10353 return SDValue();
10354
10355 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10356 // The instruction splats a constant C into two words of the source vector
10357 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10358 // Thus we check that the shuffle mask is the equivalent of
10359 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10360 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10361 // within each word are consecutive, so we only need to check the first byte.
10362 SDValue Index;
10363 bool IsLE = Subtarget.isLittleEndian();
// Case 1: words 0 and 2 stay in place (byte indices 0 and 8) and words 1 and
// 3 start on a word boundary of the second shuffle operand (index > 15).
10364 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10365 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10366 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10367 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
// Case 2: words 1 and 3 stay in place (byte indices 4 and 12) and words 0
// and 2 come from the second shuffle operand. The index operand is the
// opposite of case 1 for the same endianness.
10368 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10369 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10370 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10371 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10372 else
10373 return SDValue();
10374
10375 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10376 // for XXSPLTI32DX.
// Each iteration ORs in a copy of the pattern shifted by its current width
// and doubles that width, until the pattern fills 32 bits.
10377 unsigned SplatVal = APSplatValue.getZExtValue();
10378 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10379 SplatVal |= (SplatVal << SplatBitSize);
10380
// Operands: the non-constant input viewed as v2i64, the index chosen above,
// and the 32-bit immediate to splat.
10381 SDValue SplatNode = DAG.getNode(
10382 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10383 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10384 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10385}
10386
10387/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10388/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10389/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10390/// i.e (or (shl x, C1), (srl x, 128-C1)).
10391SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10392 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10393 assert(Op.getValueType() == MVT::v1i128 &&
10394 "Only set v1i128 as custom, other type shouldn't reach here!");
10395 SDLoc dl(Op);
10396 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10397 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10398 unsigned SHLAmt = N1.getConstantOperandVal(0);
10399 if (SHLAmt % 8 == 0) {
10400 std::array<int, 16> Mask;
10401 std::iota(Mask.begin(), Mask.end(), 0);
10402 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10403 if (SDValue Shuffle =
10404 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10405 DAG.getUNDEF(MVT::v16i8), Mask))
10406 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10407 }
10408 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10409 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10410 DAG.getConstant(SHLAmt, dl, MVT::i32));
10411 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10412 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10413 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10414 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10415}
10416
10417/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10418/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10419/// return the code it can be lowered into. Worst case, it can always be
10420/// lowered into a vperm.
10421SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10422 SelectionDAG &DAG) const {
10423 SDLoc dl(Op);
10424 SDValue V1 = Op.getOperand(0);
10425 SDValue V2 = Op.getOperand(1);
10426 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10427
10428 // Any nodes that were combined in the target-independent combiner prior
10429 // to vector legalization will not be sent to the target combine. Try to
10430 // combine it here.
10431 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10432 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10433 return NewShuffle;
10434 Op = NewShuffle;
10436 V1 = Op.getOperand(0);
10437 V2 = Op.getOperand(1);
10438 }
10439 EVT VT = Op.getValueType();
10440 bool isLittleEndian = Subtarget.isLittleEndian();
10441
10442 unsigned ShiftElts, InsertAtByte;
10443 bool Swap = false;
10444
10445 // If this is a load-and-splat, we can do that with a single instruction
10446 // in some cases. However if the load has multiple uses, we don't want to
10447 // combine it because that will just produce multiple loads.
10448 bool IsPermutedLoad = false;
10449 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10450 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10451 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10452 InputLoad->hasOneUse()) {
10453 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10454 int SplatIdx =
10455 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10456
10457 // The splat index for permuted loads will be in the left half of the vector
10458 // which is strictly wider than the loaded value by 8 bytes. So we need to
10459 // adjust the splat index to point to the correct address in memory.
10460 if (IsPermutedLoad) {
10461 assert((isLittleEndian || IsFourByte) &&
10462 "Unexpected size for permuted load on big endian target");
10463 SplatIdx += IsFourByte ? 2 : 1;
10464 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10465 "Splat of a value outside of the loaded memory");
10466 }
10467
10468 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10469 // For 4-byte load-and-splat, we need Power9.
10470 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10471 uint64_t Offset = 0;
10472 if (IsFourByte)
10473 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10474 else
10475 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10476
10477 // If the width of the load is the same as the width of the splat,
10478 // loading with an offset would load the wrong memory.
10479 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10480 Offset = 0;
10481
10482 SDValue BasePtr = LD->getBasePtr();
10483 if (Offset != 0)
10485 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10486 SDValue Ops[] = {
10487 LD->getChain(), // Chain
10488 BasePtr, // BasePtr
10489 DAG.getValueType(Op.getValueType()) // VT
10490 };
10491 SDVTList VTL =
10492 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10493 SDValue LdSplt =
10494 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10495 Ops, LD->getMemoryVT(), LD->getMemOperand());
10496 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10497 if (LdSplt.getValueType() != SVOp->getValueType(0))
10498 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10499 return LdSplt;
10500 }
10501 }
10502
10503 // All v2i64 and v2f64 shuffles are legal
10504 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10505 return Op;
10506
10507 if (Subtarget.hasP9Vector() &&
10508 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10509 isLittleEndian)) {
10510 if (V2.isUndef())
10511 V2 = V1;
10512 else if (Swap)
10513 std::swap(V1, V2);
10514 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10515 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10516 if (ShiftElts) {
10517 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10518 DAG.getConstant(ShiftElts, dl, MVT::i32));
10519 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10520 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10521 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10522 }
10523 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10524 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10525 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10526 }
10527
10528 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10529 SDValue SplatInsertNode;
10530 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10531 return SplatInsertNode;
10532 }
10533
10534 if (Subtarget.hasP9Altivec()) {
10535 SDValue NewISDNode;
10536 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10537 return NewISDNode;
10538
10539 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10540 return NewISDNode;
10541 }
10542
10543 if (Subtarget.hasVSX() &&
10544 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10545 if (Swap)
10546 std::swap(V1, V2);
10547 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10548 SDValue Conv2 =
10549 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10550
10551 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10552 DAG.getConstant(ShiftElts, dl, MVT::i32));
10553 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10554 }
10555
10556 if (Subtarget.hasVSX() &&
10557 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10558 if (Swap)
10559 std::swap(V1, V2);
10560 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10561 SDValue Conv2 =
10562 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10563
10564 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10565 DAG.getConstant(ShiftElts, dl, MVT::i32));
10566 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10567 }
10568
10569 if (Subtarget.hasP9Vector()) {
10570 if (PPC::isXXBRHShuffleMask(SVOp)) {
10571 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10572 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10573 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10574 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10575 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10576 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10577 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10578 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10579 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10580 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10581 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10582 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10583 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10584 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10585 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10586 }
10587 }
10588
10589 if (Subtarget.hasVSX()) {
10590 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10591 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10592
10593 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10594 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10595 DAG.getConstant(SplatIdx, dl, MVT::i32));
10596 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10597 }
10598
10599 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10600 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10601 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10602 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10603 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10604 }
10605 }
10606
10607 // Cases that are handled by instructions that take permute immediates
10608 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10609 // selected by the instruction selector.
10610 if (V2.isUndef()) {
10611 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10612 PPC::isSplatShuffleMask(SVOp, 2) ||
10613 PPC::isSplatShuffleMask(SVOp, 4) ||
10614 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10615 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10616 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10617 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10618 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10619 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10620 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10621 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10622 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10623 (Subtarget.hasP8Altivec() && (
10624 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10625 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10626 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10627 return Op;
10628 }
10629 }
10630
10631 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10632 // and produce a fixed permutation. If any of these match, do not lower to
10633 // VPERM.
10634 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10635 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10636 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10637 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10638 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10639 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10640 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10641 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10642 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10643 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10644 (Subtarget.hasP8Altivec() && (
10645 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10646 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10647 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10648 return Op;
10649
10650 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10651 // perfect shuffle table to emit an optimal matching sequence.
10652 ArrayRef<int> PermMask = SVOp->getMask();
10653
10654 if (!DisablePerfectShuffle && !isLittleEndian) {
10655 unsigned PFIndexes[4];
10656 bool isFourElementShuffle = true;
10657 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10658 ++i) { // Element number
10659 unsigned EltNo = 8; // Start out undef.
10660 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10661 if (PermMask[i * 4 + j] < 0)
10662 continue; // Undef, ignore it.
10663
10664 unsigned ByteSource = PermMask[i * 4 + j];
10665 if ((ByteSource & 3) != j) {
10666 isFourElementShuffle = false;
10667 break;
10668 }
10669
10670 if (EltNo == 8) {
10671 EltNo = ByteSource / 4;
10672 } else if (EltNo != ByteSource / 4) {
10673 isFourElementShuffle = false;
10674 break;
10675 }
10676 }
10677 PFIndexes[i] = EltNo;
10678 }
10679
10680 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10681 // perfect shuffle vector to determine if it is cost effective to do this as
10682 // discrete instructions, or whether we should use a vperm.
10683 // For now, we skip this for little endian until such time as we have a
10684 // little-endian perfect shuffle table.
10685 if (isFourElementShuffle) {
10686 // Compute the index in the perfect shuffle table.
10687 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10688 PFIndexes[2] * 9 + PFIndexes[3];
10689
10690 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10691 unsigned Cost = (PFEntry >> 30);
10692
10693 // Determining when to avoid vperm is tricky. Many things affect the cost
10694 // of vperm, particularly how many times the perm mask needs to be
10695 // computed. For example, if the perm mask can be hoisted out of a loop or
10696 // is already used (perhaps because there are multiple permutes with the
10697 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10698 // permute mask out of the loop requires an extra register.
10699 //
10700 // As a compromise, we only emit discrete instructions if the shuffle can
10701 // be generated in 3 or fewer operations. When we have loop information
10702 // available, if this block is within a loop, we should avoid using vperm
10703 // for 3-operation perms and use a constant pool load instead.
10704 if (Cost < 3)
10705 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10706 }
10707 }
10708
10709 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10710 // vector that will get spilled to the constant pool.
10711 if (V2.isUndef()) V2 = V1;
10712
10713 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10714}
10715
10716SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10717 ArrayRef<int> PermMask, EVT VT,
10718 SDValue V1, SDValue V2) const {
10719 unsigned Opcode = PPCISD::VPERM;
10720 EVT ValType = V1.getValueType();
10721 SDLoc dl(Op);
10722 bool NeedSwap = false;
10723 bool isLittleEndian = Subtarget.isLittleEndian();
10724 bool isPPC64 = Subtarget.isPPC64();
10725
10726 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10727 (V1->hasOneUse() || V2->hasOneUse())) {
10728 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10729 "XXPERM instead\n");
10730 Opcode = PPCISD::XXPERM;
10731
10732 // The second input to XXPERM is also an output so if the second input has
10733 // multiple uses then copying is necessary, as a result we want the
10734 // single-use operand to be used as the second input to prevent copying.
10735 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10736 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10737 std::swap(V1, V2);
10738 NeedSwap = !NeedSwap;
10739 }
10740 }
10741
10742 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10743 // that it is in input element units, not in bytes. Convert now.
10744
10745 // For little endian, the order of the input vectors is reversed, and
10746 // the permutation mask is complemented with respect to 31. This is
10747 // necessary to produce proper semantics with the big-endian-based vperm
10748 // instruction.
10749 EVT EltVT = V1.getValueType().getVectorElementType();
10750 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10751
10752 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10753 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10754
10755 /*
10756 Vectors will be appended like so: [ V1 | v2 ]
10757 XXSWAPD on V1:
10758 [ A | B | C | D ] -> [ C | D | A | B ]
10759 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10760 i.e. index of A, B += 8, and index of C, D -= 8.
10761 XXSWAPD on V2:
10762 [ E | F | G | H ] -> [ G | H | E | F ]
10763 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10764 i.e. index of E, F += 8, index of G, H -= 8
10765 Swap V1 and V2:
10766 [ V1 | V2 ] -> [ V2 | V1 ]
10767 0-15 16-31 0-15 16-31
10768 i.e. index of V1 += 16, index of V2 -= 16
10769 */
10770
10771 SmallVector<SDValue, 16> ResultMask;
10772 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10773 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10774
10775 if (V1HasXXSWAPD) {
10776 if (SrcElt < 8)
10777 SrcElt += 8;
10778 else if (SrcElt < 16)
10779 SrcElt -= 8;
10780 }
10781 if (V2HasXXSWAPD) {
10782 if (SrcElt > 23)
10783 SrcElt -= 8;
10784 else if (SrcElt > 15)
10785 SrcElt += 8;
10786 }
10787 if (NeedSwap) {
10788 if (SrcElt < 16)
10789 SrcElt += 16;
10790 else
10791 SrcElt -= 16;
10792 }
10793 for (unsigned j = 0; j != BytesPerElement; ++j)
10794 if (isLittleEndian)
10795 ResultMask.push_back(
10796 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10797 else
10798 ResultMask.push_back(
10799 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10800 }
10801
10802 if (V1HasXXSWAPD) {
10803 dl = SDLoc(V1->getOperand(0));
10804 V1 = V1->getOperand(0)->getOperand(1);
10805 }
10806 if (V2HasXXSWAPD) {
10807 dl = SDLoc(V2->getOperand(0));
10808 V2 = V2->getOperand(0)->getOperand(1);
10809 }
10810
10811 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10812 if (ValType != MVT::v2f64)
10813 V1 = DAG.getBitcast(MVT::v2f64, V1);
10814 if (V2.getValueType() != MVT::v2f64)
10815 V2 = DAG.getBitcast(MVT::v2f64, V2);
10816 }
10817
10818 ShufflesHandledWithVPERM++;
10819 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10820 LLVM_DEBUG({
10821 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10822 if (Opcode == PPCISD::XXPERM) {
10823 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10824 } else {
10825 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10826 }
10827 SVOp->dump();
10828 dbgs() << "With the following permute control vector:\n";
10829 VPermMask.dump();
10830 });
10831
10832 if (Opcode == PPCISD::XXPERM)
10833 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10834
10835 // Only need to place items backwards in LE,
10836 // the mask was properly calculated.
10837 if (isLittleEndian)
10838 std::swap(V1, V2);
10839
10840 SDValue VPERMNode =
10841 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10842
10843 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10844 return VPERMNode;
10845}
10846
10847/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10848/// vector comparison. If it is, return true and fill in Opc/isDot with
10849/// information about the intrinsic.
10850static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10851 bool &isDot, const PPCSubtarget &Subtarget) {
10852 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10853 CompareOpc = -1;
10854 isDot = false;
10855 switch (IntrinsicID) {
10856 default:
10857 return false;
10858 // Comparison predicates.
10859 case Intrinsic::ppc_altivec_vcmpbfp_p:
10860 CompareOpc = 966;
10861 isDot = true;
10862 break;
10863 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10864 CompareOpc = 198;
10865 isDot = true;
10866 break;
10867 case Intrinsic::ppc_altivec_vcmpequb_p:
10868 CompareOpc = 6;
10869 isDot = true;
10870 break;
10871 case Intrinsic::ppc_altivec_vcmpequh_p:
10872 CompareOpc = 70;
10873 isDot = true;
10874 break;
10875 case Intrinsic::ppc_altivec_vcmpequw_p:
10876 CompareOpc = 134;
10877 isDot = true;
10878 break;
10879 case Intrinsic::ppc_altivec_vcmpequd_p:
10880 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10881 CompareOpc = 199;
10882 isDot = true;
10883 } else
10884 return false;
10885 break;
10886 case Intrinsic::ppc_altivec_vcmpneb_p:
10887 case Intrinsic::ppc_altivec_vcmpneh_p:
10888 case Intrinsic::ppc_altivec_vcmpnew_p:
10889 case Intrinsic::ppc_altivec_vcmpnezb_p:
10890 case Intrinsic::ppc_altivec_vcmpnezh_p:
10891 case Intrinsic::ppc_altivec_vcmpnezw_p:
10892 if (Subtarget.hasP9Altivec()) {
10893 switch (IntrinsicID) {
10894 default:
10895 llvm_unreachable("Unknown comparison intrinsic.");
10896 case Intrinsic::ppc_altivec_vcmpneb_p:
10897 CompareOpc = 7;
10898 break;
10899 case Intrinsic::ppc_altivec_vcmpneh_p:
10900 CompareOpc = 71;
10901 break;
10902 case Intrinsic::ppc_altivec_vcmpnew_p:
10903 CompareOpc = 135;
10904 break;
10905 case Intrinsic::ppc_altivec_vcmpnezb_p:
10906 CompareOpc = 263;
10907 break;
10908 case Intrinsic::ppc_altivec_vcmpnezh_p:
10909 CompareOpc = 327;
10910 break;
10911 case Intrinsic::ppc_altivec_vcmpnezw_p:
10912 CompareOpc = 391;
10913 break;
10914 }
10915 isDot = true;
10916 } else
10917 return false;
10918 break;
10919 case Intrinsic::ppc_altivec_vcmpgefp_p:
10920 CompareOpc = 454;
10921 isDot = true;
10922 break;
10923 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10924 CompareOpc = 710;
10925 isDot = true;
10926 break;
10927 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10928 CompareOpc = 774;
10929 isDot = true;
10930 break;
10931 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10932 CompareOpc = 838;
10933 isDot = true;
10934 break;
10935 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10936 CompareOpc = 902;
10937 isDot = true;
10938 break;
10939 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10940 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10941 CompareOpc = 967;
10942 isDot = true;
10943 } else
10944 return false;
10945 break;
10946 case Intrinsic::ppc_altivec_vcmpgtub_p:
10947 CompareOpc = 518;
10948 isDot = true;
10949 break;
10950 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10951 CompareOpc = 582;
10952 isDot = true;
10953 break;
10954 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10955 CompareOpc = 646;
10956 isDot = true;
10957 break;
10958 case Intrinsic::ppc_altivec_vcmpgtud_p:
10959 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10960 CompareOpc = 711;
10961 isDot = true;
10962 } else
10963 return false;
10964 break;
10965
10966 case Intrinsic::ppc_altivec_vcmpequq:
10967 case Intrinsic::ppc_altivec_vcmpgtsq:
10968 case Intrinsic::ppc_altivec_vcmpgtuq:
10969 if (!Subtarget.isISA3_1())
10970 return false;
10971 switch (IntrinsicID) {
10972 default:
10973 llvm_unreachable("Unknown comparison intrinsic.");
10974 case Intrinsic::ppc_altivec_vcmpequq:
10975 CompareOpc = 455;
10976 break;
10977 case Intrinsic::ppc_altivec_vcmpgtsq:
10978 CompareOpc = 903;
10979 break;
10980 case Intrinsic::ppc_altivec_vcmpgtuq:
10981 CompareOpc = 647;
10982 break;
10983 }
10984 break;
10985
10986 // VSX predicate comparisons use the same infrastructure
10987 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10988 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10989 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10990 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10991 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10992 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10993 if (Subtarget.hasVSX()) {
10994 switch (IntrinsicID) {
10995 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10996 CompareOpc = 99;
10997 break;
10998 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10999 CompareOpc = 115;
11000 break;
11001 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11002 CompareOpc = 107;
11003 break;
11004 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11005 CompareOpc = 67;
11006 break;
11007 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11008 CompareOpc = 83;
11009 break;
11010 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11011 CompareOpc = 75;
11012 break;
11013 }
11014 isDot = true;
11015 } else
11016 return false;
11017 break;
11018
11019 // Normal Comparisons.
11020 case Intrinsic::ppc_altivec_vcmpbfp:
11021 CompareOpc = 966;
11022 break;
11023 case Intrinsic::ppc_altivec_vcmpeqfp:
11024 CompareOpc = 198;
11025 break;
11026 case Intrinsic::ppc_altivec_vcmpequb:
11027 CompareOpc = 6;
11028 break;
11029 case Intrinsic::ppc_altivec_vcmpequh:
11030 CompareOpc = 70;
11031 break;
11032 case Intrinsic::ppc_altivec_vcmpequw:
11033 CompareOpc = 134;
11034 break;
11035 case Intrinsic::ppc_altivec_vcmpequd:
11036 if (Subtarget.hasP8Altivec())
11037 CompareOpc = 199;
11038 else
11039 return false;
11040 break;
11041 case Intrinsic::ppc_altivec_vcmpneb:
11042 case Intrinsic::ppc_altivec_vcmpneh:
11043 case Intrinsic::ppc_altivec_vcmpnew:
11044 case Intrinsic::ppc_altivec_vcmpnezb:
11045 case Intrinsic::ppc_altivec_vcmpnezh:
11046 case Intrinsic::ppc_altivec_vcmpnezw:
11047 if (Subtarget.hasP9Altivec())
11048 switch (IntrinsicID) {
11049 default:
11050 llvm_unreachable("Unknown comparison intrinsic.");
11051 case Intrinsic::ppc_altivec_vcmpneb:
11052 CompareOpc = 7;
11053 break;
11054 case Intrinsic::ppc_altivec_vcmpneh:
11055 CompareOpc = 71;
11056 break;
11057 case Intrinsic::ppc_altivec_vcmpnew:
11058 CompareOpc = 135;
11059 break;
11060 case Intrinsic::ppc_altivec_vcmpnezb:
11061 CompareOpc = 263;
11062 break;
11063 case Intrinsic::ppc_altivec_vcmpnezh:
11064 CompareOpc = 327;
11065 break;
11066 case Intrinsic::ppc_altivec_vcmpnezw:
11067 CompareOpc = 391;
11068 break;
11069 }
11070 else
11071 return false;
11072 break;
11073 case Intrinsic::ppc_altivec_vcmpgefp:
11074 CompareOpc = 454;
11075 break;
11076 case Intrinsic::ppc_altivec_vcmpgtfp:
11077 CompareOpc = 710;
11078 break;
11079 case Intrinsic::ppc_altivec_vcmpgtsb:
11080 CompareOpc = 774;
11081 break;
11082 case Intrinsic::ppc_altivec_vcmpgtsh:
11083 CompareOpc = 838;
11084 break;
11085 case Intrinsic::ppc_altivec_vcmpgtsw:
11086 CompareOpc = 902;
11087 break;
11088 case Intrinsic::ppc_altivec_vcmpgtsd:
11089 if (Subtarget.hasP8Altivec())
11090 CompareOpc = 967;
11091 else
11092 return false;
11093 break;
11094 case Intrinsic::ppc_altivec_vcmpgtub:
11095 CompareOpc = 518;
11096 break;
11097 case Intrinsic::ppc_altivec_vcmpgtuh:
11098 CompareOpc = 582;
11099 break;
11100 case Intrinsic::ppc_altivec_vcmpgtuw:
11101 CompareOpc = 646;
11102 break;
11103 case Intrinsic::ppc_altivec_vcmpgtud:
11104 if (Subtarget.hasP8Altivec())
11105 CompareOpc = 711;
11106 else
11107 return false;
11108 break;
11109 case Intrinsic::ppc_altivec_vcmpequq_p:
11110 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11111 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11112 if (!Subtarget.isISA3_1())
11113 return false;
11114 switch (IntrinsicID) {
11115 default:
11116 llvm_unreachable("Unknown comparison intrinsic.");
11117 case Intrinsic::ppc_altivec_vcmpequq_p:
11118 CompareOpc = 455;
11119 break;
11120 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11121 CompareOpc = 903;
11122 break;
11123 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11124 CompareOpc = 647;
11125 break;
11126 }
11127 isDot = true;
11128 break;
11129 }
11130 return true;
11131}
11132
11133/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11134/// lower, do it, otherwise return null.
11135SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11136 SelectionDAG &DAG) const {
11137 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11138
11139 SDLoc dl(Op);
11140 // Note: BCD instructions expect the immediate operand in vector form (v4i32),
11141 // but the builtin provides it as a scalar. To satisfy the instruction
11142 // encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
11143 auto MapNodeWithSplatVector =
11144 [&](unsigned Opcode,
11145 std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
11146 SDValue SplatVal =
11147 DAG.getNode(ISD::SPLAT_VECTOR, dl, MVT::v4i32, Op.getOperand(2));
11148
11149 SmallVector<SDValue, 4> Ops{SplatVal, Op.getOperand(1)};
11150 Ops.append(ExtraOps.begin(), ExtraOps.end());
11151 return DAG.getNode(Opcode, dl, MVT::v16i8, Ops);
11152 };
11153
11154 switch (IntrinsicID) {
11155 case Intrinsic::thread_pointer:
11156 // Reads the thread pointer register, used for __builtin_thread_pointer.
11157 if (Subtarget.isPPC64())
11158 return DAG.getRegister(PPC::X13, MVT::i64);
11159 return DAG.getRegister(PPC::R2, MVT::i32);
11160
11161 case Intrinsic::ppc_rldimi: {
11162 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11163 SDValue Src = Op.getOperand(1);
11164 APInt Mask = Op.getConstantOperandAPInt(4);
11165 if (Mask.isZero())
11166 return Op.getOperand(2);
11167 if (Mask.isAllOnes())
11168 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11169 uint64_t SH = Op.getConstantOperandVal(3);
11170 unsigned MB = 0, ME = 0;
11171 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11172 report_fatal_error("invalid rldimi mask!");
11173 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11174 if (ME < 63 - SH) {
11175 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11176 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11177 } else if (ME > 63 - SH) {
11178 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11179 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11180 }
11181 return SDValue(
11182 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11183 {Op.getOperand(2), Src,
11184 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11185 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11186 0);
11187 }
11188
11189 case Intrinsic::ppc_rlwimi: {
11190 APInt Mask = Op.getConstantOperandAPInt(4);
11191 if (Mask.isZero())
11192 return Op.getOperand(2);
11193 if (Mask.isAllOnes())
11194 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11195 Op.getOperand(3));
11196 unsigned MB = 0, ME = 0;
11197 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11198 report_fatal_error("invalid rlwimi mask!");
11199 return SDValue(DAG.getMachineNode(
11200 PPC::RLWIMI, dl, MVT::i32,
11201 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11202 DAG.getTargetConstant(MB, dl, MVT::i32),
11203 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11204 0);
11205 }
11206
11207 case Intrinsic::ppc_bcdshift:
11208 return MapNodeWithSplatVector(PPCISD::BCDSHIFT, {Op.getOperand(3)});
11209 case Intrinsic::ppc_bcdshiftround:
11210 return MapNodeWithSplatVector(PPCISD::BCDSHIFTROUND, {Op.getOperand(3)});
11211 case Intrinsic::ppc_bcdtruncate:
11212 return MapNodeWithSplatVector(PPCISD::BCDTRUNC, {Op.getOperand(3)});
11213 case Intrinsic::ppc_bcdunsignedtruncate:
11214 return MapNodeWithSplatVector(PPCISD::BCDUTRUNC);
11215 case Intrinsic::ppc_bcdunsignedshift:
11216 return MapNodeWithSplatVector(PPCISD::BCDUSHIFT);
11217
11218 case Intrinsic::ppc_rlwnm: {
11219 if (Op.getConstantOperandVal(3) == 0)
11220 return DAG.getConstant(0, dl, MVT::i32);
11221 unsigned MB = 0, ME = 0;
11222 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11223 report_fatal_error("invalid rlwnm mask!");
11224 return SDValue(
11225 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11226 {Op.getOperand(1), Op.getOperand(2),
11227 DAG.getTargetConstant(MB, dl, MVT::i32),
11228 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11229 0);
11230 }
11231
11232 case Intrinsic::ppc_mma_disassemble_acc: {
11233 if (Subtarget.isISAFuture()) {
11234 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11235 SDValue WideVec =
11236 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11237 Op.getOperand(1)),
11238 0);
11240 SDValue Value = SDValue(WideVec.getNode(), 0);
11241 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11242
11243 SDValue Extract;
11244 Extract = DAG.getNode(
11245 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11246 Subtarget.isLittleEndian() ? Value2 : Value,
11247 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11248 dl, getPointerTy(DAG.getDataLayout())));
11249 RetOps.push_back(Extract);
11250 Extract = DAG.getNode(
11251 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11252 Subtarget.isLittleEndian() ? Value2 : Value,
11253 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11254 dl, getPointerTy(DAG.getDataLayout())));
11255 RetOps.push_back(Extract);
11256 Extract = DAG.getNode(
11257 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11258 Subtarget.isLittleEndian() ? Value : Value2,
11259 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11260 dl, getPointerTy(DAG.getDataLayout())));
11261 RetOps.push_back(Extract);
11262 Extract = DAG.getNode(
11263 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11264 Subtarget.isLittleEndian() ? Value : Value2,
11265 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11266 dl, getPointerTy(DAG.getDataLayout())));
11267 RetOps.push_back(Extract);
11268 return DAG.getMergeValues(RetOps, dl);
11269 }
11270 [[fallthrough]];
11271 }
11272 case Intrinsic::ppc_vsx_disassemble_pair: {
11273 int NumVecs = 2;
11274 SDValue WideVec = Op.getOperand(1);
11275 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11276 NumVecs = 4;
11277 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11278 }
11280 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11281 SDValue Extract = DAG.getNode(
11282 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11283 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11284 : VecNo,
11285 dl, getPointerTy(DAG.getDataLayout())));
11286 RetOps.push_back(Extract);
11287 }
11288 return DAG.getMergeValues(RetOps, dl);
11289 }
11290
11291 case Intrinsic::ppc_build_dmr: {
11294 for (int i = 1; i < 9; i += 2) {
11295 SDValue Hi = Op.getOperand(i);
11296 SDValue Lo = Op.getOperand(i + 1);
11297 if (Hi->getOpcode() == ISD::LOAD)
11298 Chains.push_back(Hi.getValue(1));
11299 if (Lo->getOpcode() == ISD::LOAD)
11300 Chains.push_back(Lo.getValue(1));
11301 Pairs.push_back(
11302 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11303 }
11304 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11305 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11306 return DAG.getMergeValues({Value, TF}, dl);
11307 }
11308
11309 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11310 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11311 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11312 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11313 "Specify P of 0 or 1 for lower or upper 512 bytes");
11314 unsigned HiLo = Idx->getSExtValue();
11315 unsigned Opcode;
11316 unsigned Subx;
11317 if (HiLo == 0) {
11318 Opcode = PPC::DMXXEXTFDMR512;
11319 Subx = PPC::sub_wacc_lo;
11320 } else {
11321 Opcode = PPC::DMXXEXTFDMR512_HI;
11322 Subx = PPC::sub_wacc_hi;
11323 }
11324 SDValue Subreg(
11325 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11326 Op.getOperand(1),
11327 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11328 0);
11329 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11330 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11331 }
11332
11333 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11334 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11335 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11336 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11337 "Specify a dmr row pair 0-3");
11338 unsigned IdxVal = Idx->getSExtValue();
11339 unsigned Subx;
11340 switch (IdxVal) {
11341 case 0:
11342 Subx = PPC::sub_dmrrowp0;
11343 break;
11344 case 1:
11345 Subx = PPC::sub_dmrrowp1;
11346 break;
11347 case 2:
11348 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11349 break;
11350 case 3:
11351 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11352 break;
11353 }
11354 SDValue Subreg(
11355 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11356 Op.getOperand(1),
11357 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11358 0);
11359 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11360 return SDValue(
11361 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11362 0);
11363 }
11364
11365 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11366 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11367 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11368 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11369 "Specify P of 0 or 1 for lower or upper 512 bytes");
11370 unsigned HiLo = Idx->getSExtValue();
11371 unsigned Opcode;
11372 unsigned Subx;
11373 if (HiLo == 0) {
11374 Opcode = PPCISD::INST512;
11375 Subx = PPC::sub_wacc_lo;
11376 } else {
11377 Opcode = PPCISD::INST512HI;
11378 Subx = PPC::sub_wacc_hi;
11379 }
11380 SDValue Wacc = DAG.getNode(Opcode, dl, MVT::v512i1, Op.getOperand(2),
11381 Op.getOperand(3));
11382 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11383 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11384 Op.getOperand(1), Wacc, SubReg),
11385 0);
11386 }
11387
11388 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11389 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11390 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11391 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11392 "Specify a dmr row pair 0-3");
11393 unsigned IdxVal = Idx->getSExtValue();
11394 unsigned Subx;
11395 switch (IdxVal) {
11396 case 0:
11397 Subx = PPC::sub_dmrrowp0;
11398 break;
11399 case 1:
11400 Subx = PPC::sub_dmrrowp1;
11401 break;
11402 case 2:
11403 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11404 break;
11405 case 3:
11406 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11407 break;
11408 }
11409 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11410 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11411 SDValue DMRRowp =
11412 DAG.getNode(PPCISD::INST256, dl, MVT::v256i1, Op.getOperand(2), P);
11413 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11414 Op.getOperand(1), DMRRowp, SubReg),
11415 0);
11416 }
11417
11418 case Intrinsic::ppc_mma_xxmfacc:
11419 case Intrinsic::ppc_mma_xxmtacc: {
11420 // Allow pre-isa-future subtargets to lower as normal.
11421 if (!Subtarget.isISAFuture())
11422 return SDValue();
11423 // The intrinsics for xxmtacc and xxmfacc take one argument of
11424 // type v512i1, for future cpu the corresponding wacc instruction
11425 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11426 // the need to produce the xxm[t|f]acc.
11427 SDValue WideVec = Op.getOperand(1);
11428 DAG.ReplaceAllUsesWith(Op, WideVec);
11429 return SDValue();
11430 }
11431
11432 case Intrinsic::ppc_unpack_longdouble: {
11433 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11434 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11435 "Argument of long double unpack must be 0 or 1!");
11436 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11437 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11438 Idx->getValueType(0)));
11439 }
11440
11441 case Intrinsic::ppc_compare_exp_lt:
11442 case Intrinsic::ppc_compare_exp_gt:
11443 case Intrinsic::ppc_compare_exp_eq:
11444 case Intrinsic::ppc_compare_exp_uo: {
11445 unsigned Pred;
11446 switch (IntrinsicID) {
11447 case Intrinsic::ppc_compare_exp_lt:
11448 Pred = PPC::PRED_LT;
11449 break;
11450 case Intrinsic::ppc_compare_exp_gt:
11451 Pred = PPC::PRED_GT;
11452 break;
11453 case Intrinsic::ppc_compare_exp_eq:
11454 Pred = PPC::PRED_EQ;
11455 break;
11456 case Intrinsic::ppc_compare_exp_uo:
11457 Pred = PPC::PRED_UN;
11458 break;
11459 }
11460 return SDValue(
11461 DAG.getMachineNode(
11462 PPC::SELECT_CC_I4, dl, MVT::i32,
11463 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11464 Op.getOperand(1), Op.getOperand(2)),
11465 0),
11466 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11467 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11468 0);
11469 }
11470 case Intrinsic::ppc_test_data_class: {
11471 EVT OpVT = Op.getOperand(1).getValueType();
11472 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11473 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11474 : PPC::XSTSTDCSP);
11475 // Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
11476 // The XSTSTDC* instructions test if a floating-point value matches any of
11477 // the data classes specified in the mask, setting CR field bits
11478 // accordingly. We need to extract the EQ bit (bit 2) from the CR field and
11479 // convert it to an integer result (1 if match, 0 if no match).
11480 //
11481 // Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
11482 // intrinsic provides (value, mask) as Op.getOperand(1) and
11483 // Op.getOperand(2).
11484 SDValue TestDataClass =
11485 SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
11486 {Op.getOperand(2), Op.getOperand(1)}),
11487 0);
11488 if (Subtarget.isISA3_1()) {
11489 // ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
11490 // This is more efficient than the SELECT_CC approach used in earlier
11491 // ISAs.
11492 SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
11493 SDValue CRBit =
11494 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11495 TestDataClass, SubRegIdx),
11496 0);
11497
11498 return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
11499 }
11500
11501 // Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
11502 return SDValue(
11503 DAG.getMachineNode(PPC::SELECT_CC_I4, dl, MVT::i32,
11504 {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
11505 DAG.getConstant(0, dl, MVT::i32),
11506 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11507 0);
11508 }
11509 case Intrinsic::ppc_fnmsub: {
11510 EVT VT = Op.getOperand(1).getValueType();
11511 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11512 return DAG.getNode(
11513 ISD::FNEG, dl, VT,
11514 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11515 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11516 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11517 Op.getOperand(2), Op.getOperand(3));
11518 }
11519 case Intrinsic::ppc_convert_f128_to_ppcf128:
11520 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11521 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11522 ? RTLIB::CONVERT_PPCF128_F128
11523 : RTLIB::CONVERT_F128_PPCF128;
11524 MakeLibCallOptions CallOptions;
11525 std::pair<SDValue, SDValue> Result =
11526 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11527 dl, SDValue());
11528 return Result.first;
11529 }
11530 case Intrinsic::ppc_maxfe:
11531 case Intrinsic::ppc_maxfl:
11532 case Intrinsic::ppc_maxfs:
11533 case Intrinsic::ppc_minfe:
11534 case Intrinsic::ppc_minfl:
11535 case Intrinsic::ppc_minfs: {
11536 EVT VT = Op.getValueType();
11537 assert(
11538 all_of(Op->ops().drop_front(4),
11539 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11540 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11541 (void)VT;
11543 if (IntrinsicID == Intrinsic::ppc_minfe ||
11544 IntrinsicID == Intrinsic::ppc_minfl ||
11545 IntrinsicID == Intrinsic::ppc_minfs)
11546 CC = ISD::SETLT;
11547 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11548 SDValue Res = Op.getOperand(I);
11549 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11550 Res =
11551 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11552 }
11553 return Res;
11554 }
11555 }
11556
11557 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11558 // opcode number of the comparison.
11559 int CompareOpc;
11560 bool isDot;
11561 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11562 return SDValue(); // Don't custom lower most intrinsics.
11563
11564 // If this is a non-dot comparison, make the VCMP node and we are done.
11565 if (!isDot) {
11566 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11567 Op.getOperand(1), Op.getOperand(2),
11568 DAG.getConstant(CompareOpc, dl, MVT::i32));
11569 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11570 }
11571
11572 // Create the PPCISD altivec 'dot' comparison node.
11573 SDValue Ops[] = {
11574 Op.getOperand(2), // LHS
11575 Op.getOperand(3), // RHS
11576 DAG.getConstant(CompareOpc, dl, MVT::i32)
11577 };
11578 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11579 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11580
11581 // Unpack the result based on how the target uses it.
11582 unsigned BitNo; // Bit # of CR6.
11583 bool InvertBit; // Invert result?
11584 unsigned Bitx;
11585 unsigned SetOp;
11586 switch (Op.getConstantOperandVal(1)) {
11587 default: // Can't happen, don't crash on invalid number though.
11588 case 0: // Return the value of the EQ bit of CR6.
11589 BitNo = 0;
11590 InvertBit = false;
11591 Bitx = PPC::sub_eq;
11592 SetOp = PPCISD::SETBC;
11593 break;
11594 case 1: // Return the inverted value of the EQ bit of CR6.
11595 BitNo = 0;
11596 InvertBit = true;
11597 Bitx = PPC::sub_eq;
11598 SetOp = PPCISD::SETBCR;
11599 break;
11600 case 2: // Return the value of the LT bit of CR6.
11601 BitNo = 2;
11602 InvertBit = false;
11603 Bitx = PPC::sub_lt;
11604 SetOp = PPCISD::SETBC;
11605 break;
11606 case 3: // Return the inverted value of the LT bit of CR6.
11607 BitNo = 2;
11608 InvertBit = true;
11609 Bitx = PPC::sub_lt;
11610 SetOp = PPCISD::SETBCR;
11611 break;
11612 }
11613
11614 SDValue GlueOp = CompNode.getValue(1);
11615 if (Subtarget.isISA3_1()) {
11616 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11617 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11618 SDValue CRBit =
11619 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11620 CR6Reg, SubRegIdx, GlueOp),
11621 0);
11622 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11623 }
11624
11625 // Now that we have the comparison, emit a copy from the CR to a GPR.
11626 // This is flagged to the above dot comparison.
11627 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11628 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11629
11630 // Shift the bit into the low position.
11631 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11632 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11633 // Isolate the bit.
11634 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11635 DAG.getConstant(1, dl, MVT::i32));
11636
11637 // If we are supposed to, toggle the bit.
11638 if (InvertBit)
11639 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11640 DAG.getConstant(1, dl, MVT::i32));
11641 return Flags;
11642}
11643
11644SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11645 SelectionDAG &DAG) const {
11646 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11647 // the beginning of the argument list.
11648 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11649 SDLoc DL(Op);
11650 switch (Op.getConstantOperandVal(ArgStart)) {
11651 case Intrinsic::ppc_cfence: {
11652 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11653 SDValue Val = Op.getOperand(ArgStart + 1);
11654 EVT Ty = Val.getValueType();
11655 if (Ty == MVT::i128) {
11656 // FIXME: Testing one of two paired registers is sufficient to guarantee
11657 // ordering?
11658 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11659 }
11660 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11661 return SDValue(
11662 DAG.getMachineNode(
11663 Opcode, DL, MVT::Other,
11664 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11665 Op.getOperand(0)),
11666 0);
11667 }
11668 case Intrinsic::ppc_disassemble_dmr: {
11669 assert(ArgStart == 1 &&
11670 "llvm.ppc.disassemble.dmr must carry a chain argument.");
11671 return DAG.getStore(Op.getOperand(0), DL, Op.getOperand(ArgStart + 2),
11672 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11673 }
11674 default:
11675 break;
11676 }
11677 return SDValue();
11678}
11679
11680// Lower scalar BSWAP64 to xxbrd.
11681SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11682 SDLoc dl(Op);
11683 if (!Subtarget.isPPC64())
11684 return Op;
11685
11686 if (Subtarget.hasP9Vector()) {
11687 // MTVSRDD
11688 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11689 Op.getOperand(0));
11690 // XXBRD
11691 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11692 // MFVSRD
11693 int VectorIndex = 0;
11694 if (Subtarget.isLittleEndian())
11695 VectorIndex = 1;
11696 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11697 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11698 return Op;
11699 }
11700
11701 // For Power8, use parallel rotate instructions for faster bswap64.
11702 SDValue Input = Op.getOperand(0);
11703 // Helper to create rotate-and-insert operations (RLWIMI/RLDIMI).
11704 auto CreateRotateInsert =
11705 [&](unsigned Opcode, MVT VT, SDValue Dest, SDValue Src, unsigned RotAmt,
11706 unsigned MaskBegin,
11707 std::optional<unsigned> MaskEnd = std::nullopt) -> SDValue {
11709 Dest, Src, DAG.getTargetConstant(RotAmt, dl, MVT::i32),
11710 DAG.getTargetConstant(MaskBegin, dl, MVT::i32)};
11711 if (MaskEnd.has_value())
11712 Ops.push_back(DAG.getTargetConstant(*MaskEnd, dl, MVT::i32));
11713
11714 return SDValue(DAG.getMachineNode(Opcode, dl, VT, Ops), 0);
11715 };
11716
11717 // Helper to perform 32-bit byte swap using rotl(8) + 2x rlwimi.
11718 auto Swap32 = [&](SDValue Val32) -> SDValue {
11719 SDValue Rot = DAG.getNode(ISD::ROTL, dl, MVT::i32, Val32,
11720 DAG.getConstant(8, dl, MVT::i32));
11721 // Insert bits [24:31] from Val32 into Rot at position [0:7].
11722 SDValue Swap =
11723 CreateRotateInsert(PPC::RLWIMI, MVT::i32, Rot, Val32, 24, 0, 7);
11724 // Insert bits [16:23] from Val32 into Swap at position [16:23].
11725 return CreateRotateInsert(PPC::RLWIMI, MVT::i32, Swap, Val32, 24, 16, 23);
11726 };
11727 // Extract and swap high and low 32-bit halves independently for parallelism.
11728 SDValue Hi32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
11729 DAG.getNode(ISD::SRL, dl, MVT::i64, Input,
11730 DAG.getConstant(32, dl, MVT::i64)));
11731 SDValue Lo32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Input);
11732
11733 // Combine swapped halves: rotate LoSwap left by 32 bits and insert into
11734 // HiSwap to swap their positions, completing the 64-bit byte reversal.
11735 SDValue HiSwap = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Swap32(Hi32));
11736 SDValue LoSwap = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Swap32(Lo32));
11737
11738 return CreateRotateInsert(PPC::RLDIMI, MVT::i64, HiSwap, LoSwap, 32, 0);
11739}
11740
11741// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11742// compared to a value that is atomically loaded (atomic loads zero-extend).
11743SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11744 SelectionDAG &DAG) const {
11745 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11746 "Expecting an atomic compare-and-swap here.");
11747 SDLoc dl(Op);
11748 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11749 EVT MemVT = AtomicNode->getMemoryVT();
11750 if (MemVT.getSizeInBits() >= 32)
11751 return Op;
11752
11753 SDValue CmpOp = Op.getOperand(2);
11754 // If this is already correctly zero-extended, leave it alone.
11755 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11756 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11757 return Op;
11758
11759 // Clear the high bits of the compare operand.
11760 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11761 SDValue NewCmpOp =
11762 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11763 DAG.getConstant(MaskVal, dl, MVT::i32));
11764
11765 // Replace the existing compare operand with the properly zero-extended one.
11767 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11768 Ops.push_back(AtomicNode->getOperand(i));
11769 Ops[2] = NewCmpOp;
11770 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11771 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11772 auto NodeTy =
11773 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11774 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11775}
11776
11777SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11778 SelectionDAG &DAG) const {
11779 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11780 EVT MemVT = N->getMemoryVT();
11781 assert(MemVT.getSimpleVT() == MVT::i128 &&
11782 "Expect quadword atomic operations");
11783 SDLoc dl(N);
11784 unsigned Opc = N->getOpcode();
11785 switch (Opc) {
11786 case ISD::ATOMIC_LOAD: {
11787 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11788 // lowered to ppc instructions by pattern matching instruction selector.
11789 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11791 N->getOperand(0),
11792 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11793 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11794 Ops.push_back(N->getOperand(I));
11795 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11796 Ops, MemVT, N->getMemOperand());
11797 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11798 SDValue ValHi =
11799 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11800 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11801 DAG.getConstant(64, dl, MVT::i32));
11802 SDValue Val =
11803 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11804 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11805 {Val, LoadedVal.getValue(2)});
11806 }
11807 case ISD::ATOMIC_STORE: {
11808 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11809 // lowered to ppc instructions by pattern matching instruction selector.
11810 SDVTList Tys = DAG.getVTList(MVT::Other);
11812 N->getOperand(0),
11813 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11814 SDValue Val = N->getOperand(1);
11815 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11816 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11817 DAG.getConstant(64, dl, MVT::i32));
11818 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11819 Ops.push_back(ValLo);
11820 Ops.push_back(ValHi);
11821 Ops.push_back(N->getOperand(2));
11822 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11823 N->getMemOperand());
11824 }
11825 default:
11826 llvm_unreachable("Unexpected atomic opcode");
11827 }
11828}
11829
11831 SelectionDAG &DAG,
11832 const PPCSubtarget &Subtarget) {
11833 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11834
11835 enum DataClassMask {
11836 DC_NAN = 1 << 6,
11837 DC_NEG_INF = 1 << 4,
11838 DC_POS_INF = 1 << 5,
11839 DC_NEG_ZERO = 1 << 2,
11840 DC_POS_ZERO = 1 << 3,
11841 DC_NEG_SUBNORM = 1,
11842 DC_POS_SUBNORM = 1 << 1,
11843 };
11844
11845 EVT VT = Op.getValueType();
11846
11847 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11848 : VT == MVT::f64 ? PPC::XSTSTDCDP
11849 : PPC::XSTSTDCSP;
11850
11851 if (Mask == fcAllFlags)
11852 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11853 if (Mask == 0)
11854 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11855
11856 // When it's cheaper or necessary to test reverse flags.
11857 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11858 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11859 return DAG.getNOT(Dl, Rev, MVT::i1);
11860 }
11861
11862 // Power doesn't support testing whether a value is 'normal'. Test the rest
11863 // first, and test if it's 'not not-normal' with expected sign.
11864 if (Mask & fcNormal) {
11865 SDValue Rev(DAG.getMachineNode(
11866 TestOp, Dl, MVT::i32,
11867 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11868 DC_NEG_ZERO | DC_POS_ZERO |
11869 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11870 Dl, MVT::i32),
11871 Op),
11872 0);
11873 // Sign are stored in CR bit 0, result are in CR bit 2.
11874 SDValue Sign(
11875 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11876 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11877 0);
11878 SDValue Normal(DAG.getNOT(
11879 Dl,
11881 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11882 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11883 0),
11884 MVT::i1));
11885 if (Mask & fcPosNormal)
11886 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11887 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11888 if (Mask == fcPosNormal || Mask == fcNegNormal)
11889 return Result;
11890
11891 return DAG.getNode(
11892 ISD::OR, Dl, MVT::i1,
11893 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11894 }
11895
11896 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11897 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11898 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11899 bool IsQuiet = Mask & fcQNan;
11900 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11901
11902 // Quietness is determined by the first bit in fraction field.
11903 uint64_t QuietMask = 0;
11904 SDValue HighWord;
11905 if (VT == MVT::f128) {
11906 HighWord = DAG.getNode(
11907 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11908 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11909 QuietMask = 0x8000;
11910 } else if (VT == MVT::f64) {
11911 if (Subtarget.isPPC64()) {
11912 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11913 DAG.getBitcast(MVT::i64, Op),
11914 DAG.getConstant(1, Dl, MVT::i32));
11915 } else {
11916 SDValue Vec = DAG.getBitcast(
11917 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11918 HighWord = DAG.getNode(
11919 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11920 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11921 }
11922 QuietMask = 0x80000;
11923 } else if (VT == MVT::f32) {
11924 HighWord = DAG.getBitcast(MVT::i32, Op);
11925 QuietMask = 0x400000;
11926 }
11927 SDValue NanRes = DAG.getSetCC(
11928 Dl, MVT::i1,
11929 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11930 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11931 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11932 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11933 if (Mask == fcQNan || Mask == fcSNan)
11934 return NanRes;
11935
11936 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11937 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11938 NanRes);
11939 }
11940
11941 unsigned NativeMask = 0;
11942 if ((Mask & fcNan) == fcNan)
11943 NativeMask |= DC_NAN;
11944 if (Mask & fcNegInf)
11945 NativeMask |= DC_NEG_INF;
11946 if (Mask & fcPosInf)
11947 NativeMask |= DC_POS_INF;
11948 if (Mask & fcNegZero)
11949 NativeMask |= DC_NEG_ZERO;
11950 if (Mask & fcPosZero)
11951 NativeMask |= DC_POS_ZERO;
11952 if (Mask & fcNegSubnormal)
11953 NativeMask |= DC_NEG_SUBNORM;
11954 if (Mask & fcPosSubnormal)
11955 NativeMask |= DC_POS_SUBNORM;
11956 return SDValue(
11957 DAG.getMachineNode(
11958 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11960 TestOp, Dl, MVT::i32,
11961 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11962 0),
11963 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11964 0);
11965}
11966
11967SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11968 SelectionDAG &DAG) const {
11969 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11970 SDValue LHS = Op.getOperand(0);
11971 uint64_t RHSC = Op.getConstantOperandVal(1);
11972 SDLoc Dl(Op);
11973 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11974 if (LHS.getValueType() == MVT::ppcf128) {
11975 // The higher part determines the value class.
11976 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11977 DAG.getConstant(1, Dl, MVT::i32));
11978 }
11979
11980 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11981}
11982
11983// Adjust the length value for a load/store with length to account for the
11984// instructions requiring a left justified length, and for non-byte element
11985// types requiring scaling by element size.
11986static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11987 SelectionDAG &DAG) {
11988 SDLoc dl(Val);
11989 EVT VT = Val->getValueType(0);
11990 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11991 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11992 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11993 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11994}
11995
11996SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11997 auto VPLD = cast<VPLoadSDNode>(Op);
11998 bool Future = Subtarget.isISAFuture();
11999 SDLoc dl(Op);
12000 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
12001 "Mask predication not supported");
12002 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12003 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
12004 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
12005 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
12006 Len = AdjustLength(Len, EltBits, !Future, DAG);
12007 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
12008 VPLD->getOperand(1), Len};
12009 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
12010 SDValue VPL =
12012 VPLD->getMemoryVT(), VPLD->getMemOperand());
12013 return VPL;
12014}
12015
12016SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
12017 auto VPST = cast<VPStoreSDNode>(Op);
12018 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
12019 "Mask predication not supported");
12020 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12021 SDLoc dl(Op);
12022 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
12023 unsigned EltBits =
12024 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
12025 bool Future = Subtarget.isISAFuture();
12026 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
12027 Len = AdjustLength(Len, EltBits, !Future, DAG);
12028 SDValue Ops[] = {
12029 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
12030 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
12031 VPST->getOperand(2), Len};
12032 SDVTList Tys = DAG.getVTList(MVT::Other);
12033 SDValue VPS =
12035 VPST->getMemoryVT(), VPST->getMemOperand());
12036 return VPS;
12037}
12038
12039SDValue PPCTargetLowering::LowerPartialReduce(SDValue Op,
12040 SelectionDAG &DAG) const {
12041 SDValue Acc = Op.getOperand(0);
12042 SDValue Op1 = Op.getOperand(1);
12043 SDValue Op2 = Op.getOperand(2);
12044
12045 assert(Op.getOpcode() == ISD::PARTIAL_REDUCE_UMLA &&
12046 "Unexpected partial reduction");
12047
12048 if (Acc.getValueType() != MVT::v4i32)
12049 return SDValue();
12050 if (Op1.getValueType() != MVT::v16i32 || Op1.getOpcode() != ISD::SIGN_EXTEND)
12051 return SDValue();
12052 SDValue Op1Input = Op1.getOperand(0);
12053 if (Op1Input.getValueType() != MVT::v16i8 || !llvm::isOneOrOneSplat(Op2))
12054 return SDValue();
12055
12056 SDLoc dl(Op);
12057 SDValue Ones = DAG.getConstant(1, dl, MVT::v16i8);
12058 return DAG.getNode(ISD::PARTIAL_REDUCE_SUMLA, dl, MVT::v4i32, Acc, Op1Input,
12059 Ones);
12060}
12061
12062SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
12063 SelectionDAG &DAG) const {
12064 SDLoc dl(Op);
12065
12066 MachineFunction &MF = DAG.getMachineFunction();
12067 SDValue Op0 = Op.getOperand(0);
12068 EVT ValVT = Op0.getValueType();
12069 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
12070 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
12071 int64_t IntVal = Op.getConstantOperandVal(0);
12072 if (IntVal >= -16 && IntVal <= 15)
12073 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
12074 dl);
12075 }
12076
12077 ReuseLoadInfo RLI;
12078 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
12079 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
12080 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
12081 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
12082
12083 MachineMemOperand *MMO =
12085 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
12086 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
12088 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
12089 MVT::i32, MMO);
12090 if (RLI.ResChain)
12091 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
12092 return Bits.getValue(0);
12093 }
12094
12095 // Create a stack slot that is 16-byte aligned.
12096 MachineFrameInfo &MFI = MF.getFrameInfo();
12097 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
12098 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12099 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
12100
12101 SDValue Val = Op0;
12102 // P10 hardware store forwarding requires that a single store contains all
12103 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
12104 // to avoid load hit store on P10 when running binaries compiled for older
12105 // processors by generating two mergeable scalar stores to forward with the
12106 // vector load.
12107 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
12108 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
12109 ValVT.getSizeInBits() <= 64) {
12110 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
12111 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
12112 SDValue ShiftBy = DAG.getConstant(
12113 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
12114 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
12115 SDValue Plus8 =
12116 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
12117 SDValue Store2 =
12118 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
12119 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
12120 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
12121 MachinePointerInfo());
12122 }
12123
12124 // Store the input value into Value#0 of the stack slot.
12125 SDValue Store =
12126 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
12127 // Load it out.
12128 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
12129}
12130
12131SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
12132 SelectionDAG &DAG) const {
12133 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12134 "Should only be called for ISD::INSERT_VECTOR_ELT");
12135
12136 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
12137
12138 EVT VT = Op.getValueType();
12139 SDLoc dl(Op);
12140 SDValue V1 = Op.getOperand(0);
12141 SDValue V2 = Op.getOperand(1);
12142
12143 if (VT == MVT::v2f64 && C)
12144 return Op;
12145
12146 if (Subtarget.hasP9Vector()) {
12147 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
12148 // because on P10, it allows this specific insert_vector_elt load pattern to
12149 // utilize the refactored load and store infrastructure in order to exploit
12150 // prefixed loads.
12151 // On targets with inexpensive direct moves (Power9 and up), a
12152 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
12153 // load since a single precision load will involve conversion to double
12154 // precision on the load followed by another conversion to single precision.
12155 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
12156 (isa<LoadSDNode>(V2))) {
12157 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
12158 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
12159 SDValue InsVecElt =
12160 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12161 BitcastLoad, Op.getOperand(2));
12162 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12163 }
12164 }
12165
12166 if (Subtarget.isISA3_1()) {
12167 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12168 return SDValue();
12169 // On P10, we have legal lowering for constant and variable indices for
12170 // all vectors.
12171 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12172 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12173 return Op;
12174 }
12175
12176 // Before P10, we have legal lowering for constant indices but not for
12177 // variable ones.
12178 if (!C)
12179 return SDValue();
12180
12181 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12182 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12183 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12184 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12185 unsigned InsertAtElement = C->getZExtValue();
12186 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12187 if (Subtarget.isLittleEndian()) {
12188 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12189 }
12190 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12191 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12192 }
12193 return Op;
12194}
12195
12196SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12197 SelectionDAG &DAG) const {
12198 SDLoc dl(Op);
12199 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12200 SDValue LoadChain = LN->getChain();
12201 SDValue BasePtr = LN->getBasePtr();
12202 EVT VT = Op.getValueType();
12203 bool IsV1024i1 = VT == MVT::v1024i1;
12204 bool IsV2048i1 = VT == MVT::v2048i1;
12205
12206 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12207 // Dense Math dmr pair registers, respectively.
12208 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12209 (void)IsV2048i1;
12210 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12211 "Dense Math support required.");
12212 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12213
12215 SmallVector<SDValue, 8> LoadChains;
12216
12217 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12218 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12219 MachineMemOperand *MMO = LN->getMemOperand();
12220 unsigned NumVecs = VT.getSizeInBits() / 256;
12221 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12222 MachineMemOperand *NewMMO =
12223 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12224 if (Idx > 0) {
12225 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12226 DAG.getConstant(32, dl, BasePtr.getValueType()));
12227 LoadOps[2] = BasePtr;
12228 }
12230 DAG.getVTList(MVT::v256i1, MVT::Other),
12231 LoadOps, MVT::v256i1, NewMMO);
12232 LoadChains.push_back(Ld.getValue(1));
12233 Loads.push_back(Ld);
12234 }
12235
12236 if (Subtarget.isLittleEndian()) {
12237 std::reverse(Loads.begin(), Loads.end());
12238 std::reverse(LoadChains.begin(), LoadChains.end());
12239 }
12240
12241 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12242 SDValue Value = DMFInsert1024(Loads, dl, DAG);
12243
12244 if (IsV1024i1) {
12245 return DAG.getMergeValues({Value, TF}, dl);
12246 }
12247
12248 // Handle Loads for V2048i1 which represents a dmr pair.
12249 SmallVector<SDValue, 4> MoreLoads{Loads[4], Loads[5], Loads[6], Loads[7]};
12250 SDValue Dmr1Value = DMFInsert1024(MoreLoads, dl, DAG);
12251
12252 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12253 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12254
12255 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12256 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12257
12258 SDValue DmrPValue = SDValue(
12259 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12260
12261 return DAG.getMergeValues({DmrPValue, TF}, dl);
12262}
12263
12264SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12265 const SDLoc &dl,
12266 SelectionDAG &DAG) const {
12267 SDValue Lo =
12268 DAG.getNode(PPCISD::INST512, dl, MVT::v512i1, Pairs[0], Pairs[1]);
12269 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12270 SDValue Hi =
12271 DAG.getNode(PPCISD::INST512HI, dl, MVT::v512i1, Pairs[2], Pairs[3]);
12272 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12273 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12274
12275 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12276 {RC, Lo, LoSub, Hi, HiSub}),
12277 0);
12278}
12279
12280SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12281 SelectionDAG &DAG) const {
12282 SDLoc dl(Op);
12283 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12284 SDValue LoadChain = LN->getChain();
12285 SDValue BasePtr = LN->getBasePtr();
12286 EVT VT = Op.getValueType();
12287
12288 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12289 return LowerDMFVectorLoad(Op, DAG);
12290
12291 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12292 return Op;
12293
12294 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12295 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12296 "Type unsupported without MMA");
12297 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12298 "Type unsupported without paired vector support");
12299
12300 // For v256i1 on ISA Future, let the load go through to instruction selection
12301 // where it will be matched to lxvp/plxvp by the instruction patterns.
12302 if (VT == MVT::v256i1 && Subtarget.isISAFuture())
12303 return Op;
12304
12305 // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
12306 // value in 2 or 4 vsx registers.
12307 Align Alignment = LN->getAlign();
12309 SmallVector<SDValue, 4> LoadChains;
12310 unsigned NumVecs = VT.getSizeInBits() / 128;
12311 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12312 SDValue Load =
12313 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12314 LN->getPointerInfo().getWithOffset(Idx * 16),
12315 commonAlignment(Alignment, Idx * 16),
12316 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12317 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12318 DAG.getConstant(16, dl, BasePtr.getValueType()));
12319 Loads.push_back(Load);
12320 LoadChains.push_back(Load.getValue(1));
12321 }
12322 if (Subtarget.isLittleEndian()) {
12323 std::reverse(Loads.begin(), Loads.end());
12324 std::reverse(LoadChains.begin(), LoadChains.end());
12325 }
12326 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12327 SDValue Value =
12328 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12329 dl, VT, Loads);
12330 SDValue RetOps[] = {Value, TF};
12331 return DAG.getMergeValues(RetOps, dl);
12332}
12333
// Lowers a store of a v1024i1 or v2048i1 value (the only types accepted by the
// assert below). The stored operand is split with EXTRACT_SUBREG into v512i1
// pieces, each piece is fed to a DMXXEXTFDMR512 / DMXXEXTFDMR512_HI machine
// node producing two v256i1 results, and the collected v256i1 values are then
// written out 32 bytes apart. The resulting store chains are merged with a
// TokenFactor, which is the value returned.
// NOTE(review): several lines of this function are absent from this listing
// (e.g. the declarations of Values, Stores, Ops, Lo, Hi and the call that
// creates each store node St) -- confirm against the real file.
12334SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12335 SelectionDAG &DAG) const {
12336
12337 SDLoc dl(Op);
12338 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12339 SDValue StoreChain = SN->getChain();
12340 SDValue BasePtr = SN->getBasePtr();
12343 EVT VT = SN->getValue().getValueType();
12344 bool IsV1024i1 = VT == MVT::v1024i1;
12345 bool IsV2048i1 = VT == MVT::v2048i1;
12346
12347 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12348 // Dense Math dmr pair registers, respectively.
12349 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
// IsV2048i1 is only read by the assert above; the cast keeps it "used" when
// asserts are compiled out.
12350 (void)IsV2048i1;
12351 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12352 "Dense Math support required.");
12353 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12354
// Result types of every DMXXEXTFDMR512* machine node created below: a pair of
// v256i1 values.
12355 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12356 if (IsV1024i1) {
// v1024i1: extract the sub_wacc_lo and sub_wacc_hi v512i1 halves of the stored
// operand, then extract two v256i1 values from each half (4 values in total).
12358 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12359 Op.getOperand(1),
12360 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12361 0);
12363 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12364 Op.getOperand(1),
12365 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12366 0);
12367 MachineSDNode *ExtNode =
12368 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12369 Values.push_back(SDValue(ExtNode, 0));
12370 Values.push_back(SDValue(ExtNode, 1));
12371 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12372 Values.push_back(SDValue(ExtNode, 0));
12373 Values.push_back(SDValue(ExtNode, 1));
12374 } else {
12375 // This corresponds to v2048i1 which represents a dmr pair.
// First split the pair into its two v1024i1 halves (sub_dmr0 / sub_dmr1), then
// treat each half exactly like the v1024i1 case above (8 values in total).
12376 SDValue Dmr0(
12377 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12378 Op.getOperand(1),
12379 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12380 0);
12381
12382 SDValue Dmr1(
12383 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12384 Op.getOperand(1),
12385 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12386 0);
12387
12388 SDValue Dmr0Lo(DAG.getMachineNode(
12389 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12390 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12391 0);
12392
12393 SDValue Dmr0Hi(DAG.getMachineNode(
12394 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12395 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12396 0);
12397
12398 SDValue Dmr1Lo(DAG.getMachineNode(
12399 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12400 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12401 0);
12402
12403 SDValue Dmr1Hi(DAG.getMachineNode(
12404 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12405 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12406 0);
12407
12408 MachineSDNode *ExtNode =
12409 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12410 Values.push_back(SDValue(ExtNode, 0));
12411 Values.push_back(SDValue(ExtNode, 1));
12412 ExtNode =
12413 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12414 Values.push_back(SDValue(ExtNode, 0));
12415 Values.push_back(SDValue(ExtNode, 1));
12416 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12417 Values.push_back(SDValue(ExtNode, 0));
12418 Values.push_back(SDValue(ExtNode, 1));
12419 ExtNode =
12420 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12421 Values.push_back(SDValue(ExtNode, 0));
12422 Values.push_back(SDValue(ExtNode, 1));
12423 }
12424
// On little-endian subtargets the collected v256i1 values are stored in the
// reverse of the order in which they were extracted.
12425 if (Subtarget.isLittleEndian())
12426 std::reverse(Values.begin(), Values.end());
12427
12428 SDVTList Tys = DAG.getVTList(MVT::Other);
// Operand list reused for every store: chain, the ppc_vsx_stxvp intrinsic id,
// the value (slot 2) and the address (slot 3). Slots 2 and 3 are overwritten
// inside the loop.
12430 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12431 Values[0], BasePtr};
12432 MachineMemOperand *MMO = SN->getMemOperand();
// One iteration per 256 bits of the stored type. Each iteration derives a
// memory operand at byte offset Idx * 32 with size 32, and (after the first)
// advances the base pointer by 32.
12433 unsigned NumVecs = VT.getSizeInBits() / 256;
12434 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12435 MachineMemOperand *NewMMO =
12436 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12437 if (Idx > 0) {
12438 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12439 DAG.getConstant(32, dl, BasePtr.getValueType()));
12440 Ops[3] = BasePtr;
12441 }
12442 Ops[2] = Values[Idx];
12444 MVT::v256i1, NewMMO);
12445 Stores.push_back(St);
12446 }
12447
// Join the chains of all emitted stores into a single chain result.
12448 SDValue TF = DAG.getTokenFactor(dl, Stores);
12449 return TF;
12450}
12451
// Custom lowering for vector stores of the wide i1 vector types.
//  - v1024i1 / v2048i1 are forwarded to LowerDMFVectorStore.
//  - Any type other than v256i1 / v512i1 is returned unchanged.
//  - v256i1 / v512i1 are split into 2 or 4 v16i8 stores, 16 bytes apart, whose
//    chains are merged with a TokenFactor (the returned value).
// NOTE(review): two lines are absent from this listing (the tail of the
// "ISA Future" condition and the declaration of Stores) -- confirm against the
// real file.
12452SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12453 SelectionDAG &DAG) const {
12454 SDLoc dl(Op);
12455 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12456 SDValue StoreChain = SN->getChain();
12457 SDValue BasePtr = SN->getBasePtr();
12458 SDValue Value = SN->getValue();
// Value2 starts as a second handle on the stored value; it is only replaced
// (and only read) on the ISA Future v512i1 path below.
12459 SDValue Value2 = SN->getValue();
12460 EVT StoreVT = Value.getValueType();
12461
12462 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12463 return LowerDMFVectorStore(Op, DAG);
12464
12465 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12466 return Op;
12467
12468 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12469 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12470 "Type unsupported without MMA");
12471 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12472 "Type unsupported without paired vector support");
12473
12474 // For v256i1 on ISA Future, let the store go through to instruction selection
12475 // where it will be matched to stxvp/pstxvp by the instruction patterns.
12476 if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
12478 return Op;
12479
12480 // For other cases, create 2 or 4 v16i8 stores to store the pair or
12481 // accumulator underlying registers individually.
12482 Align Alignment = SN->getAlign();
12484 unsigned NumVecs = 2;
12485 if (StoreVT == MVT::v512i1) {
12486 if (Subtarget.isISAFuture()) {
// ISA Future: split the accumulator into two v256i1 values with
// DMXXEXTFDMR512; Value holds result 0 and Value2 holds result 1.
12487 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12488 MachineSDNode *ExtNode = DAG.getMachineNode(
12489 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12490
12491 Value = SDValue(ExtNode, 0);
12492 Value2 = SDValue(ExtNode, 1);
12493 } else
// Otherwise the accumulator is first passed through an XXMFACC node.
12494 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12495 NumVecs = 4;
12496 }
12497 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
// Register index to extract for this store; reversed on little-endian.
12498 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12499 SDValue Elt;
12500 if (Subtarget.isISAFuture()) {
// On ISA Future the index is taken modulo 2, and iterations 2 and 3 read from
// Value2 instead of Value.
12501 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12502 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12503 Idx > 1 ? Value2 : Value,
12504 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12505 } else
12506 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12507 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12508
// Store the extracted v16i8 at byte offset Idx * 16, reusing the original
// store's flags and AA info, then advance the base pointer by 16.
12509 SDValue Store =
12510 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12511 SN->getPointerInfo().getWithOffset(Idx * 16),
12512 commonAlignment(Alignment, Idx * 16),
12513 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12514 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12515 DAG.getConstant(16, dl, BasePtr.getValueType()));
12516 Stores.push_back(Store);
12517 }
12518 SDValue TF = DAG.getTokenFactor(dl, Stores);
12519 return TF;
12520}
12521
12522SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12523 SDLoc dl(Op);
12524 if (Op.getValueType() == MVT::v4i32) {
12525 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12526
12527 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12528 // +16 as shift amt.
12529 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12530 SDValue RHSSwap = // = vrlw RHS, 16
12531 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12532
12533 // Shrinkify inputs to v8i16.
12534 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12535 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12536 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12537
12538 // Low parts multiplied together, generating 32-bit results (we ignore the
12539 // top parts).
12540 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12541 LHS, RHS, DAG, dl, MVT::v4i32);
12542
12543 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12544 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12545 // Shift the high parts up 16 bits.
12546 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12547 Neg16, DAG, dl);
12548 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12549 } else if (Op.getValueType() == MVT::v16i8) {
12550 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12551 bool isLittleEndian = Subtarget.isLittleEndian();
12552
12553 // Multiply the even 8-bit parts, producing 16-bit sums.
12554 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12555 LHS, RHS, DAG, dl, MVT::v8i16);
12556 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12557
12558 // Multiply the odd 8-bit parts, producing 16-bit sums.
12559 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12560 LHS, RHS, DAG, dl, MVT::v8i16);
12561 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12562
12563 // Merge the results together. Because vmuleub and vmuloub are
12564 // instructions with a big-endian bias, we must reverse the
12565 // element numbering and reverse the meaning of "odd" and "even"
12566 // when generating little endian code.
12567 int Ops[16];
12568 for (unsigned i = 0; i != 8; ++i) {
12569 if (isLittleEndian) {
12570 Ops[i*2 ] = 2*i;
12571 Ops[i*2+1] = 2*i+16;
12572 } else {
12573 Ops[i*2 ] = 2*i+1;
12574 Ops[i*2+1] = 2*i+1+16;
12575 }
12576 }
12577 if (isLittleEndian)
12578 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12579 else
12580 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12581 } else {
12582 llvm_unreachable("Unknown mul to lower!");
12583 }
12584}
12585
12586SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12587 bool IsStrict = Op->isStrictFPOpcode();
12588 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12589 !Subtarget.hasP9Vector())
12590 return SDValue();
12591
12592 return Op;
12593}
12594
12595// Custom lowering for fpext v2f32 to v2f64
// Returns an empty SDValue for anything other than a v2f32 -> v2f64 extend, or
// when the source operand is not one of the handled shapes; otherwise returns a
// PPCISD::FP_EXTEND_HALF node built on a v4f32 input.
// NOTE(review): the case label that opens the first non-default case (and one
// line of its assert) is absent from this listing -- confirm against the real
// file.
12596SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12597
12598 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12599 "Should only be called for ISD::FP_EXTEND");
12600
12601 // FIXME: handle extends from half precision float vectors on P9.
12602 // We only want to custom lower an extend from v2f32 to v2f64.
12603 if (Op.getValueType() != MVT::v2f64 ||
12604 Op.getOperand(0).getValueType() != MVT::v2f32)
12605 return SDValue();
12606
12607 SDLoc dl(Op);
12608 SDValue Op0 = Op.getOperand(0);
12609
// Dispatch on the node that produces the v2f32 source.
12610 switch (Op0.getOpcode()) {
12611 default:
12612 return SDValue();
12614 assert(Op0.getNumOperands() == 2 &&
12616 "Node should have 2 operands with second one being a constant!");
12617
// Only handled when the first operand of the source node is a v4f32.
12618 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12619 return SDValue();
12620
12621 // Custom lower is only done for high or low doubleword.
12622 int Idx = Op0.getConstantOperandVal(1);
12623 if (Idx % 2 != 0)
12624 return SDValue();
12625
12626 // Since input is v4f32, at this point Idx is either 0 or 2.
12627 // Shift to get the doubleword position we want.
12628 int DWord = Idx >> 1;
12629
12630 // High and low word positions are different on little endian.
12631 if (Subtarget.isLittleEndian())
12632 DWord ^= 0x1;
12633
12634 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12635 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12636 }
12637 case ISD::FADD:
12638 case ISD::FMUL:
12639 case ISD::FSUB: {
// Arithmetic on two loads: re-create each load as a PPCISD::LD_VSX_LH
// producing v4f32, redo the arithmetic in v4f32, then extend doubleword 0.
12640 SDValue NewLoad[2];
12641 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12642 // Ensure both input are loads.
12643 SDValue LdOp = Op0.getOperand(i);
12644 if (LdOp.getOpcode() != ISD::LOAD)
12645 return SDValue();
12646 // Generate new load node.
12647 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12648 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12649 NewLoad[i] = DAG.getMemIntrinsicNode(
12650 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12651 LD->getMemoryVT(), LD->getMemOperand());
12652 }
// The replacement arithmetic node keeps the original opcode and node flags.
12653 SDValue NewOp =
12654 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12655 NewLoad[1], Op0.getNode()->getFlags());
12656 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12657 DAG.getConstant(0, dl, MVT::i32));
12658 }
12659 case ISD::LOAD: {
// A plain load: re-create it as PPCISD::LD_VSX_LH (same chain, base pointer,
// memory VT and memory operand) and extend doubleword 0.
12660 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12661 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12662 SDValue NewLd = DAG.getMemIntrinsicNode(
12663 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12664 LD->getMemoryVT(), LD->getMemOperand());
12665 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12666 DAG.getConstant(0, dl, MVT::i32));
12667 }
12668 }
12669 llvm_unreachable("ERROR:Should return for all cases within swtich.");
12670}
12671
// Helper that turns a carry *value* into the i32 second result of a
// PPCISD::ADDC node.
// NOTE(review): the first signature line is absent from this listing; judging
// by the call in LowerADDSUBO_CARRY this is ConvertCarryValueToCarryFlag, taking
// SumType and Value before the parameters shown -- confirm against the real
// file.
12673 SelectionDAG &DAG,
12674 const PPCSubtarget &STI) {
12675 SDLoc DL(Value);
// Bring the incoming value to SumType: via a select between the constants 1
// and 0 when CR bits are in use, otherwise via zero-extend-or-truncate.
12676 if (STI.useCRBits())
12677 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12678 DAG.getConstant(1, DL, SumType),
12679 DAG.getConstant(0, DL, SumType));
12680 else
12681 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
// Add the all-ones constant with ADDC and return only the node's second (i32)
// result; the sum itself is discarded.
12682 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12683 Value, DAG.getAllOnesConstant(DL, SumType));
12684 return Sum.getValue(1);
12685}
12686
// Helper that turns a carry *flag* (the i32 second result of a carry-producing
// node) back into a value of type CarryType.
// NOTE(review): the first signature line is absent from this listing; judging
// by the calls in LowerADDSUBO / LowerADDSUBO_CARRY this is
// ConvertCarryFlagToCarryValue, taking SumType and Flag before the parameters
// shown -- confirm against the real file.
12688 EVT CarryType, SelectionDAG &DAG,
12689 const PPCSubtarget &STI) {
12690 SDLoc DL(Flag);
12691 SDValue Zero = DAG.getConstant(0, DL, SumType);
// ADDE of 0 and 0 with the flag as carry-in; the first result is used below.
12692 SDValue Carry = DAG.getNode(
12693 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
// With CR bits the result is a "!= 0" comparison; otherwise the sum is
// zero-extended or truncated to CarryType.
12694 if (STI.useCRBits())
12695 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12696 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12697}
12698
12699SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12700
12701 SDLoc DL(Op);
12702 SDNode *N = Op.getNode();
12703 EVT VT = N->getValueType(0);
12704 EVT CarryType = N->getValueType(1);
12705 unsigned Opc = N->getOpcode();
12706 bool IsAdd = Opc == ISD::UADDO;
12707 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12708 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12709 N->getOperand(0), N->getOperand(1));
12710 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12711 DAG, Subtarget);
12712 if (!IsAdd)
12713 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12714 DAG.getConstant(1UL, DL, CarryType));
12715 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12716}
12717
12718SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12719 SelectionDAG &DAG) const {
12720 SDLoc DL(Op);
12721 SDNode *N = Op.getNode();
12722 unsigned Opc = N->getOpcode();
12723 EVT VT = N->getValueType(0);
12724 EVT CarryType = N->getValueType(1);
12725 SDValue CarryOp = N->getOperand(2);
12726 bool IsAdd = Opc == ISD::UADDO_CARRY;
12727 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12728 if (!IsAdd)
12729 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12730 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12731 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12732 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12733 Op.getOperand(0), Op.getOperand(1), CarryOp);
12734 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12735 Subtarget);
12736 if (!IsAdd)
12737 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12738 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12739 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12740}
12741
12742SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12743
12744 SDLoc dl(Op);
12745 SDValue LHS = Op.getOperand(0);
12746 SDValue RHS = Op.getOperand(1);
12747 EVT VT = Op.getNode()->getValueType(0);
12748
12749 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12750
12751 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12752 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12753
12754 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12755
12756 SDValue Overflow =
12757 DAG.getNode(ISD::SRL, dl, VT, And,
12758 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12759
12760 SDValue OverflowTrunc =
12761 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12762
12763 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12764}
12765
12766/// Implements signed add with overflow detection using the rule:
12767/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign
12768SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12769
12770 SDLoc dl(Op);
12771 SDValue LHS = Op.getOperand(0);
12772 SDValue RHS = Op.getOperand(1);
12773 EVT VT = Op.getNode()->getValueType(0);
12774
12775 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12776
12777 // Compute ~(x xor y)
12778 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12779 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12780 // Compute (s xor x)
12781 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12782
12783 // overflow = (x eqv y) & (s xor x)
12784 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
12785
12786 // Shift sign bit down to LSB
12787 SDValue Overflow =
12788 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12789 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12790 // Truncate to the overflow type (i1)
12791 SDValue OverflowTrunc =
12792 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12793
12794 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12795}
12796
12797// Lower unsigned 3-way compare producing -1/0/1.
12798SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12799 SDLoc DL(Op);
12800 SDValue A = DAG.getFreeze(Op.getOperand(0));
12801 SDValue B = DAG.getFreeze(Op.getOperand(1));
12802 EVT OpVT = A.getValueType();
12803 EVT ResVT = Op.getValueType();
12804
12805 // First compute diff = A - B.
12806 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12807
12808 // Generate B - A using SUBC to capture carry.
12809 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12810 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12811 SDValue CA0 = SubC.getValue(1);
12812
12813 // t2 = A - B + CA0 using SUBE.
12814 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12815 SDValue CA1 = SubE1.getValue(1);
12816
12817 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12818 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12819
12820 // Extract the first result and truncate to result type if needed.
12821 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12822}
12823
12824/// LowerOperation - Provide custom lowering hooks for some operations.
12825///
// Dispatches on Op.getOpcode() to the matching Lower*/lower* helper and returns
// its result. Opcodes without a case hit llvm_unreachable. A few cases return
// Op unchanged or an empty SDValue directly.
// NOTE(review): the function signature and several case labels are absent from
// this listing, so some "return" lines below appear without their label --
// confirm against the real file.
12827 switch (Op.getOpcode()) {
12828 default:
12829 llvm_unreachable("Wasn't expecting to be able to lower this!");
12830 case ISD::FPOW: return lowerPow(Op, DAG);
12831 case ISD::FSIN: return lowerSin(Op, DAG);
12832 case ISD::FCOS: return lowerCos(Op, DAG);
12833 case ISD::FLOG: return lowerLog(Op, DAG);
12834 case ISD::FLOG10: return lowerLog10(Op, DAG);
12835 case ISD::FEXP: return lowerExp(Op, DAG);
12836 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12837 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12838 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12839 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12840 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12841 case ISD::STRICT_FSETCC:
12843 case ISD::SETCC: return LowerSETCC(Op, DAG);
12844 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
12845 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12846 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12847 case ISD::SSUBO:
12848 return LowerSSUBO(Op, DAG);
12849 case ISD::SADDO:
12850 return LowerSADDO(Op, DAG);
12851
12852 case ISD::INLINEASM:
12853 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12854 // Variable argument lowering.
12855 case ISD::VASTART: return LowerVASTART(Op, DAG);
12856 case ISD::VAARG: return LowerVAARG(Op, DAG);
12857 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12858
12859 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12860 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12862 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12863
12864 // Exception handling lowering.
12865 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12866 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12867 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12868
12869 case ISD::LOAD: return LowerLOAD(Op, DAG);
12870 case ISD::STORE: return LowerSTORE(Op, DAG);
12871 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12872 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12875 case ISD::FP_TO_UINT:
12876 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12879 case ISD::UINT_TO_FP:
12880 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12881 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12882 case ISD::SET_ROUNDING:
12883 return LowerSET_ROUNDING(Op, DAG);
12884
12885 // Lower 64-bit shifts.
12886 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12887 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12888 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12889
// Both funnel-shift directions share one helper.
12890 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12891 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12892
12893 // Vector-related lowering.
12894 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12895 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12896 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12897 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12898 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12899 case ISD::MUL: return LowerMUL(Op, DAG);
12900 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12902 case ISD::FP_ROUND:
12903 return LowerFP_ROUND(Op, DAG);
12904 case ISD::ROTL: return LowerROTL(Op, DAG);
12905
12906 // For counter-based loop handling.
12908 return SDValue();
12909
12910 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12911
12912 // Frame & Return address.
12913 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12914 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12915
12917 return LowerINTRINSIC_VOID(Op, DAG);
12918 case ISD::BSWAP:
12919 return LowerBSWAP(Op, DAG);
12921 return LowerATOMIC_CMP_SWAP(Op, DAG);
12922 case ISD::ATOMIC_STORE:
12923 return LowerATOMIC_LOAD_STORE(Op, DAG);
12924 case ISD::IS_FPCLASS:
12925 return LowerIS_FPCLASS(Op, DAG);
12926 case ISD::UADDO:
12927 case ISD::USUBO:
12928 return LowerADDSUBO(Op, DAG);
12929 case ISD::UADDO_CARRY:
12930 case ISD::USUBO_CARRY:
12931 return LowerADDSUBO_CARRY(Op, DAG);
12932 case ISD::UCMP:
12933 return LowerUCMP(Op, DAG);
12934 case ISD::STRICT_LRINT:
12935 case ISD::STRICT_LLRINT:
12936 case ISD::STRICT_LROUND:
// These strict rounding nodes are returned unchanged only when flagged as
// raising no FP exceptions; otherwise an empty SDValue is returned.
12939 if (Op->getFlags().hasNoFPExcept())
12940 return Op;
12941 return SDValue();
12942 case ISD::VP_LOAD:
12943 return LowerVP_LOAD(Op, DAG);
12944 case ISD::VP_STORE:
12945 return LowerVP_STORE(Op, DAG);
12947 return LowerPartialReduce(Op, DAG);
12948 }
12949}
12950
// Custom type-legalization hook: for the node N, pushes replacement values onto
// Results, or leaves Results untouched when the opcode/type combination is not
// handled here. Unknown opcodes hit llvm_unreachable.
// NOTE(review): the first signature lines and several case labels are absent
// from this listing; this is presumably PPCTargetLowering::ReplaceNodeResults
// -- confirm against the real file.
12953 SelectionDAG &DAG) const {
12954 SDLoc dl(N);
12955 switch (N->getOpcode()) {
12956 default:
12957 llvm_unreachable("Do not know how to custom type legalize this operation!");
12958 case ISD::ATOMIC_LOAD: {
// Pushes both the lowered value and its result #1.
12959 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12960 Results.push_back(Res);
12961 Results.push_back(Res.getValue(1));
12962 break;
12963 }
12964 case ISD::READCYCLECOUNTER: {
// READ_TIME_BASE yields two i32 values plus a chain; the two i32 values are
// combined into one i64 with BUILD_PAIR.
12965 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12966 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12967
12968 Results.push_back(
12969 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12970 Results.push_back(RTB.getValue(2));
12971 break;
12972 }
// Only the loop_decrement intrinsic is handled in the next case: the node is
// re-created with the setcc result type in place of i1, then truncated back.
12974 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12975 break;
12976
12977 assert(N->getValueType(0) == MVT::i1 &&
12978 "Unexpected result type for CTR decrement intrinsic");
12979 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12980 N->getValueType(0));
12981 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12982 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12983 N->getOperand(1));
12984
12985 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12986 Results.push_back(NewInt.getValue(1));
12987 break;
12988 }
// Intrinsic id is operand 0 in the next case.
12990 switch (N->getConstantOperandVal(0)) {
12991 case Intrinsic::ppc_pack_longdouble:
// Note the operand order: operand 2 is passed first, operand 1 second.
12992 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12993 N->getOperand(2), N->getOperand(1)));
12994 break;
12995 case Intrinsic::ppc_maxfe:
12996 case Intrinsic::ppc_minfe:
12997 case Intrinsic::ppc_fnmsub:
12998 case Intrinsic::ppc_convert_f128_to_ppcf128:
12999 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
13000 break;
13001 }
13002 break;
13003 }
13004 case ISD::VAARG: {
// Handled only for the SVR4 ABI on non-PPC64 subtargets, and only for i64.
13005 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
13006 return;
13007
13008 EVT VT = N->getValueType(0);
13009
13010 if (VT == MVT::i64) {
13011 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
13012
13013 Results.push_back(NewNode);
13014 Results.push_back(NewNode.getValue(1));
13015 }
13016 return;
13017 }
13020 case ISD::FP_TO_SINT:
13021 case ISD::FP_TO_UINT: {
13022 // LowerFP_TO_INT() can only handle f32 and f64.
// Strict FP nodes carry the chain as operand 0, so the source is operand 1.
13023 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
13024 MVT::ppcf128)
13025 return;
13026 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
13027 Results.push_back(LoweredValue);
13028 if (N->isStrictFPOpcode())
13029 Results.push_back(LoweredValue.getValue(1));
13030 return;
13031 }
13032 case ISD::TRUNCATE: {
// Only vector truncates are handled, and only if the helper produced a value.
13033 if (!N->getValueType(0).isVector())
13034 return;
13035 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
13036 if (Lowered)
13037 Results.push_back(Lowered);
13038 return;
13039 }
13040 case ISD::SCALAR_TO_VECTOR: {
13041 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
13042 if (Lowered)
13043 Results.push_back(Lowered);
13044 return;
13045 }
13046 case ISD::FSHL:
13047 case ISD::FSHR:
13048 // Don't handle funnel shifts here.
13049 return;
13050 case ISD::BITCAST:
13051 // Don't handle bitcast here.
13052 return;
13053 case ISD::FP_EXTEND:
13054 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
13055 if (Lowered)
13056 Results.push_back(Lowered);
13057 return;
13058 }
13059}
13060
13061//===----------------------------------------------------------------------===//
13062// Other Lowering Code
13063//===----------------------------------------------------------------------===//
13064
// Emits a call to the intrinsic Id through Builder.CreateIntrinsic, passing an
// empty list as the second argument, and returns the created call.
// NOTE(review): the signature line is absent from this listing; callers in this
// file invoke it as callIntrinsic(Builder, <intrinsic id>) -- confirm against
// the real file.
13066 return Builder.CreateIntrinsic(Id, {});
13067}
13068
// Emits a load-and-reserve intrinsic call on Addr, named "larx". The intrinsic
// (ppc_lbarx / ppc_lharx / ppc_lwarx / ppc_ldarx) is chosen from the primitive
// bit size of ValueTy, and the call result is converted to ValueTy with
// CreateTruncOrBitCast. Ord is not referenced in the lines shown.
// NOTE(review): the first signature line is absent from this listing;
// presumably PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
// Type *ValueTy, ...) -- confirm against the real file.
13070 Value *Addr,
13071 AtomicOrdering Ord) const {
13072 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
13073
13074 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
13075 "Only 8/16/32/64-bit atomic loads supported");
13076 Intrinsic::ID IntID;
13077 switch (SZ) {
13078 default:
13079 llvm_unreachable("Unexpected PrimitiveSize");
13080 case 8:
13081 IntID = Intrinsic::ppc_lbarx;
// The 8- and 16-bit forms additionally require partword atomics.
13082 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13083 break;
13084 case 16:
13085 IntID = Intrinsic::ppc_lharx;
13086 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13087 break;
13088 case 32:
13089 IntID = Intrinsic::ppc_lwarx;
13090 break;
13091 case 64:
13092 IntID = Intrinsic::ppc_ldarx;
13093 break;
13094 }
13095 Value *Call =
13096 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
13097
13098 return Builder.CreateTruncOrBitCast(Call, ValueTy);
13099}
13100
13101// Perform a store-conditional operation to Addr. Return the status of the
13102// store. This should be 0 if the store succeeded, non-zero otherwise.
// The intrinsic (ppc_stbcx / ppc_sthcx / ppc_stwcx / ppc_stdcx) is chosen from
// the primitive bit size of Val's type. The returned value is the intrinsic
// result XOR 1. Ord is not referenced in the lines shown.
// NOTE(review): the first signature line is absent from this listing;
// presumably PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
// ...) -- confirm against the real file.
13104 Value *Val, Value *Addr,
13105 AtomicOrdering Ord) const {
13106 Type *Ty = Val->getType();
13107 unsigned SZ = Ty->getPrimitiveSizeInBits();
13108
13109 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
13110 "Only 8/16/32/64-bit atomic loads supported");
13111 Intrinsic::ID IntID;
13112 switch (SZ) {
13113 default:
13114 llvm_unreachable("Unexpected PrimitiveSize");
13115 case 8:
13116 IntID = Intrinsic::ppc_stbcx;
// The 8- and 16-bit forms additionally require partword atomics.
13117 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13118 break;
13119 case 16:
13120 IntID = Intrinsic::ppc_sthcx;
13121 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13122 break;
13123 case 32:
13124 IntID = Intrinsic::ppc_stwcx;
13125 break;
13126 case 64:
13127 IntID = Intrinsic::ppc_stdcx;
13128 break;
13129 }
13130
// Sub-word values are zero-extended to i32 before being passed on.
13131 if (SZ == 8 || SZ == 16)
13132 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
13133
13134 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
13135 /*FMFSource=*/nullptr, "stcx");
// Flip the low bit of the intrinsic's result before returning it.
13136 return Builder.CreateXor(Call, Builder.getInt32(1));
13137}
13138
13139// The mappings for emitLeading/TrailingFence are taken from
13140// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
// Returns a call to ppc_sync or ppc_lwsync depending on Ord, or nullptr when no
// fence is emitted. ppc_lwsync is used for release-or-stronger orderings that
// did not take the ppc_sync path.
// NOTE(review): the first signature line and the condition guarding the
// ppc_sync return are absent from this listing; presumably this is
// PPCTargetLowering::emitLeadingFence -- confirm against the real file.
13142 Instruction *Inst,
13143 AtomicOrdering Ord) const {
13145 return callIntrinsic(Builder, Intrinsic::ppc_sync);
13146 if (isReleaseOrStronger(Ord))
13147 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13148 return nullptr;
13149}
13150
// Emits the fence that follows an atomic operation. A fence is produced only
// when Inst has an atomic load and Ord is acquire-or-stronger: a ppc_cfence
// call on the loaded value for LoadInst, a ppc_lwsync call otherwise. In every
// other situation nullptr is returned.
// NOTE(review): the first signature line is absent from this listing;
// presumably this is PPCTargetLowering::emitTrailingFence -- confirm against
// the real file.
13152 Instruction *Inst,
13153 AtomicOrdering Ord) const {
13154 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
13155 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
13156 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
13157 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
13158 if (isa<LoadInst>(Inst))
// ppc_cfence is overloaded on the type of Inst and takes Inst as its argument.
13159 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
13160 {Inst});
13161 // FIXME: Can use isync for rmw operation.
13162 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13163 }
13164 return nullptr;
13165}
13166
// Expands an atomic read-modify-write pseudo instruction into a
// load-and-reserve / store-conditional retry loop made of newly created
// machine basic blocks, and returns the block (exitMBB) that receives the
// instructions that followed MI.
//
// Operands read from MI below: 0 = dest, 1 = ptrA, 2 = ptrB,
// 3 = access size in bytes (immediate), 4 = incr.
//
// NOTE(review): the declarator line(s) of this function (return type, name and
// the leading parameters; `MI` and `BB` are used in the body) are not present
// in this listing. Only the trailing parameters are visible.
13169 unsigned BinOpcode,
13170 unsigned CmpOpcode,
13171 unsigned CmpPred) const {
13172 // BinOpcode != 0: Handles atomic load with binary operator, e.g. NAND.
13173 // CmpOpcode != 0: Handles atomic load with MIN/MAX etc.
13174 // BinOpcode == 0 && CmpOpcode == 0: Handles ATOMIC_SWAP.
13175 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13176 unsigned AtomicSize = MI.getOperand(3).getImm();
13177
// Choose the load-and-reserve / store-conditional opcode pair that matches
// the access size; the initial values equal the 8-byte pair.
13178 auto LoadMnemonic = PPC::LDARX;
13179 auto StoreMnemonic = PPC::STDCX;
13180 switch (AtomicSize) {
13181 default:
13182 llvm_unreachable("Unexpected size of atomic entity");
13183 case 1:
13184 LoadMnemonic = PPC::LBARX;
13185 StoreMnemonic = PPC::STBCX;
// The byte form is only selected when the subtarget reports part-word atomics.
13186 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13187 break;
13188 case 2:
13189 LoadMnemonic = PPC::LHARX;
13190 StoreMnemonic = PPC::STHCX;
// The halfword form is only selected when the subtarget reports part-word
// atomics.
13191 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13192 break;
13193 case 4:
13194 LoadMnemonic = PPC::LWARX;
13195 StoreMnemonic = PPC::STWCX;
13196 break;
13197 case 8:
13198 LoadMnemonic = PPC::LDARX;
13199 StoreMnemonic = PPC::STDCX;
13200 break;
13201 }
13202
13203 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13204 MachineFunction *F = BB->getParent();
// NOTE(review): listing line 13205 is absent. `It`, used by the F->insert()
// calls below, is not declared anywhere in the visible text; presumably it is
// the function iterator just past BB -- confirm against the real file.
13206
// For a signed word compare on a byte/halfword, make sure operand 4 (the
// value compared against) is sign extended before it is read into `incr`.
13207 if (CmpOpcode == PPC::CMPW && (AtomicSize == 1 || AtomicSize == 2))
13208 signExtendOperandIfUnknown(MI, BB, 4, /*IsByte=*/AtomicSize == 1, TII);
13209
13210 Register dest = MI.getOperand(0).getReg();
13211 Register ptrA = MI.getOperand(1).getReg();
13212 Register ptrB = MI.getOperand(2).getReg();
13213 Register incr = MI.getOperand(4).getReg();
13214 DebugLoc dl = MI.getDebugLoc();
13215
// loop2MBB is only created for the compare (MIN/MAX) form.
13216 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13217 MachineBasicBlock *loop2MBB =
13218 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13219 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13220 F->insert(It, loopMBB);
13221 if (CmpOpcode)
13222 F->insert(It, loop2MBB);
13223 F->insert(It, exitMBB);
// Move everything after MI out of BB and into exitMBB.
13224 exitMBB->splice(exitMBB->begin(), BB,
13225 std::next(MachineBasicBlock::iterator(MI)), BB->end());
// NOTE(review): listing line 13226 is absent. Nothing visible hands BB's
// original successor edges over to exitMBB; presumably that happens on the
// missing line -- confirm.
13227
13228 MachineRegisterInfo &RegInfo = F->getRegInfo();
// Without a binary opcode the value stored back is `incr` itself; otherwise a
// fresh virtual register (64-bit class for 8-byte accesses) holds the result.
13229 Register TmpReg = (!BinOpcode) ? incr :
13230 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
13231 : &PPC::GPRCRegClass);
13232
13233 // thisMBB:
13234 // ...
13235 // fallthrough --> loopMBB
13236 BB->addSuccessor(loopMBB);
13237
13238 // loopMBB:
13239 // l[wd]arx dest, ptr
13240 // add r0, dest, incr
13241 // st[wd]cx. r0, ptr
13242 // bne- loopMBB
13243 // fallthrough --> exitMBB
13244
13245 // For max/min...
13246 // loopMBB:
13247 // l[wd]arx dest, ptr
13248 // cmpl?[wd] dest, incr
13249 // bgt exitMBB
13250 // loop2MBB:
13251 // st[wd]cx. dest, ptr
13252 // bne- loopMBB
13253 // fallthrough --> exitMBB
13254
13255 BB = loopMBB;
13256 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13257 .addReg(ptrA).addReg(ptrB);
// Note the operand order: incr first, then the loaded value.
13258 if (BinOpcode)
13259 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13260 if (CmpOpcode) {
13261 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13262 // Signed comparisons of byte or halfword values must be sign-extended.
13263 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13264 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13265 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13266 ExtReg).addReg(dest);
13267 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13268 } else
13269 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13270
// When the predicate holds, leave the loop without storing.
13271 BuildMI(BB, dl, TII->get(PPC::BCC))
13272 .addImm(CmpPred)
13273 .addReg(CrReg)
13274 .addMBB(exitMBB);
13275 BB->addSuccessor(loop2MBB);
13276 BB->addSuccessor(exitMBB);
13277 BB = loop2MBB;
13278 }
13279 BuildMI(BB, dl, TII->get(StoreMnemonic))
13280 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
// NOTE(review): listing line 13282 is absent. Unlike the BCC above, this one
// shows no predicate immediate before the CR operand; presumably the missing
// line supplies it (the sketch above says "bne-") -- confirm.
13281 BuildMI(BB, dl, TII->get(PPC::BCC))
13283 .addReg(PPC::CR0)
13284 .addMBB(loopMBB);
13285 BB->addSuccessor(loopMBB);
13286 BB->addSuccessor(exitMBB);
13287
13288 // exitMBB:
13289 // ...
13290 BB = exitMBB;
13291 return BB;
13292}
13293
// Reports whether the value defined by MI is known to be sign extended, based
// on MI's opcode alone. A COPY defers to TII->isSignExtended() on its source
// register; every other opcode listed below yields true; anything else false.
//
// NOTE(review): the declarator line of this function is not present in this
// listing (listing line 13294). From the visible call site it takes a
// MachineInstr and the PPCInstrInfo pointer `TII` -- confirm.
13295 switch(MI.getOpcode()) {
13296 default:
13297 return false;
13298 case PPC::COPY:
// Look through the copy: ask the instruction info about the source register.
13299 return TII->isSignExtended(MI.getOperand(1).getReg(),
13300 &MI.getMF()->getRegInfo());
13301 case PPC::LHA:
13302 case PPC::LHA8:
13303 case PPC::LHAU:
13304 case PPC::LHAU8:
13305 case PPC::LHAUX:
13306 case PPC::LHAUX8:
13307 case PPC::LHAX:
13308 case PPC::LHAX8:
13309 case PPC::LWA:
13310 case PPC::LWAUX:
13311 case PPC::LWAX:
13312 case PPC::LWAX_32:
13313 case PPC::LWA_32:
13314 case PPC::PLHA:
13315 case PPC::PLHA8:
13316 case PPC::PLHA8pc:
13317 case PPC::PLHApc:
13318 case PPC::PLWA:
13319 case PPC::PLWA8:
13320 case PPC::PLWA8pc:
13321 case PPC::PLWApc:
13322 case PPC::EXTSB:
13323 case PPC::EXTSB8:
13324 case PPC::EXTSB8_32_64:
13325 case PPC::EXTSB8_rec:
13326 case PPC::EXTSB_rec:
13327 case PPC::EXTSH:
13328 case PPC::EXTSH8:
13329 case PPC::EXTSH8_32_64:
13330 case PPC::EXTSH8_rec:
13331 case PPC::EXTSH_rec:
13332 case PPC::EXTSW:
13333 case PPC::EXTSWSLI:
13334 case PPC::EXTSWSLI_32_64:
13335 case PPC::EXTSWSLI_32_64_rec:
13336 case PPC::EXTSWSLI_rec:
13337 case PPC::EXTSW_32:
13338 case PPC::EXTSW_32_64:
13339 case PPC::EXTSW_32_64_rec:
13340 case PPC::EXTSW_rec:
13341 case PPC::SRAW:
13342 case PPC::SRAWI:
13343 case PPC::SRAWI_rec:
13344 case PPC::SRAW_rec:
13345 return true;
13346 }
// Not reached: every path through the switch above returns.
13347 return false;
13348}
13349
13350 // Sign extend operand OpIdx if the value is not known to be sign extended.
13351 // Assumes the operand is a register. The flag IsByte controls which instruction
13352 // is used for the sign extension.
// When an extension is needed, an EXTSB (IsByte) or EXTSH is inserted before
// MI in *BB and MI's operand OpIdx is rewritten to the new virtual register.
// NOTE(review): the first declarator line (listing line 13353, carrying the
// return type, name and the leading parameters `MI` and `BB`) is not present
// in this listing.
13354 unsigned OpIdx, bool IsByte,
13355 const PPCInstrInfo *TII) {
13356 MachineFunction *F = MI.getMF();
13357 MachineRegisterInfo &RegInfo = F->getRegInfo();
13358 Register Reg = MI.getOperand(OpIdx).getReg();
// Only a virtual register can be traced to its defining instruction; any
// other register is treated as not known to be sign extended.
13359 bool IsSignExtended =
13360 Reg.isVirtual() && isSignExtended(*RegInfo.getVRegDef(Reg), TII);
13361
13362 if (!IsSignExtended) {
13363 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13364 BuildMI(*BB, MI, MI.getDebugLoc(),
13365 TII->get(IsByte ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13366 .addReg(Reg);
13367 MI.getOperand(OpIdx).setReg(ValueReg);
13368 }
13369}
13370
// Expands a byte or halfword atomic read-modify-write pseudo for subtargets
// without part-word atomics (asserted below). The access is emulated with a
// word-sized lwarx/stwcx. loop on the containing aligned word, using shift and
// mask registers computed from the address. Returns exitMBB, which receives
// the instructions that followed MI plus the final extraction of the result.
//
// Operands read from MI below: 0 = dest, 1 = ptrA, 2 = ptrB,
// 3 = access size in bytes (1 selects the 8-bit path), 4 = incr.
//
// NOTE(review): the first declarator line (listing line 13371, with the return
// type and function name) is not present in this listing.
13372 MachineInstr &MI, MachineBasicBlock *BB, unsigned BinOpcode,
13373 unsigned CmpOpcode, unsigned CmpPred) const {
13374 // BinOpcode != 0: Handles atomic load with binary operator, e.g. NAND.
13375 // CmpOpcode != 0: Handles atomic load with MIN/MAX etc.
13376 // BinOpcode == 0 && CmpOpcode == 0: Handles ATOMIC_SWAP.
13377 assert(!Subtarget.hasPartwordAtomics() &&
13378 "Assumes that part-word atomics are not available");
13379 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13380
13381 // If this is a signed comparison and the value being compared is not known
13382 // to be sign extended, sign extend it here.
13383 DebugLoc dl = MI.getDebugLoc();
13384 MachineFunction *F = BB->getParent();
13385 MachineRegisterInfo &RegInfo = F->getRegInfo();
13386 const bool is8bit = MI.getOperand(3).getImm() == 1;
13387 if (CmpOpcode == PPC::CMPW)
13388 signExtendOperandIfUnknown(MI, BB, 4, is8bit, TII);
// Read operand 4 only after the call above, which may have replaced it.
13389 Register incr = MI.getOperand(4).getReg();
13390
13391 // In 64 bit mode we have to use 64 bits for addresses, even though the
13392 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13393 // registers without caring whether they're 32 or 64, but here we're
13394 // doing actual arithmetic on the addresses.
13395 bool is64bit = Subtarget.isPPC64();
13396 bool isLittleEndian = Subtarget.isLittleEndian();
13397 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13398
13399 const BasicBlock *LLVM_BB = BB->getBasicBlock();
// NOTE(review): listing line 13400 is absent. `It`, used by the F->insert()
// calls below, is not declared in the visible text; presumably it is the
// function iterator just past BB -- confirm.
13401
13402 Register dest = MI.getOperand(0).getReg();
13403 Register ptrA = MI.getOperand(1).getReg();
13404 Register ptrB = MI.getOperand(2).getReg();
13405
// loop2MBB is only created for the compare (MIN/MAX) form.
13406 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13407 MachineBasicBlock *loop2MBB =
13408 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13409 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13410 F->insert(It, loopMBB);
13411 if (CmpOpcode)
13412 F->insert(It, loop2MBB);
13413 F->insert(It, exitMBB);
// Move everything after MI out of BB and into exitMBB.
13414 exitMBB->splice(exitMBB->begin(), BB,
13415 std::next(MachineBasicBlock::iterator(MI)), BB->end());
// NOTE(review): listing line 13416 is absent. Nothing visible hands BB's
// original successor edges over to exitMBB; presumably that happens on the
// missing line -- confirm.
13417
// RC is the pointer-width register class; all data temporaries are 32-bit.
13418 const TargetRegisterClass *RC =
13419 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13420 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13421
13422 Register PtrReg = RegInfo.createVirtualRegister(RC);
13423 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
// On little-endian the XORI adjustment below is skipped, so the shift amount
// is Shift1Reg itself.
13424 Register ShiftReg =
13425 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13426 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13427 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13428 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13429 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13430 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13431 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13432 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13433 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13434 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13435 Register Ptr1Reg;
// Without a binary opcode the shifted increment is what gets merged and
// stored.
13436 Register TmpReg =
13437 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13438
13439 // thisMBB:
13440 // ...
13441 // fallthrough --> loopMBB
13442 BB->addSuccessor(loopMBB);
13443
13444 // The 4-byte load must be aligned, while a char or short may be
13445 // anywhere in the word. Hence all this nasty bookkeeping code.
13446 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13447 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13448 // xori shift, shift1, 24 [16]
13449 // rlwinm ptr, ptr1, 0, 0, 29
13450 // slw incr2, incr, shift
13451 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13452 // slw mask, mask2, shift
13453 // loopMBB:
13454 // lwarx tmpDest, ptr
13455 // add tmp, tmpDest, incr2
13456 // andc tmp2, tmpDest, mask
13457 // and tmp3, tmp, mask
13458 // or tmp4, tmp3, tmp2
13459 // stwcx. tmp4, ptr
13460 // bne- loopMBB
13461 // fallthrough --> exitMBB
13462 // srw SrwDest, tmpDest, shift
13463 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
// Form the full effective address unless ptrA is the zero register, in which
// case ptrB already is the address.
13464 if (ptrA != ZeroReg) {
13465 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13466 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13467 .addReg(ptrA)
13468 .addReg(ptrB);
13469 } else {
13470 Ptr1Reg = ptrB;
13471 }
13472 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
13473 // mode.
// Shift1 = (low address bits) * 8, i.e. the bit offset within the word.
13474 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13475 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
13476 .addImm(3)
13477 .addImm(27)
13478 .addImm(is8bit ? 28 : 27);
13479 if (!isLittleEndian)
13480 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13481 .addReg(Shift1Reg)
13482 .addImm(is8bit ? 24 : 16);
// Clear the two low address bits to get the aligned word address.
13483 if (is64bit)
13484 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13485 .addReg(Ptr1Reg)
13486 .addImm(0)
13487 .addImm(61);
13488 else
13489 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13490 .addReg(Ptr1Reg)
13491 .addImm(0)
13492 .addImm(0)
13493 .addImm(29);
// Position the operand and the field mask (0xFF or 0xFFFF) at the field's
// bit offset.
13494 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13495 if (is8bit)
13496 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13497 else {
13498 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13499 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13500 .addReg(Mask3Reg)
13501 .addImm(65535);
13502 }
13503 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13504 .addReg(Mask2Reg)
13505 .addReg(ShiftReg);
13506
13507 BB = loopMBB;
13508 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13509 .addReg(ZeroReg)
13510 .addReg(PtrReg);
13511 if (BinOpcode)
13512 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13513 .addReg(Incr2Reg)
13514 .addReg(TmpDestReg);
// Tmp2 = the word with the field cleared; Tmp3 = the new field bits only.
13515 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13516 .addReg(TmpDestReg)
13517 .addReg(MaskReg);
13518 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13519 if (CmpOpcode) {
13520 // For unsigned comparisons, we can directly compare the shifted values.
13521 // For signed comparisons we shift and sign extend.
13522 Register SReg = RegInfo.createVirtualRegister(GPRC);
13523 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13524 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13525 .addReg(TmpDestReg)
13526 .addReg(MaskReg);
13527 unsigned ValueReg = SReg;
13528 unsigned CmpReg = Incr2Reg;
13529 if (CmpOpcode == PPC::CMPW) {
13530 ValueReg = RegInfo.createVirtualRegister(GPRC);
13531 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13532 .addReg(SReg)
13533 .addReg(ShiftReg);
13534 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13535 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13536 .addReg(ValueReg);
13537 ValueReg = ValueSReg;
// The signed path compares against the unshifted operand.
13538 CmpReg = incr;
13539 }
13540 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
// When the predicate holds, leave the loop without storing.
13541 BuildMI(BB, dl, TII->get(PPC::BCC))
13542 .addImm(CmpPred)
13543 .addReg(CrReg)
13544 .addMBB(exitMBB);
13545 BB->addSuccessor(loop2MBB);
13546 BB->addSuccessor(exitMBB);
13547 BB = loop2MBB;
13548 }
// Merge the new field into the untouched bytes and try to store the word.
13549 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13550 BuildMI(BB, dl, TII->get(PPC::STWCX))
13551 .addReg(Tmp4Reg)
13552 .addReg(ZeroReg)
13553 .addReg(PtrReg);
// NOTE(review): listing line 13555 is absent. Unlike the BCC above, this one
// shows no predicate immediate before the CR operand; presumably the missing
// line supplies it (the sketch above says "bne-") -- confirm.
13554 BuildMI(BB, dl, TII->get(PPC::BCC))
13556 .addReg(PPC::CR0)
13557 .addMBB(loopMBB);
13558 BB->addSuccessor(loopMBB);
13559 BB->addSuccessor(exitMBB);
13560
13561 // exitMBB:
13562 // ...
13563 BB = exitMBB;
13564 // Since the shift amount is not a constant, we need to clear
13565 // the upper bits with a separate RLWINM.
// Both instructions are inserted at BB->begin(), so the SRW built second ends
// up in front of the RLWINM built first.
13566 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13567 .addReg(SrwDestReg)
13568 .addImm(0)
13569 .addImm(is8bit ? 24 : 16)
13570 .addImm(31);
13571 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13572 .addReg(TmpDestReg)
13573 .addReg(ShiftReg);
13574 return BB;
13575}
13576
// Expands the setjmp-style pseudo MI into the thisMBB / mainMBB / sinkMBB
// control flow sketched further down. It stores the TOC pointer (64-bit ELF
// ABI only), the base pointer and the resume address into the buffer whose
// address is MI operand 1, and defines MI operand 0 as 0 on the direct path
// and 1 on the restore path. Erases MI and returns sinkMBB.
//
// NOTE(review): the leading declarator lines (listing lines 13577-13578, with
// the return type, function name and the `MI` parameter) are not present in
// this listing.
13579 MachineBasicBlock *MBB) const {
13580 DebugLoc DL = MI.getDebugLoc();
13581 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13582 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13583
13584 MachineFunction *MF = MBB->getParent();
13585 MachineRegisterInfo &MRI = MF->getRegInfo();
13586
13587 const BasicBlock *BB = MBB->getBasicBlock();
13588 MachineFunction::iterator I = ++MBB->getIterator();
13589
13590 Register DstReg = MI.getOperand(0).getReg();
13591 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13592 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
// One result register per incoming path; they are merged by the PHI in
// sinkMBB.
13593 Register mainDstReg = MRI.createVirtualRegister(RC);
13594 Register restoreDstReg = MRI.createVirtualRegister(RC);
13595
13596 MVT PVT = getPointerTy(MF->getDataLayout());
13597 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13598 "Invalid Pointer Size!");
13599 // For v = setjmp(buf), we generate
13600 //
13601 // thisMBB:
13602 // SjLjSetup mainMBB
13603 // bl mainMBB
13604 // v_restore = 1
13605 // b sinkMBB
13606 //
13607 // mainMBB:
13608 // buf[LabelOffset] = LR
13609 // v_main = 0
13610 //
13611 // sinkMBB:
13612 // v = phi(main, restore)
13613 //
13614
13615 MachineBasicBlock *thisMBB = MBB;
13616 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13617 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13618 MF->insert(I, mainMBB);
13619 MF->insert(I, sinkMBB);
13620
// NOTE(review): listing line 13621 is absent. `MIB`, assigned repeatedly
// below, is not declared in the visible text; presumably it is declared on
// the missing line -- confirm.
13622
13623 // Transfer the remainder of BB and its successor edges to sinkMBB.
13624 sinkMBB->splice(sinkMBB->begin(), MBB,
13625 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
// NOTE(review): listing line 13626 is absent. Only the instruction splice is
// visible; the successor-edge transfer promised by the comment above
// presumably sits on the missing line -- confirm.
13627
13628 // Note that the structure of the jmp_buf used here is not compatible
13629 // with that used by libc, and is not designed to be. Specifically, it
13630 // stores only those 'reserved' registers that LLVM does not otherwise
13631 // understand how to spill. Also, by convention, by the time this
13632 // intrinsic is called, Clang has already stored the frame address in the
13633 // first slot of the buffer and stack address in the third. Following the
13634 // X86 target code, we'll store the jump address in the second slot. We also
13635 // need to save the TOC pointer (R2) to handle jumps between shared
13636 // libraries, and that will be stored in the fourth slot. The thread
13637 // identifier (R13) is not affected.
13638
13639 // thisMBB:
// Buffer slot offsets, in units of the pointer store size.
13640 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13641 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13642 const int64_t BPOffset = 4 * PVT.getStoreSize();
13643
13644 // Prepare IP either in reg.
13645 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13646 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13647 Register BufReg = MI.getOperand(1).getReg();
13648
// Save the TOC pointer (X2) into the buffer; 64-bit ELF ABI only.
13649 if (Subtarget.is64BitELFABI()) {
13650 setUsesTOCBasePtr(*MBB->getParent());
13651 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13652 .addReg(PPC::X2)
13653 .addImm(TOCOffset)
13654 .addReg(BufReg)
13655 .cloneMemRefs(MI);
13656 }
13657
13658 // Naked functions never have a base pointer, and so we use r1. For all
13659 // other functions, this decision must be delayed until during PEI.
13660 unsigned BaseReg;
13661 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13662 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13663 else
13664 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13665
13666 MIB = BuildMI(*thisMBB, MI, DL,
13667 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13668 .addReg(BaseReg)
13669 .addImm(BPOffset)
13670 .addReg(BufReg)
13671 .cloneMemRefs(MI);
13672
13673 // Setup
// Branch-and-link to mainMBB; the register mask marks no registers as
// preserved across it.
13674 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13675 MIB.addRegMask(TRI->getNoPreservedMask());
13676
// Restore path: the result value is 1.
13677 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13678
13679 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13680 .addMBB(mainMBB);
13681 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13682
13683 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13684 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13685
13686 // mainMBB:
13687 // mainDstReg = 0
// Read the link register (set by the branch-and-link above) into LabelReg.
13688 MIB =
13689 BuildMI(mainMBB, DL,
13690 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13691
13692 // Store IP
13693 if (Subtarget.isPPC64()) {
13694 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13695 .addReg(LabelReg)
13696 .addImm(LabelOffset)
13697 .addReg(BufReg);
13698 } else {
13699 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13700 .addReg(LabelReg)
13701 .addImm(LabelOffset)
13702 .addReg(BufReg);
13703 }
13704 MIB.cloneMemRefs(MI);
13705
13706 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13707 mainMBB->addSuccessor(sinkMBB);
13708
13709 // sinkMBB:
13710 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13711 TII->get(PPC::PHI), DstReg)
13712 .addReg(mainDstReg).addMBB(mainMBB)
13713 .addReg(restoreDstReg).addMBB(thisMBB);
13714
13715 MI.eraseFromParent();
13716 return sinkMBB;
13717}
13718
// Expands the longjmp-style pseudo MI in place: reloads FP, the jump address,
// SP, BP and (64-bit SVR4 only) the TOC pointer from the buffer whose address
// is MI operand 0, then branches to the reloaded address through CTR. Erases
// MI and returns MBB.
//
// NOTE(review): the leading declarator lines (listing lines 13719-13720, with
// the return type, function name and the `MI` parameter) are not present in
// this listing.
13721 MachineBasicBlock *MBB) const {
13722 DebugLoc DL = MI.getDebugLoc();
13723 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13724
13725 MachineFunction *MF = MBB->getParent();
13726 MachineRegisterInfo &MRI = MF->getRegInfo();
13727
13728 MVT PVT = getPointerTy(MF->getDataLayout());
13729 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13730 "Invalid Pointer Size!");
13731
13732 const TargetRegisterClass *RC =
13733 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
// Tmp holds the reloaded jump address until it is moved to CTR.
13734 Register Tmp = MRI.createVirtualRegister(RC);
13735 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13736 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13737 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
// 32-bit SVR4 position-independent code uses R29 as the base pointer here
// instead of R30.
13738 unsigned BP =
13739 (PVT == MVT::i64)
13740 ? PPC::X30
13741 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13742 : PPC::R30);
13743
// NOTE(review): listing line 13744 is absent. `MIB`, assigned repeatedly
// below, is not declared in the visible text; presumably it is declared on
// the missing line -- confirm.
13745
// Buffer slot offsets, in units of the pointer store size; FP is at offset 0.
13746 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13747 const int64_t SPOffset = 2 * PVT.getStoreSize();
13748 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13749 const int64_t BPOffset = 4 * PVT.getStoreSize();
13750
13751 Register BufReg = MI.getOperand(0).getReg();
13752
13753 // Reload FP (the jumped-to function may not have had a
13754 // frame pointer, and if so, then its r31 will be restored
13755 // as necessary).
13756 if (PVT == MVT::i64) {
13757 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13758 .addImm(0)
13759 .addReg(BufReg);
13760 } else {
13761 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13762 .addImm(0)
13763 .addReg(BufReg);
13764 }
13765 MIB.cloneMemRefs(MI);
13766
13767 // Reload IP
13768 if (PVT == MVT::i64) {
13769 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13770 .addImm(LabelOffset)
13771 .addReg(BufReg);
13772 } else {
13773 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13774 .addImm(LabelOffset)
13775 .addReg(BufReg);
13776 }
13777 MIB.cloneMemRefs(MI);
13778
13779 // Reload SP
13780 if (PVT == MVT::i64) {
13781 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13782 .addImm(SPOffset)
13783 .addReg(BufReg);
13784 } else {
13785 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13786 .addImm(SPOffset)
13787 .addReg(BufReg);
13788 }
13789 MIB.cloneMemRefs(MI);
13790
13791 // Reload BP
13792 if (PVT == MVT::i64) {
13793 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13794 .addImm(BPOffset)
13795 .addReg(BufReg);
13796 } else {
13797 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13798 .addImm(BPOffset)
13799 .addReg(BufReg);
13800 }
13801 MIB.cloneMemRefs(MI);
13802
13803 // Reload TOC
13804 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13805 setUsesTOCBasePtr(*MBB->getParent());
13806 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13807 .addImm(TOCOffset)
13808 .addReg(BufReg)
13809 .cloneMemRefs(MI);
13810 }
13811
13812 // Jump
// Move the reloaded address into CTR and branch through it.
13813 BuildMI(*MBB, MI, DL,
13814 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13815 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13816
13817 MI.eraseFromParent();
13818 return MBB;
13819}
13820
// Returns true only when the function behind `MF` carries a "probe-stack"
// attribute whose string value is exactly "inline-asm".
// NOTE(review): the declarator line of this function (listing line 13821) is
// not present in this listing; the body only shows that it uses `MF`.
13822 // If the function specifically requests inline stack probes, emit them.
13823 if (MF.getFunction().hasFnAttribute("probe-stack"))
13824 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13825 "inline-asm";
13826 return false;
13827}
13828
// Returns the stack probe interval for the function behind `MF`: the
// "stack-probe-size" attribute value (4096 when absent) rounded down to a
// multiple of the stack alignment, or the stack alignment itself when that
// rounding yields zero.
// NOTE(review): the declarator line of this function (listing line 13829) is
// not present in this listing; the body only shows that it uses `MF`.
13830 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13831 unsigned StackAlign = TFI->getStackAlignment();
13832 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13833 "Unexpected stack alignment");
13834 // The default stack probe size is 4096 if the function has no
13835 // stack-probe-size attribute.
13836 const Function &Fn = MF.getFunction();
13837 unsigned StackProbeSize =
13838 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13839 // Round down to the stack alignment.
// StackAlign is a power of two (asserted above), so the mask clears exactly
// the low bits.
13840 StackProbeSize &= ~(StackAlign - 1);
// Never report a probe size of zero.
13841 return StackProbeSize ? StackProbeSize : StackAlign;
13842}
13843
13844 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13845 // into three phases. In the first phase, it uses pseudo instruction
13846 // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
13847 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
13848 // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
13849 // MaxCallFrameSize so that it can calculate correct data area pointer.
//
// Operands read from MI below: 0 = result register, 1 = negated allocation
// size, 2 and 3 = forwarded unchanged to the pseudo instructions built here.
// Erases MI and returns TailMBB.
//
// NOTE(review): the leading declarator lines (listing lines 13850-13851, with
// the return type, function name and the `MI` parameter) are not present in
// this listing.
13852 MachineBasicBlock *MBB) const {
13853 const bool isPPC64 = Subtarget.isPPC64();
13854 MachineFunction *MF = MBB->getParent();
13855 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13856 DebugLoc DL = MI.getDebugLoc();
13857 const unsigned ProbeSize = getStackProbeSize(*MF);
13858 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13859 MachineRegisterInfo &MRI = MF->getRegInfo();
13860 // The CFG of probing stack looks as
13861 // +-----+
13862 // | MBB |
13863 // +--+--+
13864 // |
13865 // +----v----+
13866 // +--->+ TestMBB +---+
13867 // | +----+----+ |
13868 // | | |
13869 // | +-----v----+ |
13870 // +---+ BlockMBB | |
13871 // +----------+ |
13872 // |
13873 // +---------+ |
13874 // | TailMBB +<--+
13875 // +---------+
13876 // In MBB, calculate previous frame pointer and final stack pointer.
13877 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13878 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13879 // TailMBB is spliced via \p MI.
13880 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13881 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13882 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13883
// Layout order after MBB: TestMBB, BlockMBB, TailMBB.
13884 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13885 MF->insert(MBBIter, TestMBB);
13886 MF->insert(MBBIter, BlockMBB);
13887 MF->insert(MBBIter, TailMBB);
13888
13889 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13890 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13891
13892 Register DstReg = MI.getOperand(0).getReg();
13893 Register NegSizeReg = MI.getOperand(1).getReg();
13894 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13895 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13896 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13897 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13898
13899 // Since value of NegSizeReg might be realigned in prologepilog, insert a
13900 // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
13901 // NegSize.
13902 unsigned ProbeOpc;
13903 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13904 ProbeOpc =
13905 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13906 else
13907 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
13908 // and NegSizeReg will be allocated in the same phyreg to avoid
13909 // redundant copy when NegSizeReg has only one use which is current MI and
13910 // will be replaced by PREPARE_PROBED_ALLOCA then.
13911 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13912 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
// The pseudo defines two registers: FramePointer and ActualNegSizeReg.
13913 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13914 .addDef(ActualNegSizeReg)
13915 .addReg(NegSizeReg)
13916 .add(MI.getOperand(2))
13917 .add(MI.getOperand(3));
13918
13919 // Calculate final stack pointer, which equals to SP + ActualNegSize.
13920 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13921 FinalStackPtr)
13922 .addReg(SPReg)
13923 .addReg(ActualNegSizeReg);
13924
13925 // Materialize a scratch register for update.
13926 int64_t NegProbeSize = -(int64_t)ProbeSize;
13927 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13928 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
// A value that does not fit in 16 signed bits is built from its high half
// (LIS) and low half (ORI); otherwise a single LI suffices.
13929 if (!isInt<16>(NegProbeSize)) {
13930 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13931 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13932 .addImm(NegProbeSize >> 16);
13933 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13934 ScratchReg)
13935 .addReg(TempReg)
13936 .addImm(NegProbeSize & 0xFFFF);
13937 } else
13938 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13939 .addImm(NegProbeSize);
13940
13941 {
13942 // Probing leading residual part.
// NegMod = ActualNegSize - (ActualNegSize / NegProbeSize) * NegProbeSize,
// i.e. the remainder; the store-with-update moves SP by that amount.
13943 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13944 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13945 .addReg(ActualNegSizeReg)
13946 .addReg(ScratchReg);
13947 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13948 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13949 .addReg(Div)
13950 .addReg(ScratchReg);
13951 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13952 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13953 .addReg(Mul)
13954 .addReg(ActualNegSizeReg);
13955 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13956 .addReg(FramePointer)
13957 .addReg(SPReg)
13958 .addReg(NegMod);
13959 }
13960
13961 {
13962 // Remaining part should be multiple of ProbeSize.
13963 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13964 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13965 .addReg(SPReg)
13966 .addReg(FinalStackPtr);
// NOTE(review): listing line 13968 is absent. This BCC shows no predicate
// immediate before the CR operand; presumably the missing line supplies it
// (the CFG comment above says the branch is taken when SP equals the final
// stack pointer) -- confirm.
13967 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13969 .addReg(CmpResult)
13970 .addMBB(TailMBB);
13971 TestMBB->addSuccessor(BlockMBB);
13972 TestMBB->addSuccessor(TailMBB);
13973 }
13974
13975 {
13976 // Touch the block.
13977 // |P...|P...|P...
// Each iteration moves SP by the value in ScratchReg (NegProbeSize) with a
// store-with-update, then re-tests.
13978 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13979 .addReg(FramePointer)
13980 .addReg(SPReg)
13981 .addReg(ScratchReg);
13982 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13983 BlockMBB->addSuccessor(TestMBB);
13984 }
13985
13986 // Calculation of MaxCallFrameSize is deferred to prologepilog, use
13987 // DYNAREAOFFSET pseudo instruction to get the future result.
13988 Register MaxCallFrameSizeReg =
13989 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13990 BuildMI(TailMBB, DL,
13991 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13992 MaxCallFrameSizeReg)
13993 .add(MI.getOperand(2))
13994 .add(MI.getOperand(3));
// Result = SP + MaxCallFrameSize.
13995 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13996 .addReg(SPReg)
13997 .addReg(MaxCallFrameSizeReg);
13998
13999 // Splice instructions after MI to TailMBB.
14000 TailMBB->splice(TailMBB->end(), MBB,
14001 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
// NOTE(review): listing line 14002 is absent. Nothing visible hands MBB's
// original successor edges over to TailMBB; presumably that happens on the
// missing line -- confirm.
14003 MBB->addSuccessor(TestMBB);
14004
14005 // Delete the pseudo instruction.
14006 MI.eraseFromParent();
14007
14008 ++NumDynamicAllocaProbed;
14009 return TailMBB;
14010}
14011
14012/// Check if the opcode is a SELECT or SELECT_CC variant.
14013/// @param Opcode The opcode to check
14014/// @param CheckOnlyCC If true, only return true for SELECT_CC variants;
14015/// if false, return true for both SELECT and SELECT_CC
14016static bool IsSelect(unsigned Opcode, bool CheckOnlyCC = false) {
14017 switch (Opcode) {
14018 // SELECT_CC variants - always return true
14019 case PPC::SELECT_CC_I4:
14020 case PPC::SELECT_CC_I8:
14021 case PPC::SELECT_CC_F4:
14022 case PPC::SELECT_CC_F8:
14023 case PPC::SELECT_CC_F16:
14024 case PPC::SELECT_CC_VRRC:
14025 case PPC::SELECT_CC_VSFRC:
14026 case PPC::SELECT_CC_VSSRC:
14027 case PPC::SELECT_CC_VSRC:
14028 case PPC::SELECT_CC_SPE4:
14029 case PPC::SELECT_CC_SPE:
14030 return true;
14031 // SELECT variants - only return true if CheckOnlyCC is false
14032 case PPC::SELECT_I4:
14033 case PPC::SELECT_I8:
14034 case PPC::SELECT_F4:
14035 case PPC::SELECT_F8:
14036 case PPC::SELECT_F16:
14037 case PPC::SELECT_SPE:
14038 case PPC::SELECT_SPE4:
14039 case PPC::SELECT_VRRC:
14040 case PPC::SELECT_VSFRC:
14041 case PPC::SELECT_VSSRC:
14042 case PPC::SELECT_VSRC:
14043 return !CheckOnlyCC; // true if checking all SELECTs, false if only CC
14044 default:
14045 return false;
14046 }
14047}
14048static bool IsSelectCC(unsigned Opcode) { return IsSelect(Opcode, true); }
14049
14050/// Emit SELECT instruction, using ISEL if available, otherwise use
14051/// branch-based control flow.
14052///
14053/// For targets with ISEL support (SELECT_CC_I4/I8, SELECT_I4/I8), this
14054/// generates a single ISEL instruction. Otherwise, it creates a
14055/// branch-based control flow pattern with PHI nodes.
14057 const TargetInstrInfo *TII,
14058 const PPCSubtarget &Subtarget) {
14059 assert(IsSelect(MI.getOpcode()) && "Instruction must be a SELECT variant");
14060
14061 // Check if we can use ISEL for this SELECT
14062 if (Subtarget.hasISEL() &&
14063 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
14064 MI.getOpcode() == PPC::SELECT_CC_I8 ||
14065 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
14067 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
14068 MI.getOpcode() == PPC::SELECT_CC_I8)
14069 Cond.push_back(MI.getOperand(4));
14070 else
14072 Cond.push_back(MI.getOperand(1));
14073
14074 DebugLoc dl = MI.getDebugLoc();
14075 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
14076 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
14077 MI.eraseFromParent();
14078 return BB;
14079 }
14080
14081 // Fall back to branch-based SELECT implementation
14082 MachineFunction *F = BB->getParent();
14083 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14085 DebugLoc dl = MI.getDebugLoc();
14086
14087 MachineBasicBlock *thisMBB = BB;
14088 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
14089 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14090 F->insert(It, copy0MBB);
14091 F->insert(It, sinkMBB);
14092
14093 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
14094 copy0MBB->addLiveIn(PPC::CARRY);
14095 sinkMBB->addLiveIn(PPC::CARRY);
14096 }
14097
14098 // Set the call frame size on entry to the new basic blocks.
14099 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
14100 copy0MBB->setCallFrameSize(CallFrameSize);
14101 sinkMBB->setCallFrameSize(CallFrameSize);
14102
14103 // Transfer the remainder of BB and its successor edges to sinkMBB.
14104 sinkMBB->splice(sinkMBB->begin(), BB,
14105 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14107
14108 // Add successors
14109 BB->addSuccessor(copy0MBB);
14110 BB->addSuccessor(sinkMBB);
14111
14112 // Build branch instruction
14113 if (IsSelectCC(MI.getOpcode()))
14114 BuildMI(BB, dl, TII->get(PPC::BCC))
14115 .addImm(MI.getOperand(4).getImm())
14116 .addReg(MI.getOperand(1).getReg())
14117 .addMBB(sinkMBB);
14118 else
14119 BuildMI(BB, dl, TII->get(PPC::BC))
14120 .addReg(MI.getOperand(1).getReg())
14121 .addMBB(sinkMBB);
14122
14123 // copy0MBB: fallthrough to sinkMBB
14124 BB = copy0MBB;
14125 BB->addSuccessor(sinkMBB);
14126
14127 // sinkMBB: PHI instruction
14128 BB = sinkMBB;
14129 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
14130 .addReg(MI.getOperand(3).getReg())
14131 .addMBB(copy0MBB)
14132 .addReg(MI.getOperand(2).getReg())
14133 .addMBB(thisMBB);
14134 MI.eraseFromParent();
14135 return BB;
14136}
14137
14138/// Helper function to create basic blocks for atomic compare-and-swap.
14139/// Creates three basic blocks (loop1MBB, loop2MBB, exitMBB) and sets up
14140/// the control flow structure common to both hardware and software
14141/// implementations of atomic compare-and-swap operations.
14143 MachineBasicBlock *&loop1MBB,
14144 MachineBasicBlock *&loop2MBB,
14145 MachineBasicBlock *&exitMBB,
14148 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14149 loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14150 loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14151 exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14152 F->insert(It, loop1MBB);
14153 F->insert(It, loop2MBB);
14154 F->insert(It, exitMBB);
14155 exitMBB->splice(exitMBB->begin(), BB,
14156 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14158 BB->addSuccessor(loop1MBB);
14159}
14160
14161/// Emit hardware-supported atomic compare-and-swap for I32/I64 and I8/I16
14162/// with partword atomic support.
14163///
14164/// This uses native PowerPC atomic instructions (LBARX/LHARX/LWARX/LDARX for
14165/// load-and-reserve, STBCX/STHCX/STWCX/STDCX for store-conditional) to
14166/// implement atomic compare-and-swap at byte, halfword, word, or doubleword
14167/// granularity.
14168///
14169/// Control flow:
14170/// thisMBB -> loop1MBB -> loop2MBB -> exitMBB
14171/// | |
14172/// +------------+
14173///
14174/// loop1MBB:
14175/// - Load-and-reserve from memory
14176/// - Compare loaded value with expected old value
14177/// - Branch to exitMBB if not equal (CAS failed)
14178/// loop2MBB:
14179/// - Store-conditional new value to memory
14180/// - Branch back to loop1MBB if store failed (retry)
14181/// - Fall through to exitMBB on success
14182static MachineBasicBlock *
14184 const TargetInstrInfo *TII,
14185 const PPCSubtarget &Subtarget) {
14186 MachineFunction *F = BB->getParent();
14188
14189 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14190
14191 unsigned LoadMnemonic = PPC::LDARX;
14192 unsigned StoreMnemonic = PPC::STDCX;
14193 switch (MI.getOpcode()) {
14194 default:
14195 llvm_unreachable("Compare and swap of unknown size");
14196 case PPC::ATOMIC_CMP_SWAP_I8:
14197 LoadMnemonic = PPC::LBARX;
14198 StoreMnemonic = PPC::STBCX;
14199 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14200 break;
14201 case PPC::ATOMIC_CMP_SWAP_I16:
14202 LoadMnemonic = PPC::LHARX;
14203 StoreMnemonic = PPC::STHCX;
14204 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14205 break;
14206 case PPC::ATOMIC_CMP_SWAP_I32:
14207 LoadMnemonic = PPC::LWARX;
14208 StoreMnemonic = PPC::STWCX;
14209 break;
14210 case PPC::ATOMIC_CMP_SWAP_I64:
14211 LoadMnemonic = PPC::LDARX;
14212 StoreMnemonic = PPC::STDCX;
14213 break;
14214 }
14215
14216 MachineRegisterInfo &RegInfo = F->getRegInfo();
14217 Register dest = MI.getOperand(0).getReg();
14218 Register ptrA = MI.getOperand(1).getReg();
14219 Register ptrB = MI.getOperand(2).getReg();
14220 Register oldval = MI.getOperand(3).getReg();
14221 Register newval = MI.getOperand(4).getReg();
14222 DebugLoc dl = MI.getDebugLoc();
14223
14224 MachineBasicBlock *loop1MBB, *loop2MBB, *exitMBB;
14225 createAtomicLoopBlocks(F, BB, loop1MBB, loop2MBB, exitMBB, MI, It);
14226
14227 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14228
14229 // loop1MBB:
14230 // l[bhwd]arx dest, ptr
14231 // cmp[wd] dest, oldval
14232 // bne- exitBB
14233 BB = loop1MBB;
14234 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14235 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14236 .addReg(dest)
14237 .addReg(oldval);
14238 BuildMI(BB, dl, TII->get(PPC::BCC))
14240 .addReg(CrReg)
14241 .addMBB(exitMBB);
14242 BB->addSuccessor(loop2MBB);
14243 BB->addSuccessor(exitMBB);
14244
14245 // loop2MBB:
14246 // st[bhwd]cx. newval, ptr
14247 // bne- loop1MBB
14248 // b exitBB
14249 BB = loop2MBB;
14250 BuildMI(BB, dl, TII->get(StoreMnemonic))
14251 .addReg(newval)
14252 .addReg(ptrA)
14253 .addReg(ptrB);
14254 BuildMI(BB, dl, TII->get(PPC::BCC))
14256 .addReg(PPC::CR0)
14257 .addMBB(loop1MBB);
14258 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14259 BB->addSuccessor(loop1MBB);
14260 BB->addSuccessor(exitMBB);
14261
14262 return exitMBB;
14263}
14264
14265/// Emit software-emulated atomic compare-and-swap for I8/I16 without
14266/// hardware partword atomic support.
14267///
14268/// This emulates byte/halfword atomic operations using word (32-bit) atomic
14269/// instructions. Since PowerPC atomic instructions work at word granularity,
14270/// we must:
14271/// 1. Align the pointer to a word boundary
14272/// 2. Calculate the bit shift for the target byte/halfword within the word
14273/// 3. Create masks to isolate the target byte/halfword
14274/// 4. Shift old/new values into the correct bit position
14275/// 5. Use LWARX/STWCX on the full word
14276/// 6. Mask and merge to preserve other bytes in the word
14277/// 7. Extract and shift the result back
14278///
14279/// Control flow:
14280/// thisMBB -> loop1MBB -> loop2MBB -> exitMBB
14281/// | |
14282/// +------------+
14283///
14284/// loop1MBB:
14285/// - LWARX: Load-and-reserve full word
14286/// - Mask to extract target byte/halfword
14287/// - Compare with expected old value
14288/// - Branch to exitMBB if not equal (CAS failed)
14289/// loop2MBB:
14290/// - Merge new value with other bytes in the word
14291/// - STWCX: Store-conditional full word
14292/// - Branch back to loop1MBB if store failed (retry)
14293/// - Fall through to exitMBB on success
14294/// exitMBB:
14295/// - Extract and return the loaded value
14296static MachineBasicBlock *
14298 const TargetInstrInfo *TII,
14299 const PPCSubtarget &Subtarget) {
14300 MachineFunction *F = BB->getParent();
14302
14303 bool is64bit = Subtarget.isPPC64();
14304 bool isLittleEndian = Subtarget.isLittleEndian();
14305 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14306
14307 Register dest = MI.getOperand(0).getReg();
14308 Register ptrA = MI.getOperand(1).getReg();
14309 Register ptrB = MI.getOperand(2).getReg();
14310 Register oldval = MI.getOperand(3).getReg();
14311 Register newval = MI.getOperand(4).getReg();
14312 DebugLoc dl = MI.getDebugLoc();
14313
14314 MachineBasicBlock *loop1MBB, *loop2MBB, *exitMBB;
14315 createAtomicLoopBlocks(F, BB, loop1MBB, loop2MBB, exitMBB, MI, It);
14316
14317 MachineRegisterInfo &RegInfo = F->getRegInfo();
14318 const TargetRegisterClass *RC =
14319 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14320 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14321
14322 // Lambda to create virtual registers
14323 auto createVReg = [&](const TargetRegisterClass *RC) {
14324 return RegInfo.createVirtualRegister(RC);
14325 };
14326
14327 Register PtrReg = createVReg(RC);
14328 Register Shift1Reg = createVReg(GPRC);
14329 Register ShiftReg = isLittleEndian ? Shift1Reg : createVReg(GPRC);
14330 Register NewVal2Reg = createVReg(GPRC);
14331 Register NewVal3Reg = createVReg(GPRC);
14332 Register OldVal2Reg = createVReg(GPRC);
14333 Register OldVal3Reg = createVReg(GPRC);
14334 Register MaskReg = createVReg(GPRC);
14335 Register Mask2Reg = createVReg(GPRC);
14336 Register Mask3Reg = createVReg(GPRC);
14337 Register Tmp2Reg = createVReg(GPRC);
14338 Register Tmp4Reg = createVReg(GPRC);
14339 Register TmpDestReg = createVReg(GPRC);
14340 Register TmpReg = createVReg(GPRC);
14341 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14342 Register CrReg = createVReg(&PPC::CRRCRegClass);
14343
14344 // Compute aligned pointer and shift amount
14345 Register Ptr1Reg;
14346 if (ptrA != ZeroReg) {
14347 Ptr1Reg = createVReg(RC);
14348 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14349 .addReg(ptrA)
14350 .addReg(ptrB);
14351 } else {
14352 Ptr1Reg = ptrB;
14353 }
14354
14355 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14356 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
14357 .addImm(3)
14358 .addImm(27)
14359 .addImm(is8bit ? 28 : 27);
14360 if (!isLittleEndian)
14361 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14362 .addReg(Shift1Reg)
14363 .addImm(is8bit ? 24 : 16);
14364 if (is64bit)
14365 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14366 .addReg(Ptr1Reg)
14367 .addImm(0)
14368 .addImm(61);
14369 else
14370 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14371 .addReg(Ptr1Reg)
14372 .addImm(0)
14373 .addImm(0)
14374 .addImm(29);
14375
14376 // Prepare masked values
14377 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14378 .addReg(newval)
14379 .addReg(ShiftReg);
14380 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14381 .addReg(oldval)
14382 .addReg(ShiftReg);
14383 if (is8bit)
14384 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14385 else {
14386 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14387 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14388 .addReg(Mask3Reg)
14389 .addImm(65535);
14390 }
14391 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14392 .addReg(Mask2Reg)
14393 .addReg(ShiftReg);
14394 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14395 .addReg(NewVal2Reg)
14396 .addReg(MaskReg);
14397 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14398 .addReg(OldVal2Reg)
14399 .addReg(MaskReg);
14400
14401 // loop1MBB:
14402 // lwarx tmpDest, ptr
14403 // and tmp, tmpDest, mask
14404 // cmpw tmp, oldval3
14405 // bne- exitBB
14406 BB = loop1MBB;
14407 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14408 .addReg(ZeroReg)
14409 .addReg(PtrReg);
14410 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14411 .addReg(TmpDestReg)
14412 .addReg(MaskReg);
14413 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg).addReg(TmpReg).addReg(OldVal3Reg);
14414 BuildMI(BB, dl, TII->get(PPC::BCC))
14416 .addReg(CrReg)
14417 .addMBB(exitMBB);
14418 BB->addSuccessor(loop2MBB);
14419 BB->addSuccessor(exitMBB);
14420
14421 // loop2MBB:
14422 // andc tmp2, tmpDest, mask
14423 // or tmp4, tmp2, newval3
14424 // stwcx. tmp4, ptr
14425 // bne- loop1MBB
14426 // b exitBB
14427 BB = loop2MBB;
14428 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14429 .addReg(TmpDestReg)
14430 .addReg(MaskReg);
14431 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14432 .addReg(Tmp2Reg)
14433 .addReg(NewVal3Reg);
14434 BuildMI(BB, dl, TII->get(PPC::STWCX))
14435 .addReg(Tmp4Reg)
14436 .addReg(ZeroReg)
14437 .addReg(PtrReg);
14438 BuildMI(BB, dl, TII->get(PPC::BCC))
14440 .addReg(PPC::CR0)
14441 .addMBB(loop1MBB);
14442 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14443 BB->addSuccessor(loop1MBB);
14444 BB->addSuccessor(exitMBB);
14445
14446 // exitMBB:
14447 // srw dest, tmpDest, shift
14448 BB = exitMBB;
14449 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14450 .addReg(TmpReg)
14451 .addReg(ShiftReg);
14452
14453 return BB;
14454}
14455
14458 MachineBasicBlock *BB) const {
14459 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
14460
14461 // To "insert" these instructions we actually have to insert their
14462 // control-flow patterns.
14463 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14465
14466 MachineFunction *F = BB->getParent();
14467 MachineRegisterInfo &MRI = F->getRegInfo();
14468
14469 // Handle SELECT with ISEL support first (before generic SELECT handling)
14470 if (IsSelect(MI.getOpcode()))
14471 return emitSelect(MI, BB, TII, Subtarget);
14472
14473 switch (MI.getOpcode()) {
14474 case TargetOpcode::STACKMAP:
14475 return emitPatchPoint(MI, BB);
14476 case TargetOpcode::PATCHPOINT:
14477 // Call lowering should have added an r2 operand to indicate a dependence
14478 // on the TOC base pointer value. It can't however, because there is no
14479 // way to mark the dependence as implicit there, and so the stackmap code
14480 // will confuse it with a regular operand. Instead, add the dependence
14481 // here.
14482 if (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls())
14483 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
14484 return emitPatchPoint(MI, BB);
14485
14486 case PPC::EH_SjLj_SetJmp32:
14487 case PPC::EH_SjLj_SetJmp64:
14488 return emitEHSjLjSetJmp(MI, BB);
14489
14490 case PPC::EH_SjLj_LongJmp32:
14491 case PPC::EH_SjLj_LongJmp64:
14492 return emitEHSjLjLongJmp(MI, BB);
14493
14494 case PPC::ReadTB: {
14495 // To read the 64-bit time-base register on a 32-bit target, we read the
14496 // two halves. Should the counter have wrapped while it was being read, we
14497 // need to try again.
14498 // ...
14499 // readLoop:
14500 // mfspr Rx,TBU # load from TBU
14501 // mfspr Ry,TB # load from TB
14502 // mfspr Rz,TBU # load from TBU
14503 // cmpw crX,Rx,Rz # check if 'old'='new'
14504 // bne readLoop # branch if they're not equal
14505 // ...
14506
14507 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
14508 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14509 DebugLoc dl = MI.getDebugLoc();
14510 F->insert(It, readMBB);
14511 F->insert(It, sinkMBB);
14512
14513 // Transfer the remainder of BB and its successor edges to sinkMBB.
14514 sinkMBB->splice(sinkMBB->begin(), BB,
14515 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14517
14518 BB->addSuccessor(readMBB);
14519 BB = readMBB;
14520
14521 MachineRegisterInfo &RegInfo = F->getRegInfo();
14522 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
14523 Register LoReg = MI.getOperand(0).getReg();
14524 Register HiReg = MI.getOperand(1).getReg();
14525
14526 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
14527 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
14528 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
14529
14530 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14531
14532 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
14533 .addReg(HiReg)
14534 .addReg(ReadAgainReg);
14535 BuildMI(BB, dl, TII->get(PPC::BCC))
14537 .addReg(CmpReg)
14538 .addMBB(readMBB);
14539
14540 BB->addSuccessor(readMBB);
14541 BB->addSuccessor(sinkMBB);
14542 break;
14543 }
14544 case PPC::ATOMIC_LOAD_ADD_NOWP:
14545 BB = EmitPartwordAtomicBinary(MI, BB, PPC::ADD4);
14546 break;
14547 case PPC::ATOMIC_LOAD_ADD:
14548 BB = EmitAtomicBinary(MI, BB, PPC::ADD4);
14549 break;
14550 case PPC::ATOMIC_LOAD_ADD_I64:
14551 BB = EmitAtomicBinary(MI, BB, PPC::ADD8);
14552 break;
14553 case PPC::ATOMIC_LOAD_AND_NOWP:
14554 BB = EmitPartwordAtomicBinary(MI, BB, PPC::AND);
14555 break;
14556 case PPC::ATOMIC_LOAD_AND:
14557 BB = EmitAtomicBinary(MI, BB, PPC::AND);
14558 break;
14559 case PPC::ATOMIC_LOAD_AND_I64:
14560 BB = EmitAtomicBinary(MI, BB, PPC::AND8);
14561 break;
14562 case PPC::ATOMIC_LOAD_OR_NOWP:
14563 BB = EmitPartwordAtomicBinary(MI, BB, PPC::OR);
14564 break;
14565 case PPC::ATOMIC_LOAD_OR:
14566 BB = EmitAtomicBinary(MI, BB, PPC::OR);
14567 break;
14568 case PPC::ATOMIC_LOAD_OR_I64:
14569 BB = EmitAtomicBinary(MI, BB, PPC::OR8);
14570 break;
14571 case PPC::ATOMIC_LOAD_XOR_NOWP:
14572 BB = EmitPartwordAtomicBinary(MI, BB, PPC::XOR);
14573 break;
14574 case PPC::ATOMIC_LOAD_XOR:
14575 BB = EmitAtomicBinary(MI, BB, PPC::XOR);
14576 break;
14577 case PPC::ATOMIC_LOAD_XOR_I64:
14578 BB = EmitAtomicBinary(MI, BB, PPC::XOR8);
14579 break;
14580 case PPC::ATOMIC_LOAD_NAND_NOWP:
14581 BB = EmitPartwordAtomicBinary(MI, BB, PPC::NAND);
14582 break;
14583 case PPC::ATOMIC_LOAD_NAND:
14584 BB = EmitAtomicBinary(MI, BB, PPC::NAND);
14585 break;
14586 case PPC::ATOMIC_LOAD_NAND_I64:
14587 BB = EmitAtomicBinary(MI, BB, PPC::NAND8);
14588 break;
14589 case PPC::ATOMIC_LOAD_SUB_NOWP:
14590 BB = EmitPartwordAtomicBinary(MI, BB, PPC::SUBF);
14591 break;
14592 case PPC::ATOMIC_LOAD_SUB:
14593 BB = EmitAtomicBinary(MI, BB, PPC::SUBF);
14594 break;
14595 case PPC::ATOMIC_LOAD_SUB_I64:
14596 BB = EmitAtomicBinary(MI, BB, PPC::SUBF8);
14597 break;
14598 case PPC::ATOMIC_LOAD_MIN_NOWP:
14599 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_LT);
14600 break;
14601 case PPC::ATOMIC_LOAD_MIN:
14602 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_LT);
14603 break;
14604 case PPC::ATOMIC_LOAD_MIN_I64:
14605 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPD, PPC::PRED_LT);
14606 break;
14607 case PPC::ATOMIC_LOAD_MAX_NOWP:
14608 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_GT);
14609 break;
14610 case PPC::ATOMIC_LOAD_MAX:
14611 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_GT);
14612 break;
14613 case PPC::ATOMIC_LOAD_MAX_I64:
14614 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPD, PPC::PRED_GT);
14615 break;
14616 case PPC::ATOMIC_LOAD_UMIN_NOWP:
14617 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_LT);
14618 break;
14619 case PPC::ATOMIC_LOAD_UMIN:
14620 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_LT);
14621 break;
14622 case PPC::ATOMIC_LOAD_UMIN_I64:
14623 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLD, PPC::PRED_LT);
14624 break;
14625 case PPC::ATOMIC_LOAD_UMAX_NOWP:
14626 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_GT);
14627 break;
14628 case PPC::ATOMIC_LOAD_UMAX:
14629 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_GT);
14630 break;
14631 case PPC::ATOMIC_LOAD_UMAX_I64:
14632 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLD, PPC::PRED_GT);
14633 break;
14634 case PPC::ATOMIC_SWAP_NOWP:
14635 BB = EmitPartwordAtomicBinary(MI, BB, 0);
14636 break;
14637 case PPC::ATOMIC_SWAP:
14638 case PPC::ATOMIC_SWAP_I64:
14639 BB = EmitAtomicBinary(MI, BB, 0);
14640 break;
14641 case PPC::ATOMIC_CMP_SWAP_I32:
14642 case PPC::ATOMIC_CMP_SWAP_I64:
14643 case PPC::ATOMIC_CMP_SWAP_I8:
14644 case PPC::ATOMIC_CMP_SWAP_I16: {
14645 // Use hardware-supported atomic operations if available
14646 bool useHardware = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14647 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14648 (Subtarget.hasPartwordAtomics() &&
14649 (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14650 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16));
14651
14652 if (useHardware)
14653 BB = emitAtomicCmpSwapHardware(MI, BB, TII, Subtarget);
14654 else
14655 BB = emitAtomicCmpSwapSoftware(MI, BB, TII, Subtarget);
14656 break;
14657 }
14658 case PPC::FADDrtz: {
14659 // This pseudo performs an FADD with rounding mode temporarily forced
14660 // to round-to-zero. We emit this via custom inserter since the FPSCR
14661 // is not modeled at the SelectionDAG level.
14662 Register Dest = MI.getOperand(0).getReg();
14663 Register Src1 = MI.getOperand(1).getReg();
14664 Register Src2 = MI.getOperand(2).getReg();
14665 DebugLoc dl = MI.getDebugLoc();
14666
14667 MachineRegisterInfo &RegInfo = F->getRegInfo();
14668 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14669
14670 // Save FPSCR value.
14671 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14672
14673 // Set rounding mode to round-to-zero.
14674 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14675 .addImm(31)
14677
14678 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14679 .addImm(30)
14681
14682 // Perform addition.
14683 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14684 .addReg(Src1)
14685 .addReg(Src2);
14686 if (MI.getFlag(MachineInstr::NoFPExcept))
14688
14689 // Restore FPSCR value.
14690 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14691 break;
14692 }
14693 case PPC::ANDI_rec_1_EQ_BIT:
14694 case PPC::ANDI_rec_1_GT_BIT:
14695 case PPC::ANDI_rec_1_EQ_BIT8:
14696 case PPC::ANDI_rec_1_GT_BIT8: {
14697 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14698 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14699 ? PPC::ANDI8_rec
14700 : PPC::ANDI_rec;
14701 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14702 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14703
14704 MachineRegisterInfo &RegInfo = F->getRegInfo();
14705 Register Dest = RegInfo.createVirtualRegister(
14706 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14707
14708 DebugLoc Dl = MI.getDebugLoc();
14709 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14710 .addReg(MI.getOperand(1).getReg())
14711 .addImm(1);
14712 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14713 MI.getOperand(0).getReg())
14714 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14715 break;
14716 }
14717 case PPC::TCHECK_RET: {
14718 DebugLoc Dl = MI.getDebugLoc();
14719 MachineRegisterInfo &RegInfo = F->getRegInfo();
14720 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14721 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14722 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14723 MI.getOperand(0).getReg())
14724 .addReg(CRReg);
14725 break;
14726 }
14727 case PPC::TBEGIN_RET: {
14728 DebugLoc Dl = MI.getDebugLoc();
14729 unsigned Imm = MI.getOperand(1).getImm();
14730 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14731 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14732 MI.getOperand(0).getReg())
14733 .addReg(PPC::CR0EQ);
14734 break;
14735 }
14736 case PPC::SETRNDi: {
14737 DebugLoc dl = MI.getDebugLoc();
14738 Register OldFPSCRReg = MI.getOperand(0).getReg();
14739
14740 // Save FPSCR value.
14741 if (MRI.use_empty(OldFPSCRReg))
14742 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14743 else
14744 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14745
14746 // The floating point rounding mode is in the bits 62:63 of FPSCR, and has
14747 // the following settings:
14748 // 00 Round to nearest
14749 // 01 Round to 0
14750 // 10 Round to +inf
14751 // 11 Round to -inf
14752
14753 // When the operand is an immediate, use its two least significant bits to
14754 // set bits 62:63 of FPSCR.
14755 unsigned Mode = MI.getOperand(1).getImm();
14756 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14757 .addImm(31)
14759
14760 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14761 .addImm(30)
14763 break;
14764 }
14765 case PPC::SETRND: {
14766 DebugLoc dl = MI.getDebugLoc();
14767
14768 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14769 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14770 // If the target doesn't have DirectMove, we should use stack to do the
14771 // conversion, because the target doesn't have the instructions like mtvsrd
14772 // or mfvsrd to do this conversion directly.
14773 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14774 if (Subtarget.hasDirectMove()) {
14775 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14776 .addReg(SrcReg);
14777 } else {
14778 // Use stack to do the register copy.
14779 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14780 MachineRegisterInfo &RegInfo = F->getRegInfo();
14781 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14782 if (RC == &PPC::F8RCRegClass) {
14783 // Copy register from F8RCRegClass to G8RCRegclass.
14784 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14785 "Unsupported RegClass.");
14786
14787 StoreOp = PPC::STFD;
14788 LoadOp = PPC::LD;
14789 } else {
14790 // Copy register from G8RCRegClass to F8RCRegclass.
14791 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14792 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14793 "Unsupported RegClass.");
14794 }
14795
14796 MachineFrameInfo &MFI = F->getFrameInfo();
14797 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14798
14799 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14800 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14802 MFI.getObjectAlign(FrameIdx));
14803
14804 // Store the SrcReg into the stack.
14805 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14806 .addReg(SrcReg)
14807 .addImm(0)
14808 .addFrameIndex(FrameIdx)
14809 .addMemOperand(MMOStore);
14810
14811 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14812 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14814 MFI.getObjectAlign(FrameIdx));
14815
14816 // Load from the stack where SrcReg is stored, and save to DestReg,
14817 // so we have done the RegClass conversion from RegClass::SrcReg to
14818 // RegClass::DestReg.
14819 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14820 .addImm(0)
14821 .addFrameIndex(FrameIdx)
14822 .addMemOperand(MMOLoad);
14823 }
14824 };
14825
14826 Register OldFPSCRReg = MI.getOperand(0).getReg();
14827
14828 // Save FPSCR value.
14829 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14830
14831 // When the operand is gprc register, use two least significant bits of the
14832 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
14833 //
14834 // copy OldFPSCRTmpReg, OldFPSCRReg
14835 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14836 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14837 // copy NewFPSCRReg, NewFPSCRTmpReg
14838 // mtfsf 255, NewFPSCRReg
14839 MachineOperand SrcOp = MI.getOperand(1);
14840 MachineRegisterInfo &RegInfo = F->getRegInfo();
14841 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14842
14843 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14844
14845 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14846 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14847
14848 // The first operand of INSERT_SUBREG should be a register which has
14849 // subregisters, we only care about its RegClass, so we should use an
14850 // IMPLICIT_DEF register.
14851 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14852 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14853 .addReg(ImDefReg)
14854 .add(SrcOp)
14855 .addImm(1);
14856
14857 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14858 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14859 .addReg(OldFPSCRTmpReg)
14860 .addReg(ExtSrcReg)
14861 .addImm(0)
14862 .addImm(62);
14863
14864 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14865 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14866
14867 // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits
14868 // 32:63 of FPSCR.
14869 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14870 .addImm(255)
14871 .addReg(NewFPSCRReg)
14872 .addImm(0)
14873 .addImm(0);
14874 break;
14875 }
14876 case PPC::SETFLM: {
14877 DebugLoc Dl = MI.getDebugLoc();
14878
14879 // Result of setflm is previous FPSCR content, so we need to save it first.
14880 Register OldFPSCRReg = MI.getOperand(0).getReg();
14881 if (MRI.use_empty(OldFPSCRReg))
14882 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14883 else
14884 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14885
14886 // Put bits in 32:63 to FPSCR.
14887 Register NewFPSCRReg = MI.getOperand(1).getReg();
14888 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14889 .addImm(255)
14890 .addReg(NewFPSCRReg)
14891 .addImm(0)
14892 .addImm(0);
14893 break;
14894 }
14895 case PPC::PROBED_ALLOCA_32:
14896 case PPC::PROBED_ALLOCA_64:
14897 return emitProbedAlloca(MI, BB);
14898
14899 case PPC::SPLIT_QUADWORD: {
14900 DebugLoc DL = MI.getDebugLoc();
14901 Register Src = MI.getOperand(2).getReg();
14902 Register Lo = MI.getOperand(0).getReg();
14903 Register Hi = MI.getOperand(1).getReg();
14904 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14905 .addDef(Lo)
14906 .addUse(Src, {}, PPC::sub_gp8_x1);
14907 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14908 .addDef(Hi)
14909 .addUse(Src, {}, PPC::sub_gp8_x0);
14910 break;
14911 }
14912 case PPC::LQX_PSEUDO:
14913 case PPC::STQX_PSEUDO: {
14914 DebugLoc DL = MI.getDebugLoc();
14915 // Ptr is used as the ptr_rc_no_r0 part
14916 // of LQ/STQ's memory operand and adding result of RA and RB,
14917 // so it has to be g8rc_and_g8rc_nox0.
14918 Register Ptr =
14919 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14920 Register Val = MI.getOperand(0).getReg();
14921 Register RA = MI.getOperand(1).getReg();
14922 Register RB = MI.getOperand(2).getReg();
14923 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14924 BuildMI(*BB, MI, DL,
14925 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14926 : TII->get(PPC::STQ))
14927 .addReg(Val, getDefRegState(MI.getOpcode() == PPC::LQX_PSEUDO))
14928 .addImm(0)
14929 .addReg(Ptr);
14930 break;
14931 }
14932 default:
14933 llvm_unreachable("Unexpected instr type to insert");
14934 }
14935
14936 MI.eraseFromParent(); // The pseudo instruction is gone now.
14937 return BB;
14938}
14939
14940//===----------------------------------------------------------------------===//
14941// Target Optimization Hooks
14942//===----------------------------------------------------------------------===//
14943
14944static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14945 // For the estimates, convergence is quadratic, so we essentially double the
14946 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14947 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14948 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
14949 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14950 if (VT.getScalarType() == MVT::f64)
14951 RefinementSteps++;
14952 return RefinementSteps;
14953}
14954
14955SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14956 const DenormalMode &Mode,
14957 SDNodeFlags Flags) const {
14958 // We only have VSX Vector Test for software Square Root.
14959 EVT VT = Op.getValueType();
14960 if (!isTypeLegal(MVT::i1) ||
14961 (VT != MVT::f64 &&
14962 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14963 return TargetLowering::getSqrtInputTest(Op, DAG, Mode, Flags);
14964
14965 SDLoc DL(Op);
14966 // The output register of FTSQRT is CR field.
14967 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op, Flags);
14968 // ftsqrt BF,FRB
14969 // Let e_b be the unbiased exponent of the double-precision
14970 // floating-point operand in register FRB.
14971 // fe_flag is set to 1 if either of the following conditions occurs.
14972 // - The double-precision floating-point operand in register FRB is a zero,
14973 // a NaN, or an infinity, or a negative value.
14974 // - e_b is less than or equal to -970.
14975 // Otherwise fe_flag is set to 0.
14976 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14977 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14978 // exponent is less than -970)
14979 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14980 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14981 FTSQRT, SRIdxVal),
14982 0);
14983}
14984
14985SDValue
14986PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14987 SelectionDAG &DAG) const {
14988 // We only have VSX Vector Square Root.
14989 EVT VT = Op.getValueType();
14990 if (VT != MVT::f64 &&
14991 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14993
14994 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14995}
14996
14997SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14998 int Enabled, int &RefinementSteps,
14999 bool &UseOneConstNR,
15000 bool Reciprocal) const {
15001 EVT VT = Operand.getValueType();
15002 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
15003 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
15004 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
15005 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
15006 if (RefinementSteps == ReciprocalEstimate::Unspecified)
15007 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
15008
15009 // The Newton-Raphson computation with a single constant does not provide
15010 // enough accuracy on some CPUs.
15011 UseOneConstNR = !Subtarget.needsTwoConstNR();
15012 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
15013 }
15014 return SDValue();
15015}
15016
15017SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
15018 int Enabled,
15019 int &RefinementSteps) const {
15020 EVT VT = Operand.getValueType();
15021 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
15022 (VT == MVT::f64 && Subtarget.hasFRE()) ||
15023 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
15024 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
15025 if (RefinementSteps == ReciprocalEstimate::Unspecified)
15026 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
15027 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
15028 }
15029 return SDValue();
15030}
15031
15033 // Note: This functionality is used only when arcp is enabled, and
15034 // on cores with reciprocal estimates (which are used when arcp is
15035 // enabled for division), this functionality is redundant with the default
15036 // combiner logic (once the division -> reciprocal/multiply transformation
15037 // has taken place). As a result, this matters more for older cores than for
15038 // newer ones.
15039
15040 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
15041 // reciprocal if there are two or more FDIVs (for embedded cores with only
15042 // one FP pipeline) for three or more FDIVs (for generic OOO cores).
15043 switch (Subtarget.getCPUDirective()) {
15044 default:
15045 return 3;
15046 case PPC::DIR_440:
15047 case PPC::DIR_A2:
15048 case PPC::DIR_E500:
15049 case PPC::DIR_E500mc:
15050 case PPC::DIR_E5500:
15051 return 2;
15052 }
15053}
15054
15055// isConsecutiveLSLoc needs to work even if all adds have not yet been
15056// collapsed, and so we need to look through chains of them.
15058 int64_t& Offset, SelectionDAG &DAG) {
15059 if (DAG.isBaseWithConstantOffset(Loc)) {
15060 Base = Loc.getOperand(0);
15061 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
15062
15063 // The base might itself be a base plus an offset, and if so, accumulate
15064 // that as well.
15065 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
15066 }
15067}
15068
15070 unsigned Bytes, int Dist,
15071 SelectionDAG &DAG) {
15072 if (VT.getSizeInBits() / 8 != Bytes)
15073 return false;
15074
15075 SDValue BaseLoc = Base->getBasePtr();
15076 if (Loc.getOpcode() == ISD::FrameIndex) {
15077 if (BaseLoc.getOpcode() != ISD::FrameIndex)
15078 return false;
15080 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
15081 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
15082 int FS = MFI.getObjectSize(FI);
15083 int BFS = MFI.getObjectSize(BFI);
15084 if (FS != BFS || FS != (int)Bytes) return false;
15085 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
15086 }
15087
15088 SDValue Base1 = Loc, Base2 = BaseLoc;
15089 int64_t Offset1 = 0, Offset2 = 0;
15090 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
15091 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
15092 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
15093 return true;
15094
15095 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15096 const GlobalValue *GV1 = nullptr;
15097 const GlobalValue *GV2 = nullptr;
15098 Offset1 = 0;
15099 Offset2 = 0;
15100 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
15101 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
15102 if (isGA1 && isGA2 && GV1 == GV2)
15103 return Offset1 == (Offset2 + Dist*Bytes);
15104 return false;
15105}
15106
15107// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
15108// not enforce equality of the chain operands.
15110 unsigned Bytes, int Dist,
15111 SelectionDAG &DAG) {
15113 EVT VT = LS->getMemoryVT();
15114 SDValue Loc = LS->getBasePtr();
15115 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
15116 }
15117
15118 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
15119 EVT VT;
15120 switch (N->getConstantOperandVal(1)) {
15121 default: return false;
15122 case Intrinsic::ppc_altivec_lvx:
15123 case Intrinsic::ppc_altivec_lvxl:
15124 case Intrinsic::ppc_vsx_lxvw4x:
15125 case Intrinsic::ppc_vsx_lxvw4x_be:
15126 VT = MVT::v4i32;
15127 break;
15128 case Intrinsic::ppc_vsx_lxvd2x:
15129 case Intrinsic::ppc_vsx_lxvd2x_be:
15130 VT = MVT::v2f64;
15131 break;
15132 case Intrinsic::ppc_altivec_lvebx:
15133 VT = MVT::i8;
15134 break;
15135 case Intrinsic::ppc_altivec_lvehx:
15136 VT = MVT::i16;
15137 break;
15138 case Intrinsic::ppc_altivec_lvewx:
15139 VT = MVT::i32;
15140 break;
15141 }
15142
15143 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
15144 }
15145
15146 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
15147 EVT VT;
15148 switch (N->getConstantOperandVal(1)) {
15149 default: return false;
15150 case Intrinsic::ppc_altivec_stvx:
15151 case Intrinsic::ppc_altivec_stvxl:
15152 case Intrinsic::ppc_vsx_stxvw4x:
15153 VT = MVT::v4i32;
15154 break;
15155 case Intrinsic::ppc_vsx_stxvd2x:
15156 VT = MVT::v2f64;
15157 break;
15158 case Intrinsic::ppc_vsx_stxvw4x_be:
15159 VT = MVT::v4i32;
15160 break;
15161 case Intrinsic::ppc_vsx_stxvd2x_be:
15162 VT = MVT::v2f64;
15163 break;
15164 case Intrinsic::ppc_altivec_stvebx:
15165 VT = MVT::i8;
15166 break;
15167 case Intrinsic::ppc_altivec_stvehx:
15168 VT = MVT::i16;
15169 break;
15170 case Intrinsic::ppc_altivec_stvewx:
15171 VT = MVT::i32;
15172 break;
15173 }
15174
15175 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
15176 }
15177
15178 return false;
15179}
15180
15181// Return true is there is a nearyby consecutive load to the one provided
15182// (regardless of alignment). We search up and down the chain, looking though
15183// token factors and other loads (but nothing else). As a result, a true result
15184// indicates that it is safe to create a new consecutive load adjacent to the
15185// load provided.
15187 SDValue Chain = LD->getChain();
15188 EVT VT = LD->getMemoryVT();
15189
15190 SmallPtrSet<SDNode *, 16> LoadRoots;
15191 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
15193
15194 // First, search up the chain, branching to follow all token-factor operands.
15195 // If we find a consecutive load, then we're done, otherwise, record all
15196 // nodes just above the top-level loads and token factors.
15197 while (!Queue.empty()) {
15198 SDNode *ChainNext = Queue.pop_back_val();
15199 if (!Visited.insert(ChainNext).second)
15200 continue;
15201
15202 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
15203 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
15204 return true;
15205
15206 if (!Visited.count(ChainLD->getChain().getNode()))
15207 Queue.push_back(ChainLD->getChain().getNode());
15208 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
15209 for (const SDUse &O : ChainNext->ops())
15210 if (!Visited.count(O.getNode()))
15211 Queue.push_back(O.getNode());
15212 } else
15213 LoadRoots.insert(ChainNext);
15214 }
15215
15216 // Second, search down the chain, starting from the top-level nodes recorded
15217 // in the first phase. These top-level nodes are the nodes just above all
15218 // loads and token factors. Starting with their uses, recursively look though
15219 // all loads (just the chain uses) and token factors to find a consecutive
15220 // load.
15221 Visited.clear();
15222 Queue.clear();
15223
15224 for (SDNode *I : LoadRoots) {
15225 Queue.push_back(I);
15226
15227 while (!Queue.empty()) {
15228 SDNode *LoadRoot = Queue.pop_back_val();
15229 if (!Visited.insert(LoadRoot).second)
15230 continue;
15231
15232 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
15233 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
15234 return true;
15235
15236 for (SDNode *U : LoadRoot->users())
15237 if (((isa<MemSDNode>(U) &&
15238 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
15239 U->getOpcode() == ISD::TokenFactor) &&
15240 !Visited.count(U))
15241 Queue.push_back(U);
15242 }
15243 }
15244
15245 return false;
15246}
15247
15248/// This function is called when we have proved that a SETCC node can be replaced
15249/// by subtraction (and other supporting instructions) so that the result of
15250/// comparison is kept in a GPR instead of CR. This function is purely for
15251/// codegen purposes and has some flags to guide the codegen process.
15252static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
15253 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15254 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15255
15256 // Zero extend the operands to the largest legal integer. Originally, they
15257 // must be of a strictly smaller size.
15258 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
15259 DAG.getConstant(Size, DL, MVT::i32));
15260 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
15261 DAG.getConstant(Size, DL, MVT::i32));
15262
15263 // Swap if needed. Depends on the condition code.
15264 if (Swap)
15265 std::swap(Op0, Op1);
15266
15267 // Subtract extended integers.
15268 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
15269
15270 // Move the sign bit to the least significant position and zero out the rest.
15271 // Now the least significant bit carries the result of original comparison.
15272 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
15273 DAG.getConstant(Size - 1, DL, MVT::i32));
15274 auto Final = Shifted;
15275
15276 // Complement the result if needed. Based on the condition code.
15277 if (Complement)
15278 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
15279 DAG.getConstant(1, DL, MVT::i64));
15280
15281 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
15282}
15283
15284SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
15285 DAGCombinerInfo &DCI) const {
15286 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15287
15288 SelectionDAG &DAG = DCI.DAG;
15289 SDLoc DL(N);
15290
15291 // Size of integers being compared has a critical role in the following
15292 // analysis, so we prefer to do this when all types are legal.
15293 if (!DCI.isAfterLegalizeDAG())
15294 return SDValue();
15295
15296 // If all users of SETCC extend its value to a legal integer type
15297 // then we replace SETCC with a subtraction
15298 for (const SDNode *U : N->users())
15299 if (U->getOpcode() != ISD::ZERO_EXTEND)
15300 return SDValue();
15301
15302 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15303 auto OpSize = N->getOperand(0).getValueSizeInBits();
15304
15306
15307 if (OpSize < Size) {
15308 switch (CC) {
15309 default: break;
15310 case ISD::SETULT:
15311 return generateEquivalentSub(N, Size, false, false, DL, DAG);
15312 case ISD::SETULE:
15313 return generateEquivalentSub(N, Size, true, true, DL, DAG);
15314 case ISD::SETUGT:
15315 return generateEquivalentSub(N, Size, false, true, DL, DAG);
15316 case ISD::SETUGE:
15317 return generateEquivalentSub(N, Size, true, false, DL, DAG);
15318 }
15319 }
15320
15321 return SDValue();
15322}
15323
15324SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15325 DAGCombinerInfo &DCI) const {
15326 SelectionDAG &DAG = DCI.DAG;
15327 SDLoc dl(N);
15328
15329 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15330 // If we're tracking CR bits, we need to be careful that we don't have:
15331 // trunc(binary-ops(zext(x), zext(y)))
15332 // or
15333 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15334 // such that we're unnecessarily moving things into GPRs when it would be
15335 // better to keep them in CR bits.
15336
15337 // Note that trunc here can be an actual i1 trunc, or can be the effective
15338 // truncation that comes from a setcc or select_cc.
15339 if (N->getOpcode() == ISD::TRUNCATE &&
15340 N->getValueType(0) != MVT::i1)
15341 return SDValue();
15342
15343 if (N->getOperand(0).getValueType() != MVT::i32 &&
15344 N->getOperand(0).getValueType() != MVT::i64)
15345 return SDValue();
15346
15347 if (N->getOpcode() == ISD::SETCC ||
15348 N->getOpcode() == ISD::SELECT_CC) {
15349 // If we're looking at a comparison, then we need to make sure that the
15350 // high bits (all except for the first) don't matter the result.
15351 ISD::CondCode CC =
15352 cast<CondCodeSDNode>(N->getOperand(
15353 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15354 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15355
15356 if (ISD::isSignedIntSetCC(CC)) {
15357 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15358 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15359 return SDValue();
15360 } else if (ISD::isUnsignedIntSetCC(CC)) {
15361 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15362 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15363 !DAG.MaskedValueIsZero(N->getOperand(1),
15364 APInt::getHighBitsSet(OpBits, OpBits-1)))
15365 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15366 : SDValue());
15367 } else {
15368 // This is neither a signed nor an unsigned comparison, just make sure
15369 // that the high bits are equal.
15370 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15371 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15372
15373 // We don't really care about what is known about the first bit (if
15374 // anything), so pretend that it is known zero for both to ensure they can
15375 // be compared as constants.
15376 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15377 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15378
15379 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15380 Op1Known.getConstant() != Op2Known.getConstant())
15381 return SDValue();
15382 }
15383 }
15384
15385 // We now know that the higher-order bits are irrelevant, we just need to
15386 // make sure that all of the intermediate operations are bit operations, and
15387 // all inputs are extensions.
15388 if (N->getOperand(0).getOpcode() != ISD::AND &&
15389 N->getOperand(0).getOpcode() != ISD::OR &&
15390 N->getOperand(0).getOpcode() != ISD::XOR &&
15391 N->getOperand(0).getOpcode() != ISD::SELECT &&
15392 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15393 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15394 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15395 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15396 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15397 return SDValue();
15398
15399 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15400 N->getOperand(1).getOpcode() != ISD::AND &&
15401 N->getOperand(1).getOpcode() != ISD::OR &&
15402 N->getOperand(1).getOpcode() != ISD::XOR &&
15403 N->getOperand(1).getOpcode() != ISD::SELECT &&
15404 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15405 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15406 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15407 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15408 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15409 return SDValue();
15410
15412 SmallVector<SDValue, 8> BinOps, PromOps;
15413 SmallPtrSet<SDNode *, 16> Visited;
15414
15415 for (unsigned i = 0; i < 2; ++i) {
15416 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15417 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15418 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15419 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15420 isa<ConstantSDNode>(N->getOperand(i)))
15421 Inputs.push_back(N->getOperand(i));
15422 else
15423 BinOps.push_back(N->getOperand(i));
15424
15425 if (N->getOpcode() == ISD::TRUNCATE)
15426 break;
15427 }
15428
15429 // Visit all inputs, collect all binary operations (and, or, xor and
15430 // select) that are all fed by extensions.
15431 while (!BinOps.empty()) {
15432 SDValue BinOp = BinOps.pop_back_val();
15433
15434 if (!Visited.insert(BinOp.getNode()).second)
15435 continue;
15436
15437 PromOps.push_back(BinOp);
15438
15439 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15440 // The condition of the select is not promoted.
15441 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15442 continue;
15443 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15444 continue;
15445
15446 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15447 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15448 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15449 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15450 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15451 Inputs.push_back(BinOp.getOperand(i));
15452 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15453 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15454 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15455 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15456 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15457 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15458 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15459 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15460 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15461 BinOps.push_back(BinOp.getOperand(i));
15462 } else {
15463 // We have an input that is not an extension or another binary
15464 // operation; we'll abort this transformation.
15465 return SDValue();
15466 }
15467 }
15468 }
15469
15470 // Make sure that this is a self-contained cluster of operations (which
15471 // is not quite the same thing as saying that everything has only one
15472 // use).
15473 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15474 if (isa<ConstantSDNode>(Inputs[i]))
15475 continue;
15476
15477 for (const SDNode *User : Inputs[i].getNode()->users()) {
15478 if (User != N && !Visited.count(User))
15479 return SDValue();
15480
15481 // Make sure that we're not going to promote the non-output-value
15482 // operand(s) or SELECT or SELECT_CC.
15483 // FIXME: Although we could sometimes handle this, and it does occur in
15484 // practice that one of the condition inputs to the select is also one of
15485 // the outputs, we currently can't deal with this.
15486 if (User->getOpcode() == ISD::SELECT) {
15487 if (User->getOperand(0) == Inputs[i])
15488 return SDValue();
15489 } else if (User->getOpcode() == ISD::SELECT_CC) {
15490 if (User->getOperand(0) == Inputs[i] ||
15491 User->getOperand(1) == Inputs[i])
15492 return SDValue();
15493 }
15494 }
15495 }
15496
15497 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15498 for (const SDNode *User : PromOps[i].getNode()->users()) {
15499 if (User != N && !Visited.count(User))
15500 return SDValue();
15501
15502 // Make sure that we're not going to promote the non-output-value
15503 // operand(s) or SELECT or SELECT_CC.
15504 // FIXME: Although we could sometimes handle this, and it does occur in
15505 // practice that one of the condition inputs to the select is also one of
15506 // the outputs, we currently can't deal with this.
15507 if (User->getOpcode() == ISD::SELECT) {
15508 if (User->getOperand(0) == PromOps[i])
15509 return SDValue();
15510 } else if (User->getOpcode() == ISD::SELECT_CC) {
15511 if (User->getOperand(0) == PromOps[i] ||
15512 User->getOperand(1) == PromOps[i])
15513 return SDValue();
15514 }
15515 }
15516 }
15517
15518 // Replace all inputs with the extension operand.
15519 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15520 // Constants may have users outside the cluster of to-be-promoted nodes,
15521 // and so we need to replace those as we do the promotions.
15522 if (isa<ConstantSDNode>(Inputs[i]))
15523 continue;
15524 else
15525 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15526 }
15527
15528 std::list<HandleSDNode> PromOpHandles;
15529 for (auto &PromOp : PromOps)
15530 PromOpHandles.emplace_back(PromOp);
15531
15532 // Replace all operations (these are all the same, but have a different
15533 // (i1) return type). DAG.getNode will validate that the types of
15534 // a binary operator match, so go through the list in reverse so that
15535 // we've likely promoted both operands first. Any intermediate truncations or
15536 // extensions disappear.
15537 while (!PromOpHandles.empty()) {
15538 SDValue PromOp = PromOpHandles.back().getValue();
15539 PromOpHandles.pop_back();
15540
15541 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15542 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15543 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15544 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15545 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15546 PromOp.getOperand(0).getValueType() != MVT::i1) {
15547 // The operand is not yet ready (see comment below).
15548 PromOpHandles.emplace_front(PromOp);
15549 continue;
15550 }
15551
15552 SDValue RepValue = PromOp.getOperand(0);
15553 if (isa<ConstantSDNode>(RepValue))
15554 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15555
15556 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15557 continue;
15558 }
15559
15560 unsigned C;
15561 switch (PromOp.getOpcode()) {
15562 default: C = 0; break;
15563 case ISD::SELECT: C = 1; break;
15564 case ISD::SELECT_CC: C = 2; break;
15565 }
15566
15567 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15568 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15569 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15570 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15571 // The to-be-promoted operands of this node have not yet been
15572 // promoted (this should be rare because we're going through the
15573 // list backward, but if one of the operands has several users in
15574 // this cluster of to-be-promoted nodes, it is possible).
15575 PromOpHandles.emplace_front(PromOp);
15576 continue;
15577 }
15578
15580
15581 // If there are any constant inputs, make sure they're replaced now.
15582 for (unsigned i = 0; i < 2; ++i)
15583 if (isa<ConstantSDNode>(Ops[C+i]))
15584 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15585
15586 DAG.ReplaceAllUsesOfValueWith(PromOp,
15587 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15588 }
15589
15590 // Now we're left with the initial truncation itself.
15591 if (N->getOpcode() == ISD::TRUNCATE)
15592 return N->getOperand(0);
15593
15594 // Otherwise, this is a comparison. The operands to be compared have just
15595 // changed type (to i1), but everything else is the same.
15596 return SDValue(N, 0);
15597}
15598
15599SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15600 DAGCombinerInfo &DCI) const {
15601 SelectionDAG &DAG = DCI.DAG;
15602 SDLoc dl(N);
15603
15604 // If we're tracking CR bits, we need to be careful that we don't have:
15605 // zext(binary-ops(trunc(x), trunc(y)))
15606 // or
15607 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15608 // such that we're unnecessarily moving things into CR bits that can more
15609 // efficiently stay in GPRs. Note that if we're not certain that the high
15610 // bits are set as required by the final extension, we still may need to do
15611 // some masking to get the proper behavior.
15612
15613 // This same functionality is important on PPC64 when dealing with
15614 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15615 // the return values of functions. Because it is so similar, it is handled
15616 // here as well.
15617
15618 if (N->getValueType(0) != MVT::i32 &&
15619 N->getValueType(0) != MVT::i64)
15620 return SDValue();
15621
15622 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15623 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15624 return SDValue();
15625
15626 if (N->getOperand(0).getOpcode() != ISD::AND &&
15627 N->getOperand(0).getOpcode() != ISD::OR &&
15628 N->getOperand(0).getOpcode() != ISD::XOR &&
15629 N->getOperand(0).getOpcode() != ISD::SELECT &&
15630 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15631 return SDValue();
15632
15634 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15635 SmallPtrSet<SDNode *, 16> Visited;
15636
15637 // Visit all inputs, collect all binary operations (and, or, xor and
15638 // select) that are all fed by truncations.
15639 while (!BinOps.empty()) {
15640 SDValue BinOp = BinOps.pop_back_val();
15641
15642 if (!Visited.insert(BinOp.getNode()).second)
15643 continue;
15644
15645 PromOps.push_back(BinOp);
15646
15647 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15648 // The condition of the select is not promoted.
15649 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15650 continue;
15651 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15652 continue;
15653
15654 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15655 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15656 Inputs.push_back(BinOp.getOperand(i));
15657 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15658 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15659 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15660 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15661 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15662 BinOps.push_back(BinOp.getOperand(i));
15663 } else {
15664 // We have an input that is not a truncation or another binary
15665 // operation; we'll abort this transformation.
15666 return SDValue();
15667 }
15668 }
15669 }
15670
15671 // The operands of a select that must be truncated when the select is
15672 // promoted because the operand is actually part of the to-be-promoted set.
15673 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15674
15675 // Make sure that this is a self-contained cluster of operations (which
15676 // is not quite the same thing as saying that everything has only one
15677 // use).
15678 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15679 if (isa<ConstantSDNode>(Inputs[i]))
15680 continue;
15681
15682 for (SDNode *User : Inputs[i].getNode()->users()) {
15683 if (User != N && !Visited.count(User))
15684 return SDValue();
15685
15686 // If we're going to promote the non-output-value operand(s) or SELECT or
15687 // SELECT_CC, record them for truncation.
15688 if (User->getOpcode() == ISD::SELECT) {
15689 if (User->getOperand(0) == Inputs[i])
15690 SelectTruncOp[0].insert(std::make_pair(User,
15691 User->getOperand(0).getValueType()));
15692 } else if (User->getOpcode() == ISD::SELECT_CC) {
15693 if (User->getOperand(0) == Inputs[i])
15694 SelectTruncOp[0].insert(std::make_pair(User,
15695 User->getOperand(0).getValueType()));
15696 if (User->getOperand(1) == Inputs[i])
15697 SelectTruncOp[1].insert(std::make_pair(User,
15698 User->getOperand(1).getValueType()));
15699 }
15700 }
15701 }
15702
15703 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15704 for (SDNode *User : PromOps[i].getNode()->users()) {
15705 if (User != N && !Visited.count(User))
15706 return SDValue();
15707
15708 // If we're going to promote the non-output-value operand(s) or SELECT or
15709 // SELECT_CC, record them for truncation.
15710 if (User->getOpcode() == ISD::SELECT) {
15711 if (User->getOperand(0) == PromOps[i])
15712 SelectTruncOp[0].insert(std::make_pair(User,
15713 User->getOperand(0).getValueType()));
15714 } else if (User->getOpcode() == ISD::SELECT_CC) {
15715 if (User->getOperand(0) == PromOps[i])
15716 SelectTruncOp[0].insert(std::make_pair(User,
15717 User->getOperand(0).getValueType()));
15718 if (User->getOperand(1) == PromOps[i])
15719 SelectTruncOp[1].insert(std::make_pair(User,
15720 User->getOperand(1).getValueType()));
15721 }
15722 }
15723 }
15724
15725 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15726 bool ReallyNeedsExt = false;
15727 if (N->getOpcode() != ISD::ANY_EXTEND) {
15728 // If all of the inputs are not already sign/zero extended, then
15729 // we'll still need to do that at the end.
15730 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15731 if (isa<ConstantSDNode>(Inputs[i]))
15732 continue;
15733
15734 unsigned OpBits =
15735 Inputs[i].getOperand(0).getValueSizeInBits();
15736 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15737
15738 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15739 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15740 APInt::getHighBitsSet(OpBits,
15741 OpBits-PromBits))) ||
15742 (N->getOpcode() == ISD::SIGN_EXTEND &&
15743 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15744 (OpBits-(PromBits-1)))) {
15745 ReallyNeedsExt = true;
15746 break;
15747 }
15748 }
15749 }
15750
15751 // Convert PromOps to handles before doing any RAUW operations, as these
15752 // may CSE with existing nodes, deleting the originals.
15753 std::list<HandleSDNode> PromOpHandles;
15754 for (auto &PromOp : PromOps)
15755 PromOpHandles.emplace_back(PromOp);
15756
15757 // Replace all inputs, either with the truncation operand, or a
15758 // truncation or extension to the final output type.
15759 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15760 // Constant inputs need to be replaced with the to-be-promoted nodes that
15761 // use them because they might have users outside of the cluster of
15762 // promoted nodes.
15763 if (isa<ConstantSDNode>(Inputs[i]))
15764 continue;
15765
15766 SDValue InSrc = Inputs[i].getOperand(0);
15767 if (Inputs[i].getValueType() == N->getValueType(0))
15768 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15769 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15770 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15771 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15772 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15773 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15774 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15775 else
15776 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15777 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15778 }
15779
15780 // Replace all operations (these are all the same, but have a different
15781 // (promoted) return type). DAG.getNode will validate that the types of
15782 // a binary operator match, so go through the list in reverse so that
15783 // we've likely promoted both operands first.
15784 while (!PromOpHandles.empty()) {
15785 SDValue PromOp = PromOpHandles.back().getValue();
15786 PromOpHandles.pop_back();
15787
15788 unsigned C;
15789 switch (PromOp.getOpcode()) {
15790 default: C = 0; break;
15791 case ISD::SELECT: C = 1; break;
15792 case ISD::SELECT_CC: C = 2; break;
15793 }
15794
15795 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15796 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15797 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15798 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15799 // The to-be-promoted operands of this node have not yet been
15800 // promoted (this should be rare because we're going through the
15801 // list backward, but if one of the operands has several users in
15802 // this cluster of to-be-promoted nodes, it is possible).
15803 PromOpHandles.emplace_front(PromOp);
15804 continue;
15805 }
15806
15807 // For SELECT and SELECT_CC nodes, we do a similar check for any
15808 // to-be-promoted comparison inputs.
15809 if (PromOp.getOpcode() == ISD::SELECT ||
15810 PromOp.getOpcode() == ISD::SELECT_CC) {
15811 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15812 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15813 (SelectTruncOp[1].count(PromOp.getNode()) &&
15814 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15815 PromOpHandles.emplace_front(PromOp);
15816 continue;
15817 }
15818 }
15819
15821
15822 // If this node has constant inputs, then they'll need to be promoted here.
15823 for (unsigned i = 0; i < 2; ++i) {
15824 if (!isa<ConstantSDNode>(Ops[C+i]))
15825 continue;
15826 if (Ops[C+i].getValueType() == N->getValueType(0))
15827 continue;
15828
15829 if (N->getOpcode() == ISD::SIGN_EXTEND)
15830 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15831 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15832 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15833 else
15834 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15835 }
15836
15837 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15838 // truncate them again to the original value type.
15839 if (PromOp.getOpcode() == ISD::SELECT ||
15840 PromOp.getOpcode() == ISD::SELECT_CC) {
15841 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15842 if (SI0 != SelectTruncOp[0].end())
15843 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15844 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15845 if (SI1 != SelectTruncOp[1].end())
15846 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15847 }
15848
15849 DAG.ReplaceAllUsesOfValueWith(PromOp,
15850 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15851 }
15852
15853 // Now we're left with the initial extension itself.
15854 if (!ReallyNeedsExt)
15855 return N->getOperand(0);
15856
15857 // To zero extend, just mask off everything except for the first bit (in the
15858 // i1 case).
15859 if (N->getOpcode() == ISD::ZERO_EXTEND)
15860 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15862 N->getValueSizeInBits(0), PromBits),
15863 dl, N->getValueType(0)));
15864
15865 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15866 "Invalid extension type");
15867 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15868 SDValue ShiftCst =
15869 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15870 return DAG.getNode(
15871 ISD::SRA, dl, N->getValueType(0),
15872 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15873 ShiftCst);
15874}
15875
15876// The function check a i128 load can convert to 16i8 load for Vcmpequb.
15877static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS, bool IsPPC64) {
15878
15879 auto isValidForConvert = [IsPPC64](SDValue &Operand) {
15880 if (!Operand.hasOneUse())
15881 return false;
15882
15883 if (Operand.getValueType() != MVT::i128)
15884 return false;
15885
15886 if (Operand.getOpcode() == ISD::Constant) {
15887 auto *C = cast<ConstantSDNode>(Operand);
15888 const APInt &Val = C->getAPIntValue();
15889 // On PPC64, comparing an i128 value loaded from memory against a
15890 // constant smaller than 2^16 is usually better left to scalar lowering.
15891 // In that case, the compare can be lowered using xori (since xori has a
15892 // 16-bit immediate field), which is cheaper than materializing a vector
15893 // constant and using vcmpequb.
15894 if (IsPPC64 && Val.ult(1ULL << 16))
15895 return false;
15896 return true;
15897 }
15898
15899 auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
15900 if (!LoadNode)
15901 return false;
15902
15903 // If memory operation is volatile, do not perform any
15904 // optimization or transformation. Volatile operations must be preserved
15905 // as written to ensure correct program behavior, so we return an empty
15906 // SDValue to indicate no action.
15907
15908 if (LoadNode->isVolatile())
15909 return false;
15910
15911 // Only combine loads if both use the unindexed addressing mode.
15912 // PowerPC AltiVec/VMX does not support vector loads or stores with
15913 // pre/post-increment addressing. Indexed modes may imply implicit
15914 // pointer updates, which are not compatible with AltiVec vector
15915 // instructions.
15916 if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15917 return false;
15918
15919 // Only combine loads if both are non-extending loads
15920 // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15921 // ISD::SEXTLOAD) perform zero or sign extension, which may change the
15922 // loaded value's semantics and are not compatible with vector loads.
15923 if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15924 return false;
15925
15926 return true;
15927 };
15928
15929 return (isValidForConvert(LHS) && isValidForConvert(RHS));
15930}
15931
15933 const SDLoc &DL) {
15934
15935 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15936
15937 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15938 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15939 "CC mus be ISD::SETNE or ISD::SETEQ");
15940
15941 auto getV16i8Load = [&](const SDValue &Operand) {
15942 if (Operand.getOpcode() == ISD::Constant)
15943 return DAG.getBitcast(MVT::v16i8, Operand);
15944
15945 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15946
15947 auto *LoadNode = cast<LoadSDNode>(Operand);
15948 // Create a new MachineMemOperand without range metadata.
15949 // Range metadata is only valid for integer scalar types, not vectors.
15950 // The original i128 load may have range metadata, but when we convert
15951 // to v16i8, that metadata is no longer semantically valid.
15952 MachineMemOperand *MMO = LoadNode->getMemOperand();
15955 MMO->getPointerInfo(), MMO->getFlags(), MMO->getSize(), MMO->getAlign(),
15956 MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(),
15957 MMO->getSuccessOrdering(), MMO->getFailureOrdering());
15958 SDValue NewLoad = DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15959 LoadNode->getBasePtr(), NewMMO);
15960 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadNode, 1), NewLoad.getValue(1));
15961 return NewLoad;
15962 };
15963
15964 // Following code transforms the DAG
15965 // t0: ch,glue = EntryToken
15966 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15967 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15968 // undef:i64
15969 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15970 // t5: i128,ch =
15971 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15972 // setcc t3, t5, setne:ch
15973 //
15974 // ---->
15975 //
15976 // t0: ch,glue = EntryToken
15977 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15978 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15979 // undef:i64
15980 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15981 // t5: v16i8,ch =
15982 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15983 // t6: i32 =
15984 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15985 // Constant:i32<2>, t3, t5
15986 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15987
15988 // Or transforms the DAG
15989 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15990 // t8: i1 =
15991 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15992 //
15993 // --->
15994 //
15995 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15996 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15997 // t7: i32 =
15998 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15999
16000 SDValue LHSVec = getV16i8Load(N->getOperand(0));
16001 SDValue RHSVec = getV16i8Load(N->getOperand(1));
16002
16003 SDValue IntrID =
16004 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
16005 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
16006 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
16007 IntrID, CRSel, LHSVec, RHSVec);
16008 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
16009 // so we need to invert the CC opcode.
16010 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
16011 DAG.getConstant(0, DL, MVT::i32),
16012 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
16013}
16014
16015// Detect whether the SETCC node matches (setcc (and X, 1), 0, eq), or the
16016// commuted form (setcc 0, (and X, 1), eq). Returns true if it does;
// otherwise returns false.
16018 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
16019
16020 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
16021 if (CC != ISD::SETEQ)
16022 return false;
16023
16024 SDValue LHS = N->getOperand(0);
16025 SDValue RHS = N->getOperand(1);
16026
16027 // Check the `SDValue &V` is from `and` with `1`.
16028 auto IsAndWithOne = [](SDValue &V) {
16029 if (V.getOpcode() == ISD::AND) {
16030 for (const SDValue &Op : V->ops())
16031 if (auto *C = dyn_cast<ConstantSDNode>(Op))
16032 if (C->isOne())
16033 return true;
16034 }
16035 return false;
16036 };
16037
16038 // Check whether the SETCC compare with zero.
16039 auto IsCompareWithZero = [](SDValue &V) {
16040 if (auto *C = dyn_cast<ConstantSDNode>(V))
16041 if (C->isZero())
16042 return true;
16043 return false;
16044 };
16045
16046 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
16047 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
16048}
16049
16050// You must check whether the `SDNode* N` can be converted to Xori using
16051// the function `static bool canConvertSETCCToXori(SDNode *N)`
16052// before calling the function; otherwise, it may produce incorrect results.
16054
16055 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
16056 SDValue LHS = N->getOperand(0);
16057 SDValue RHS = N->getOperand(1);
16058 SDLoc DL(N);
16059
16060 [[maybe_unused]] ISD::CondCode CC =
16061 cast<CondCodeSDNode>(N->getOperand(2))->get();
16062 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
16063 // Rewrite it as XORI (and X, 1), 1.
16064 auto MakeXor1 = [&](SDValue V) {
16065 EVT VT = V.getValueType();
16066 SDValue One = DAG.getConstant(1, DL, VT);
16067 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, V, One);
16068 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Xor);
16069 };
16070
16071 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
16072 return MakeXor1(LHS);
16073
16074 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
16075 return MakeXor1(RHS);
16076
16077 llvm_unreachable("Should not reach here.");
16078}
16079
16080// Match `sext(setcc X, 0, eq)` (or the commuted `sext(setcc 0, X, eq)`) and
// turn it into an ADDIC/SUBFE sequence, built here as a PPCISD::ADDC node
// feeding a PPCISD::SUBE node.
16081//
16082// This generates code for:
16083// X == 0 ? -1 : 0
16084//
16085// On pre-ISA 3.1 targets, this is better than the longer CNTLZW/SRWI/NEG
16086// sequence. This is useful for cases like:
16087// uint8_t f(uint8_t x) { return (x == 0) ? -1 : 0; }
16088//
16089// ISA 3.1+ is skipped because those targets can use SETBC.
//
// N is the extension node being combined and DCI supplies the SelectionDAG.
// Returns the replacement value, or an empty SDValue when the pattern does
// not match: ISA 3.1 subtarget, result type other than i32/i64, operand that
// is not a SETCC, condition other than SETEQ, neither SETCC operand a null
// constant, or an i64 type involved on a non-PPC64 subtarget.
16090
16091SDValue PPCTargetLowering::combineSignExtendSetCC(SDNode *N,
16092 DAGCombinerInfo &DCI) const {
16093 if (Subtarget.isISA3_1())
16094 return SDValue();
16095
// Only i32 and i64 results are handled.
16096 EVT VT = N->getValueType(0);
16097 if (VT != MVT::i32 && VT != MVT::i64)
16098 return SDValue();
16099
16100 SDValue N0 = N->getOperand(0);
16101 if (N0.getOpcode() != ISD::SETCC)
16102 return SDValue();
16103
// NOTE(review): `CC` is read below but its declaration is not visible in
// this listing (the embedded numbering skips 16104) - confirm against the
// upstream file.
16105 SDValue LHS = N0.getOperand(0);
16106 SDValue RHS = N0.getOperand(1);
16107
16108 // Bail out unless this is sext (setcc x, 0, eq) or sext (setcc 0, x, eq).
16109 if (CC != ISD::SETEQ || (!isNullConstant(LHS) && !isNullConstant(RHS)))
16110 return SDValue();
16111
16112 SDLoc dl(N);
16113 SelectionDAG &DAG = DCI.DAG;
// NOTE(review): `X` is used below but its declaration is not visible in this
// listing (the embedded numbering skips 16114); presumably it is whichever of
// LHS/RHS is not the zero constant - confirm.
16115 EVT XVT = X.getValueType(); // The type of x in the setcc x, 0, eq.
16116
// 64-bit operands or results are only handled on PPC64.
16117 if ((XVT == MVT::i64 || VT == MVT::i64) && !Subtarget.isPPC64())
16118 return SDValue();
16119
16120 // On PPC64, i32 carry operations use the full 64-bit XER register,
16121 // so we must use i64 operations to avoid incorrect results.
16122 // Use i64 operations and truncate the result if needed.
// NOTE(review): on non-PPC64 subtargets X is used as-is with OpVT == i32, so
// this presumably relies on XVT already being i32 there - confirm.
16123 if (XVT != MVT::i64 && Subtarget.isPPC64())
16124 // Zero-extend if input type is not 64bits.
16125 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, X);
16126
16127 EVT OpVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
16128
16129 // Generate: SUBFE(ADDC(X, -1)).
// The ADDC node produces two results (value, carry); the carry result is fed
// into SUBE together with the ADDC value as both data operands.
// Presumably the carry is set exactly when X != 0, which makes the SUBE yield
// 0 for X != 0 and -1 for X == 0 - confirm against the PPCISD::ADDC/SUBE
// definitions.
16130 SDValue MinusOne = DAG.getAllOnesConstant(dl, OpVT);
16131 SDValue Addc =
16132 DAG.getNode(PPCISD::ADDC, dl, DAG.getVTList(OpVT, MVT::i32), X, MinusOne);
16133 SDValue Carry = Addc.getValue(1);
16134 SDValue Sube = DAG.getNode(PPCISD::SUBE, dl, DAG.getVTList(OpVT, MVT::i32),
16135 Addc, Addc, Carry);
16136
16137 // Truncate back to i32 if we used i64 operations.
16138 if (OpVT == MVT::i64 && VT == MVT::i32)
16139 return DAG.getNode(ISD::TRUNCATE, dl, VT, Sube);
16140
16141 return Sube;
16142}
16143
// Target-specific DAG combine for ISD::SETCC nodes (asserted below).
//
// The rewrites are tried in this order:
//   1. (setcc (and X, 1), 0, eq) is rewritten via ConvertSETCCToXori.
//   2. For SETEQ/SETNE only: a single-use '0 - y' operand is folded away
//      (x == 0-y --> x+y == 0).
//   3. For SETEQ/SETNE only: an i128 compare accepted by
//      canConvertToVcmpequb is turned into a vcmpequb.p based compare.
//   4. Otherwise the node is handed to DAGCombineTruncBoolExt.
16144SDValue PPCTargetLowering::combineSetCC(SDNode *N,
16145 DAGCombinerInfo &DCI) const {
16146 assert(N->getOpcode() == ISD::SETCC &&
16147 "Should be called with a SETCC node");
16148
16149 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
16150 // If it is, rewrite it as XORI (and X, 1), 1.
// NOTE(review): the condition guarding the return below is not visible in
// this listing (the embedded numbering skips 16151); presumably it is a call
// to canConvertSETCCToXori(N) - confirm against the upstream file.
16152 return ConvertSETCCToXori(N, DCI.DAG);
16153
16154 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
16155 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
16156 SDValue LHS = N->getOperand(0);
16157 SDValue RHS = N->getOperand(1);
16158
16159 // If there is a single-use '0 - y' pattern on the LHS, canonicalize it to
// the RHS. Swapping the operands is fine because only eq/ne reach here.
16160 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
16161 LHS.hasOneUse())
16162 std::swap(LHS, RHS);
16163
16164 // x == 0-y --> x+y == 0
16165 // x != 0-y --> x+y != 0
16166 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
16167 RHS.hasOneUse()) {
16168 SDLoc DL(N);
16169 SelectionDAG &DAG = DCI.DAG;
16170 EVT VT = N->getValueType(0);
16171 EVT OpVT = LHS.getValueType();
16172 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
16173 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
16174 }
16175
16176 // Optimization: Fold i128 equality/inequality compares of two loads (or of
16177 // a load and a constant) into a vectorized compare using vcmpequb.p when
// Altivec is available.
16178 //
16179 // Rationale:
16180 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
16181 // On Altivec-capable subtargets, we can instead reinterpret the i128
16182 // values as v16i8 vectors and use the Altivec vcmpequb.p instruction to
16183 // perform a full 128-bit equality check in a single vector compare.
16184 //
16185 // Example Result:
16186 // This transformation replaces memcmp(a, b, 16) with two vector loads
16187 // and one vector compare instruction.
16188
16189 if (Subtarget.hasAltivec() &&
16190 canConvertToVcmpequb(LHS, RHS, Subtarget.isPPC64()))
16191 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
16192 }
16193
// Nothing above applied: fall back to the trunc/bool-ext combine.
16194 return DAGCombineTruncBoolExt(N, DCI);
16195}
16196
16197// Is this an extending load from an f32 to an f64?
16198static bool isFPExtLoad(SDValue Op) {
16199 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
16200 return LD->getExtensionType() == ISD::EXTLOAD &&
16201 Op.getValueType() == MVT::f64;
16202 return false;
16203}
16204
16205/// Reduces the number of fp-to-int conversions when building a vector.
16206///
16207/// If this vector is built out of floating to integer conversions,
16208/// transform it to a vector built out of floating point values followed by a
16209/// single floating to integer conversion of the vector.
16210/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
16211/// becomes (fptosi (build_vector ($A, $B, ...)))
///
/// \param N   The BUILD_VECTOR node (asserted); its first operand must be a
///            PPCISD::MFVSR node (asserted).
/// \param DCI Combiner state; only its SelectionDAG is used.
/// \returns The FP_TO_SINT / FP_TO_UINT node over the new build vector, or an
///          empty SDValue when the operands are not all MFVSR nodes of the
///          same FCTI* conversion, when a 32-bit conversion is not fed by an
///          extending load, or when the build vector is a splat.
16212SDValue PPCTargetLowering::
16213combineElementTruncationToVectorTruncation(SDNode *N,
16214 DAGCombinerInfo &DCI) const {
16215 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16216 "Should be called with a BUILD_VECTOR node");
16217
16218 SelectionDAG &DAG = DCI.DAG;
16219 SDLoc dl(N);
16220
16221 SDValue FirstInput = N->getOperand(0);
16222 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
16223 "The input operand must be an fp-to-int conversion.");
16224
16225 // This combine happens after legalization so the fp_to_[su]i nodes are
16226 // already converted to PPCISD nodes.
16227 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
16228 if (FirstConversion == PPCISD::FCTIDZ ||
16229 FirstConversion == PPCISD::FCTIDUZ ||
16230 FirstConversion == PPCISD::FCTIWZ ||
16231 FirstConversion == PPCISD::FCTIWUZ) {
16232 bool IsSplat = true;
// The FCTIW* opcodes are the conversions to 32-bit integers.
16233 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
16234 FirstConversion == PPCISD::FCTIWUZ;
16235 EVT SrcVT = FirstInput.getOperand(0).getValueType();
// NOTE(review): `Ops` is filled below but its declaration is not visible in
// this listing (the embedded numbering skips 16236) - confirm against the
// upstream file.
16237 EVT TargetVT = N->getValueType(0);
// First pass: verify every operand has the same MFVSR(conversion) shape as
// the first one, and detect splats.
16238 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
16239 SDValue NextOp = N->getOperand(i);
16240 if (NextOp.getOpcode() != PPCISD::MFVSR)
16241 return SDValue();
16242 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
16243 if (NextConversion != FirstConversion)
16244 return SDValue();
16245 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
16246 // This is not valid if the input was originally double precision. It is
16247 // also not profitable to do unless this is an extending load in which
16248 // case doing this combine will allow us to combine consecutive loads.
16249 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
16250 return SDValue();
16251 if (N->getOperand(i) != FirstInput)
16252 IsSplat = false;
16253 }
16254
16255 // If this is a splat, we leave it as-is since there will be only a single
16256 // fp-to-int conversion followed by a splat of the integer. This is better
16257 // for 32-bit and smaller ints and neutral for 64-bit ints.
16258 if (IsSplat)
16259 return SDValue();
16260
16261 // Now that we know we have the right type of node, get its operands
// (second pass: collect the floating-point inputs of the conversions).
16262 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
// `In` is the conversion node feeding the MFVSR; its operand 0 is the
// floating-point value being converted.
16263 SDValue In = N->getOperand(i).getOperand(0);
16264 if (Is32Bit) {
16265 // For 32-bit values, we need to add an FP_ROUND node (if we made it
16266 // here, we know that all inputs are extending loads so this is safe).
16267 if (In.isUndef())
16268 Ops.push_back(DAG.getUNDEF(SrcVT));
16269 else {
16270 SDValue Trunc =
16271 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
16272 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
16273 Ops.push_back(Trunc);
16274 }
16275 } else
16276 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
16277 }
16278
// The FCTIDZ/FCTIWZ opcodes map to a signed conversion; the remaining two
// (FCTIDUZ/FCTIWUZ) map to an unsigned one.
16279 unsigned Opcode;
16280 if (FirstConversion == PPCISD::FCTIDZ ||
16281 FirstConversion == PPCISD::FCTIWZ)
16282 Opcode = ISD::FP_TO_SINT;
16283 else
16284 Opcode = ISD::FP_TO_UINT;
16285
// A v2i64 result is built from v2f64; every other result type uses v4f32.
16286 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
16287 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
16288 return DAG.getNode(Opcode, dl, TargetVT, BV);
16289 }
16290 return SDValue();
16291}
16292
16293// LXVKQ instruction load VSX vector with a special quadword value
16294// based on an immediate value. This helper method returns the details of the
16295// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
16296// to help generate the LXVKQ instruction and the subsequent shift instruction
16297// required to match the original build vector pattern.
16298
16299// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
16300using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
16301
16302static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
16303
16304 // LXVKQ instruction loads the Quadword value:
16305 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
16306 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
16307 static const uint32_t Uim = 16;
16308
16309 // Check for direct LXVKQ match (no shift needed)
16310 if (FullVal == BasePattern)
16311 return std::make_tuple(Uim, uint8_t{0});
16312
16313 // Check if FullValue is 1 (the result of the base pattern >> 127)
16314 if (FullVal == APInt(128, 1))
16315 return std::make_tuple(Uim, uint8_t{127});
16316
16317 return std::nullopt;
16318}
16319
16320/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
16321/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
16322/// The LXVKQ instruction loads a VSX vector with a special quadword value
16323/// based on an immediate value. If UIM=0b10000 then LXVKQ loads VSR[32×TX+T]
16324/// with the value 0x8000_0000_0000_0000_0000_0000_0000_0000.
16325/// This can be used to inline the build vector constants that have the
16326/// following patterns:
16327///
16328/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
16329/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
16330/// The MSB pattern can be loaded directly using LXVKQ while the LSB pattern
16331/// is loaded using a combination of splatting and right shift instructions.
///
/// \param Op  The BUILD_VECTOR value to inspect (asserted).
/// \param DAG The SelectionDAG used to create the replacement machine nodes.
/// \returns The machine node producing the constant, or an empty SDValue when
///          the vector type is not v16i8/v8i16/v4i32/v2i64, an operand is not
///          a constant, the assembled value is all zeros or all ones, or no
///          pattern matches.
///
/// NOTE(review): no subtarget check is visible in this function; presumably
/// the caller guarantees that LXVKQ, XXSPLTIB and VSRQ are available -
/// confirm at the call site.
16332
16333SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
16334 SelectionDAG &DAG) const {
16335
16336 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
16337 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
16338
16339 // This transformation is only supported if we are loading either a byte,
16340 // halfword, word, or doubleword.
16341 EVT VT = Op.getValueType();
16342 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
16343 VT == MVT::v2i64))
16344 return SDValue();
16345
16346 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
16347 << VT.getEVTString() << "): ";
16348 Op->dump());
16349
16350 unsigned NumElems = VT.getVectorNumElements();
16351 unsigned ElemBits = VT.getScalarSizeInBits();
16352
16353 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
16354
16355 // Bail out if any operand of the build vector is not a constant.
16356 for (const SDValue &Operand : Op.getNode()->op_values()) {
16357 if (!isa<ConstantSDNode>(Operand))
16358 return SDValue();
16359 }
16360
16361 // Assemble build vector operands as a 128-bit register value.
16362 // We need to reconstruct what the 128-bit register pattern would be
16363 // that produces this vector when interpreted with the current endianness.
16364 APInt FullVal = APInt::getZero(128);
16365
16366 for (unsigned Index = 0; Index < NumElems; ++Index) {
16367 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
16368
16369 // Get element value as raw bits (zero-extended)
16370 uint64_t ElemValue = C->getZExtValue();
16371
16372 // Mask to element size to ensure we only get the relevant bits.
// (The guard avoids shifting a 64-bit value by 64 for doubleword elements.)
16373 if (ElemBits < 64)
16374 ElemValue &= ((1ULL << ElemBits) - 1);
16375
16376 // Calculate bit position for this element in the 128-bit register:
// element 0 occupies the lowest bits on little endian and the highest bits
// on big endian.
16377 unsigned BitPos =
16378 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
16379
16380 // Create APInt for the element value and shift it to correct position
16381 APInt ElemAPInt(128, ElemValue);
16382 ElemAPInt <<= BitPos;
16383
16384 // Place the element value at the correct bit position
16385 FullVal |= ElemAPInt;
16386 }
16387
// All-zeros and all-ones vectors are not handled by this combine.
16388 if (FullVal.isZero() || FullVal.isAllOnes())
16389 return SDValue();
16390
16391 if (auto UIMOpt = getPatternInfo(FullVal)) {
16392 const auto &[Uim, ShiftAmount] = *UIMOpt;
16393 SDLoc Dl(Op);
16394
16395 // Generate LXVKQ instruction if the shift amount is zero.
16396 if (ShiftAmount == 0) {
16397 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
16398 SDValue LxvkqInstr =
16399 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
// NOTE(review): the opening of the debug statement that the next two lines
// continue is not visible in this listing (the embedded numbering skips
// 16400) - confirm against the upstream file.
16401 << "combineBVLoadsSpecialValue: Instruction Emitted ";
16402 LxvkqInstr.dump());
16403 return LxvkqInstr;
16404 }
16405
16406 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
16407
16408 // The right shifted pattern can be constructed using a combination of
16409 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
16410 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16411 // value 255.
16412 SDValue ShiftAmountVec =
16413 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
16414 DAG.getTargetConstant(255, Dl, MVT::i32)),
16415 0);
16416 // Generate appropriate right shift instruction. The splat is passed as
// both VSRQ operands, so the splatted value is also the value being shifted;
// presumably all-ones shifted right by 127 leaves just the lowest bit set -
// confirm against the VSRQ definition.
16417 SDValue ShiftVec = SDValue(
16418 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
16419 0);
// NOTE(review): as above, the opening of this debug statement is not visible
// in this listing (the embedded numbering skips 16420).
16421 << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16422 ShiftVec.dump());
16423 return ShiftVec;
16424 }
16425 // No patterns matched for build vectors.
16426 return SDValue();
16427}
16428
16429/// Reduce the number of loads when building a vector.
16430///
16431/// Building a vector out of multiple loads can be converted to a load
16432/// of the vector type if the loads are consecutive. If the loads are
16433/// consecutive but in descending order, a shuffle is added at the end
16434/// to reorder the vector.
16436 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16437 "Should be called with a BUILD_VECTOR node");
16438
16439 SDLoc dl(N);
16440
16441 // Return early for non byte-sized type, as they can't be consecutive.
16442 if (!N->getValueType(0).getVectorElementType().isByteSized())
16443 return SDValue();
16444
16445 bool InputsAreConsecutiveLoads = true;
16446 bool InputsAreReverseConsecutive = true;
16447 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
16448 SDValue FirstInput = N->getOperand(0);
16449 bool IsRoundOfExtLoad = false;
16450 LoadSDNode *FirstLoad = nullptr;
16451
16452 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16453 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
16454 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
16455 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16456 }
16457 // Not a build vector of (possibly fp_rounded) loads.
16458 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
16459 N->getNumOperands() == 1)
16460 return SDValue();
16461
16462 if (!IsRoundOfExtLoad)
16463 FirstLoad = cast<LoadSDNode>(FirstInput);
16464
16466 InputLoads.push_back(FirstLoad);
16467 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
16468 // If any inputs are fp_round(extload), they all must be.
16469 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
16470 return SDValue();
16471
16472 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
16473 N->getOperand(i);
16474 if (NextInput.getOpcode() != ISD::LOAD)
16475 return SDValue();
16476
16477 SDValue PreviousInput =
16478 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
16479 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
16480 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
16481
16482 // If any inputs are fp_round(extload), they all must be.
16483 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16484 return SDValue();
16485
16486 // We only care about regular loads. The PPC-specific load intrinsics
16487 // will not lead to a merge opportunity.
16488 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
16489 InputsAreConsecutiveLoads = false;
16490 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
16491 InputsAreReverseConsecutive = false;
16492
16493 // Exit early if the loads are neither consecutive nor reverse consecutive.
16494 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16495 return SDValue();
16496 InputLoads.push_back(LD2);
16497 }
16498
16499 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16500 "The loads cannot be both consecutive and reverse consecutive.");
16501
16502 SDValue WideLoad;
16503 SDValue ReturnSDVal;
16504 if (InputsAreConsecutiveLoads) {
16505 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16506 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16507 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16508 FirstLoad->getAlign());
16509 ReturnSDVal = WideLoad;
16510 } else if (InputsAreReverseConsecutive) {
16511 LoadSDNode *LastLoad = InputLoads.back();
16512 assert(LastLoad && "Input needs to be a LoadSDNode.");
16513 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16514 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16515 LastLoad->getAlign());
16517 for (int i = N->getNumOperands() - 1; i >= 0; i--)
16518 Ops.push_back(i);
16519
16520 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16521 DAG.getUNDEF(N->getValueType(0)), Ops);
16522 } else
16523 return SDValue();
16524
16525 for (auto *LD : InputLoads)
16526 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16527 return ReturnSDVal;
16528}
16529
16530// This function adds the required vector_shuffle needed to get
16531// the elements of the vector extract in the correct position
16532// as specified by the CorrectElems encoding.
16534 SDValue Input, uint64_t Elems,
16535 uint64_t CorrectElems) {
16536 SDLoc dl(N);
16537
16538 unsigned NumElems = Input.getValueType().getVectorNumElements();
16539 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16540
16541 // Knowing the element indices being extracted from the original
16542 // vector and the order in which they're being inserted, just put
16543 // them at element indices required for the instruction.
16544 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16545 if (DAG.getDataLayout().isLittleEndian())
16546 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16547 else
16548 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16549 CorrectElems = CorrectElems >> 8;
16550 Elems = Elems >> 8;
16551 }
16552
16553 SDValue Shuffle =
16554 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16555 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16556
16557 EVT VT = N->getValueType(0);
16558 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16559
16560 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16561 Input.getValueType().getVectorElementType(),
16563 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16564 DAG.getValueType(ExtVT));
16565}
16566
16567// Look for build vector patterns where input operands come from sign
16568// extended vector_extract elements of specific indices. If the correct indices
16569// aren't used, add a vector shuffle to fix up the indices and create
16570// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16571// during instruction selection.
16573 // This array encodes the indices that the vector sign extend instructions
16574 // extract from when extending from one type to another for both BE and LE.
16575 // The right nibble of each byte corresponds to the LE indices,
16576 // and the left nibble of each byte corresponds to the BE indices.
16577 // For example: 0x3074B8FC byte->word
16578 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16579 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16580 // For example: 0x000070F8 byte->double word
16581 // For LE: the allowed indices are: 0x0,0x8
16582 // For BE: the allowed indices are: 0x7,0xF
16583 uint64_t TargetElems[] = {
16584 0x3074B8FC, // b->w
16585 0x000070F8, // b->d
16586 0x10325476, // h->w
16587 0x00003074, // h->d
16588 0x00001032, // w->d
16589 };
16590
16591 uint64_t Elems = 0;
16592 int Index;
16593 SDValue Input;
16594
16595 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16596 if (!Op)
16597 return false;
16598 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16599 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16600 return false;
16601
16602 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16603 // of the right width.
16604 SDValue Extract = Op.getOperand(0);
16605 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16606 Extract = Extract.getOperand(0);
16607 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16608 return false;
16609
16611 if (!ExtOp)
16612 return false;
16613
16614 Index = ExtOp->getZExtValue();
16615 if (Input && Input != Extract.getOperand(0))
16616 return false;
16617
16618 if (!Input)
16619 Input = Extract.getOperand(0);
16620
16621 Elems = Elems << 8;
16622 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16623 Elems |= Index;
16624
16625 return true;
16626 };
16627
16628 // If the build vector operands aren't sign extended vector extracts,
16629 // of the same input vector, then return.
16630 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16631 if (!isSExtOfVecExtract(N->getOperand(i))) {
16632 return SDValue();
16633 }
16634 }
16635
16636 // If the vector extract indices are not correct, add the appropriate
16637 // vector_shuffle.
16638 int TgtElemArrayIdx;
16639 int InputSize = Input.getValueType().getScalarSizeInBits();
16640 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16641 if (InputSize + OutputSize == 40)
16642 TgtElemArrayIdx = 0;
16643 else if (InputSize + OutputSize == 72)
16644 TgtElemArrayIdx = 1;
16645 else if (InputSize + OutputSize == 48)
16646 TgtElemArrayIdx = 2;
16647 else if (InputSize + OutputSize == 80)
16648 TgtElemArrayIdx = 3;
16649 else if (InputSize + OutputSize == 96)
16650 TgtElemArrayIdx = 4;
16651 else
16652 return SDValue();
16653
16654 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16655 CorrectElems = DAG.getDataLayout().isLittleEndian()
16656 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16657 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16658 if (Elems != CorrectElems) {
16659 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16660 }
16661
16662 // Regular lowering will catch cases where a shuffle is not needed.
16663 return SDValue();
16664}
16665
16666// Look for the pattern of a load from a narrow width to i128, feeding
16667// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16668// (LXVRZX). This node represents a zero extending load that will be matched
16669// to the Load VSX Vector Rightmost instructions.
// Returns the new LXVRZX memory-intrinsic node (results: v1i128 and a chain)
// on success, or an empty SDValue when the pattern does not match.
// NOTE(review): listing line 16670 is absent from this view; it presumably
// held the signature of combineBVZEXTLOAD (invoked as
// combineBVZEXTLOAD(N, DAG) from DAGCombineBuildVector) -- confirm against
// the original file.
16671 SDLoc DL(N);
16672
16673 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16674 if (N->getValueType(0) != MVT::v1i128)
16675 return SDValue();
16676
16677 SDValue Operand = N->getOperand(0);
16678 // Proceed with the transformation if the operand to the BUILD_VECTOR
16679 // is a load instruction.
16680 if (Operand.getOpcode() != ISD::LOAD)
16681 return SDValue();
16682
16683 auto *LD = cast<LoadSDNode>(Operand);
16684 EVT MemoryType = LD->getMemoryVT();
16685
16686 // This transformation is only valid if we are loading either a byte,
16687 // halfword, word, or doubleword.
16688 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16689 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16690
16691 // Ensure that the load from the narrow width is either zero extended
// (ZEXTLOAD) or any-extended (EXTLOAD) to i128; every other extension kind
// is rejected.
16692 if (!ValidLDType ||
16693 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16694 LD->getExtensionType() != ISD::EXTLOAD))
16695 return SDValue();
16696
// Operands of the new node: the load's chain, its base pointer, and the
// memory width in bits as a pointer-sized constant.
16697 SDValue LoadOps[] = {
16698 LD->getChain(), LD->getBasePtr(),
16699 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16700
// The new node reuses the original load's memory type and memory operand.
16701 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16702 DAG.getVTList(MVT::v1i128, MVT::Other),
16703 LoadOps, MemoryType, LD->getMemOperand());
16704}
16705
// Target DAG combine for BUILD_VECTOR nodes. Requires VSX; otherwise returns
// an empty SDValue. Tries, in order: the element-truncation combine (when
// the first operand is a PPCISD::MFVSR), the consecutive-loads combine, the
// sign-extended-extracts combine (P9 Altivec, not before legalization), the
// narrow zero-extending-load combine (ISA 3.1), and finally a v2f64 build of
// two int-to-fp conversions of adjacent v4i32 elements. Returns the
// replacement value, or an empty SDValue if nothing applied.
16706SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16707 DAGCombinerInfo &DCI) const {
16708 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16709 "Should be called with a BUILD_VECTOR node");
16710
16711 SelectionDAG &DAG = DCI.DAG;
16712 SDLoc dl(N);
16713
16714 if (!Subtarget.hasVSX())
16715 return SDValue();
16716
16717 // The target independent DAG combiner will leave a build_vector of
16718 // float-to-int conversions intact. We can generate MUCH better code for
16719 // a float-to-int conversion of a vector of floats.
16720 SDValue FirstInput = N->getOperand(0);
16721 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16722 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16723 if (Reduced)
16724 return Reduced;
16725 }
16726
16727 // If we're building a vector out of consecutive loads, just load that
16728 // vector type.
16729 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16730 if (Reduced)
16731 return Reduced;
16732
16733 // If we're building a vector out of extended elements from another vector
16734 // we have P9 vector integer extend instructions. The code assumes legal
16735 // input types (i.e. it can't handle things like v4i16) so do not run before
16736 // legalization.
16737 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16738 Reduced = combineBVOfVecSExt(N, DAG);
16739 if (Reduced)
16740 return Reduced;
16741 }
16742
16743 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16744 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16745 // is a load from <valid narrow width> to i128.
16746 if (Subtarget.isISA3_1()) {
16747 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16748 if (BVOfZLoad)
16749 return BVOfZLoad;
16750 }
16751
// Everything below only applies to a two-element double-precision result.
16752 if (N->getValueType(0) != MVT::v2f64)
16753 return SDValue();
16754
16755 // Looking for:
16756 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
// Both operands must be the same kind of conversion (both signed or both
// unsigned).
16757 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16758 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16759 return SDValue();
16760 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16761 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16762 return SDValue();
16763 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16764 return SDValue();
16765
16766 SDValue Ext1 = FirstInput.getOperand(0);
16767 SDValue Ext2 = N->getOperand(1).getOperand(0);
16768 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
// NOTE(review): listing line 16769 is absent from this view; it presumably
// held the matching EXTRACT_VECTOR_ELT test on Ext2 that completes this
// condition -- confirm against the original file.
16770 return SDValue();
16771
// Both extract indices must be constants, and both extracts must read the
// same v4i32 source vector.
16772 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16773 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16774 if (!Ext1Op || !Ext2Op)
16775 return SDValue();
16776 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16777 Ext1.getOperand(0) != Ext2.getOperand(0))
16778 return SDValue();
16779
// Only the element pairs (0,1) and (2,3) are accepted; the sub-vector index
// chosen for each pair is flipped between little and big endian.
16780 int FirstElem = Ext1Op->getZExtValue();
16781 int SecondElem = Ext2Op->getZExtValue();
16782 int SubvecIdx;
16783 if (FirstElem == 0 && SecondElem == 1)
16784 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16785 else if (FirstElem == 2 && SecondElem == 3)
16786 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16787 else
16788 return SDValue();
16789
// Emit a single vector conversion node taking the source vector and the
// selected sub-vector index.
16790 SDValue SrcVec = Ext1.getOperand(0);
16791 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16792 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16793 return DAG.getNode(NodeType, dl, MVT::v2f64,
16794 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16795}
16796
16797SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16798 DAGCombinerInfo &DCI) const {
16799 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16800 N->getOpcode() == ISD::UINT_TO_FP) &&
16801 "Need an int -> FP conversion node here");
16802
16803 if (useSoftFloat() || !Subtarget.has64BitSupport())
16804 return SDValue();
16805
16806 SelectionDAG &DAG = DCI.DAG;
16807 SDLoc dl(N);
16808 SDValue Op(N, 0);
16809
16810 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16811 // from the hardware.
16812 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16813 return SDValue();
16814 if (!Op.getOperand(0).getValueType().isSimple())
16815 return SDValue();
16816 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16817 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16818 return SDValue();
16819
16820 SDValue FirstOperand(Op.getOperand(0));
16821 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16822 (FirstOperand.getValueType() == MVT::i8 ||
16823 FirstOperand.getValueType() == MVT::i16);
16824 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16825 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16826 bool DstDouble = Op.getValueType() == MVT::f64;
16827 unsigned ConvOp = Signed ?
16828 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16829 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16830 SDValue WidthConst =
16831 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16832 dl, false);
16833 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16834 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16835 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16836 DAG.getVTList(MVT::f64, MVT::Other),
16837 Ops, MVT::i8, LDN->getMemOperand());
16838 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16839
16840 // For signed conversion, we need to sign-extend the value in the VSR
16841 if (Signed) {
16842 SDValue ExtOps[] = { Ld, WidthConst };
16843 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16844 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16845 } else
16846 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16847 }
16848
16849
16850 // For i32 intermediate values, unfortunately, the conversion functions
16851 // leave the upper 32 bits of the value are undefined. Within the set of
16852 // scalar instructions, we have no method for zero- or sign-extending the
16853 // value. Thus, we cannot handle i32 intermediate values here.
16854 if (Op.getOperand(0).getValueType() == MVT::i32)
16855 return SDValue();
16856
16857 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16858 "UINT_TO_FP is supported only with FPCVT");
16859
16860 // If we have FCFIDS, then use it when converting to single-precision.
16861 // Otherwise, convert to double-precision and then round.
16862 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16863 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16864 : PPCISD::FCFIDS)
16865 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16866 : PPCISD::FCFID);
16867 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16868 ? MVT::f32
16869 : MVT::f64;
16870
16871 // If we're converting from a float, to an int, and back to a float again,
16872 // then we don't need the store/load pair at all.
16873 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16874 Subtarget.hasFPCVT()) ||
16875 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16876 SDValue Src = Op.getOperand(0).getOperand(0);
16877 if (Src.getValueType() == MVT::f32) {
16878 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16879 DCI.AddToWorklist(Src.getNode());
16880 } else if (Src.getValueType() != MVT::f64) {
16881 // Make sure that we don't pick up a ppc_fp128 source value.
16882 return SDValue();
16883 }
16884
16885 unsigned FCTOp =
16886 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16887 PPCISD::FCTIDUZ;
16888
16889 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16890 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16891
16892 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16893 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16894 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16895 DCI.AddToWorklist(FP.getNode());
16896 }
16897
16898 return FP;
16899 }
16900
16901 return SDValue();
16902}
16903
16904// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16905// builtins) into loads with swaps.
// The replacement is an LXVD2X load typed v2f64 followed by an XXSWAPD, with
// a bitcast back to the node's own vector type when that type is not v2f64.
// Returns an empty SDValue when the combine is deferred or does not apply.
// NOTE(review): listing line 16906 is absent from this view; it presumably
// held the first line of the signature (return type, function name and the
// SDNode *N parameter) -- confirm against the original file.
16907 DAGCombinerInfo &DCI) const {
16908 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16909 // load combines.
16910 if (DCI.isBeforeLegalizeOps())
16911 return SDValue();
16912
16913 SelectionDAG &DAG = DCI.DAG;
16914 SDLoc dl(N);
16915 SDValue Chain;
16916 SDValue Base;
16917 MachineMemOperand *MMO;
16918
// Collect chain, base pointer and memory operand from whichever node form
// we were handed.
16919 switch (N->getOpcode()) {
16920 default:
16921 llvm_unreachable("Unexpected opcode for little endian VSX load");
16922 case ISD::LOAD: {
// NOTE(review): listing line 16923 is absent; it presumably declared LD as
// N viewed as a load node -- confirm.
16924 Chain = LD->getChain();
16925 Base = LD->getBasePtr();
16926 MMO = LD->getMemOperand();
16927 // If the MMO suggests this isn't a load of a full vector, leave
16928 // things alone. For a built-in, we have to make the change for
16929 // correctness, so if there is a size problem that will be a bug.
16930 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16931 return SDValue();
16932 break;
16933 }
// NOTE(review): listing lines 16934-16935 are absent; they presumably held
// the case label for the intrinsic form and the declaration of Intrin --
// confirm.
16936 Chain = Intrin->getChain();
16937 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16938 // us what we want. Get operand 2 instead.
16939 Base = Intrin->getOperand(2);
16940 MMO = Intrin->getMemOperand();
16941 break;
16942 }
16943 }
16944
16945 MVT VecTy = N->getValueType(0).getSimpleVT();
16946
// The load is always issued as v2f64, reusing the original memory operand.
16947 SDValue LoadOps[] = { Chain, Base };
16948 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16949 DAG.getVTList(MVT::v2f64, MVT::Other),
16950 LoadOps, MVT::v2f64, MMO);
16951
16952 DCI.AddToWorklist(Load.getNode());
// The swap is chained after the load (it consumes the load's chain result).
16953 Chain = Load.getValue(1);
16954 SDValue Swap = DAG.getNode(
16955 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16956 DCI.AddToWorklist(Swap.getNode());
16957
16958 // Add a bitcast if the resulting load type doesn't match v2f64.
16959 if (VecTy != MVT::v2f64) {
// Note: this local N is an SDValue and shadows the SDNode *N parameter.
16960 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16961 DCI.AddToWorklist(N.getNode());
16962 // Package {bitcast value, swap's chain} to match Load's shape.
16963 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16964 N, Swap.getValue(1));
16965 }
16966
16967 return Swap;
16968}
16969
16970// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16971// builtins) into stores with swaps.
// The stored value is bitcast to v2f64 if needed, passed through XXSWAPD,
// and written with an STXVD2X memory-intrinsic node, which is returned.
// Returns an empty SDValue when the combine is deferred or does not apply.
// NOTE(review): listing line 16972 is absent from this view; it presumably
// held the first line of the signature (return type, function name and the
// SDNode *N parameter) -- confirm against the original file.
16973 DAGCombinerInfo &DCI) const {
16974 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16975 // store combines.
16976 if (DCI.isBeforeLegalizeOps())
16977 return SDValue();
16978
16979 SelectionDAG &DAG = DCI.DAG;
16980 SDLoc dl(N);
16981 SDValue Chain;
16982 SDValue Base;
// Index of the operand of N that holds the value being stored.
16983 unsigned SrcOpnd;
16984 MachineMemOperand *MMO;
16985
16986 switch (N->getOpcode()) {
16987 default:
16988 llvm_unreachable("Unexpected opcode for little endian VSX store");
16989 case ISD::STORE: {
// NOTE(review): listing line 16990 is absent; it presumably declared ST as
// N viewed as a store node -- confirm.
16991 Chain = ST->getChain();
16992 Base = ST->getBasePtr();
16993 MMO = ST->getMemOperand();
16994 SrcOpnd = 1;
16995 // If the MMO suggests this isn't a store of a full vector, leave
16996 // things alone. For a built-in, we have to make the change for
16997 // correctness, so if there is a size problem that will be a bug.
16998 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16999 return SDValue();
17000 break;
17001 }
17002 case ISD::INTRINSIC_VOID: {
// NOTE(review): listing line 17003 is absent; it presumably declared Intrin
// as N viewed as a memory-intrinsic node -- confirm.
17004 Chain = Intrin->getChain();
17005 // Intrin->getBasePtr() oddly does not get what we want.
17006 Base = Intrin->getOperand(3);
17007 MMO = Intrin->getMemOperand();
17008 SrcOpnd = 2;
17009 break;
17010 }
17011 }
17012
17013 SDValue Src = N->getOperand(SrcOpnd);
17014 MVT VecTy = Src.getValueType().getSimpleVT();
17015
17016 // All stores are done as v2f64 and possible bit cast.
17017 if (VecTy != MVT::v2f64) {
17018 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
17019 DCI.AddToWorklist(Src.getNode());
17020 }
17021
// Swap first, then store the swapped value; the store is chained after the
// swap (it consumes the swap's chain result).
17022 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
17023 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
17024 DCI.AddToWorklist(Swap.getNode());
17025 Chain = Swap.getValue(1);
17026 SDValue StoreOps[] = { Chain, Swap, Base };
// The memory type passed here is the original (pre-bitcast) vector type.
17027 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
17028 DAG.getVTList(MVT::Other),
17029 StoreOps, VecTy, MMO);
17030 DCI.AddToWorklist(Store.getNode());
17031 return Store;
17032}
17033
17034// Handle DAG combine for STORE (FP_TO_INT F).
17035SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
17036 DAGCombinerInfo &DCI) const {
17037 SelectionDAG &DAG = DCI.DAG;
17038 SDLoc dl(N);
17039 unsigned Opcode = N->getOperand(1).getOpcode();
17040 (void)Opcode;
17041 bool Strict = N->getOperand(1)->isStrictFPOpcode();
17042
17043 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17044 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
17045 && "Not a FP_TO_INT Instruction!");
17046
17047 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
17048 EVT Op1VT = N->getOperand(1).getValueType();
17049 EVT ResVT = Val.getValueType();
17050
17051 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
17052 return SDValue();
17053
17054 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
17055 bool ValidTypeForStoreFltAsInt =
17056 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
17057 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
17058
17059 // TODO: Lower conversion from f128 on all VSX targets
17060 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
17061 return SDValue();
17062
17063 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
17064 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
17065 return SDValue();
17066
17067 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
17068
17069 // Set number of bytes being converted.
17070 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
17071 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
17072 DAG.getIntPtrConstant(ByteSize, dl, false),
17073 DAG.getValueType(Op1VT)};
17074
17075 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
17076 DAG.getVTList(MVT::Other), Ops,
17077 cast<StoreSDNode>(N)->getMemoryVT(),
17078 cast<StoreSDNode>(N)->getMemOperand());
17079
17080 return Val;
17081}
17082
17083static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
17084 // Check that the source of the element keeps flipping
17085 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
17086 bool PrevElemFromFirstVec = Mask[0] < NumElts;
17087 for (int i = 1, e = Mask.size(); i < e; i++) {
17088 if (PrevElemFromFirstVec && Mask[i] < NumElts)
17089 return false;
17090 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
17091 return false;
17092 PrevElemFromFirstVec = !PrevElemFromFirstVec;
17093 }
17094 return true;
17095}
17096
17097static bool isSplatBV(SDValue Op) {
17098 if (Op.getOpcode() != ISD::BUILD_VECTOR)
17099 return false;
17100 SDValue FirstOp;
17101
17102 // Find first non-undef input.
17103 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
17104 FirstOp = Op.getOperand(i);
17105 if (!FirstOp.isUndef())
17106 break;
17107 }
17108
17109 // All inputs are undef or the same as the first non-undef input.
17110 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
17111 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
17112 return false;
17113 return true;
17114}
17115
// Returns Op if it is a SCALAR_TO_VECTOR node, the operand of Op if Op is a
// BITCAST of a SCALAR_TO_VECTOR node, and an empty SDValue otherwise. Only a
// single level of BITCAST is looked through.
// NOTE(review): listing line 17116 is absent from this view; it presumably
// held the signature of isScalarToVec (called as isScalarToVec(LHS) from
// combineVectorShuffle) -- confirm against the original file.
17117 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
17118 return Op;
17119 if (Op.getOpcode() != ISD::BITCAST)
17120 return SDValue();
17121 Op = Op.getOperand(0);
17122 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
17123 return Op;
17124 return SDValue();
17125}
17126
17127// Fix up the shuffle mask to account for the fact that the result of
17128// scalar_to_vector is not in lane zero. This just takes all values in
17129// the ranges specified by the min/max indices and adds the number of
17130// elements required to ensure each element comes from the respective
17131// position in the valid lane.
17132// On little endian, that's just the corresponding element in the other
17133// half of the vector. On big endian, it is in the same half but right
17134// justified rather than left justified in that half.
// ShuffV is modified in place. Entries outside both inclusive ranges
// [LHSFirstElt, LHSLastElt] and [RHSFirstElt, RHSLastElt] are left untouched.
// NOTE(review): listing line 17135 is absent from this view; it presumably
// held the return type and name of this helper -- confirm against the
// original file.
17136 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
17137 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
17138 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
// Little endian: shift by half a vector. Big endian: shift by half a vector
// minus the number of valid elements of that input.
17139 int LHSEltFixup =
17140 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
17141 int RHSEltFixup =
17142 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
17143 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
17144 int Idx = ShuffV[I];
// The LHS range is tested first, so an index is adjusted at most once.
17145 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
17146 ShuffV[I] += LHSEltFixup;
17147 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
17148 ShuffV[I] += RHSEltFixup;
17149 }
17150}
17151
17152// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
17153// the original is:
17154// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
17155// In such a case, just change the shuffle mask to extract the element
17156// from the permuted index.
// Returns either a single-input vector shuffle of the extract's source
// vector or a new PPCISD::SCALAR_TO_VECTOR_PERMUTED node of the same type as
// OrigSToV.
// NOTE(review): listing line 17157 is absent from this view; it presumably
// held the first line of the signature of getSToVPermuted (called as
// getSToVPermuted(SToVNode, DAG, Subtarget) elsewhere in this file) --
// confirm against the original file.
17158 const PPCSubtarget &Subtarget) {
17159 SDLoc dl(OrigSToV);
17160 EVT VT = OrigSToV.getValueType();
17161 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17162 "Expecting a SCALAR_TO_VECTOR here");
17163 SDValue Input = OrigSToV.getOperand(0);
17164
17165 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17166 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
17167 SDValue OrigVector = Input.getOperand(0);
17168
17169 // Can't handle non-const element indices or different vector types
17170 // for the input to the extract and the output of the scalar_to_vector.
17171 if (Idx && VT == OrigVector.getValueType()) {
17172 unsigned NumElts = VT.getVectorNumElements();
17173 assert(
17174 NumElts > 1 &&
17175 "Cannot produce a permuted scalar_to_vector for one element vector");
// All lanes start out undefined (-1). Only the lane that receives the value
// is set: NumElts / 2 on little endian, NumElts / 2 - 1 on big endian.
17176 SmallVector<int, 16> NewMask(NumElts, -1);
17177 unsigned ResultInElt = NumElts / 2;
17178 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
17179 NewMask[ResultInElt] = Idx->getZExtValue();
17180 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
17181 }
17182 }
// General case: wrap the scalar operand in the permuted node form.
17183 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
17184 OrigSToV.getOperand(0));
17185}
17186
// Returns false if any defined entry of ShuffV selects an element past the
// last defined element of an input, and true otherwise. A negative
// *LastElementDefined value disables the check for that input.
// NOTE(review): listing line 17187 is absent from this view; it presumably
// held the first line of the signature of isShuffleMaskInRange, including
// the ShuffV parameter -- confirm against the original file.
17188 int HalfVec, int LHSLastElementDefined,
17189 int RHSLastElementDefined) {
17190 for (int Index : ShuffV) {
17191 if (Index < 0) // Skip explicitly undefined mask indices.
17192 continue;
17193 // Handle first input vector of the vector_shuffle.
17194 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
17195 (Index > LHSLastElementDefined))
17196 return false;
17197 // Handle second input vector of the vector_shuffle.
// NOTE(review): this bound is HalfVec + RHSLastElementDefined, while the
// visible caller passes half the shuffle length as HalfVec and a
// last-element index that is already offset by the full shuffle length --
// confirm the intended base for the second input.
17198 if ((RHSLastElementDefined >= 0) &&
17199 (Index > HalfVec + RHSLastElementDefined))
17200 return false;
17201 }
17202 return true;
17203}
17204
// Builds the permuted form of a SCALAR_TO_VECTOR shuffle input and computes
// the values needed to fix up the shuffle mask. NumValidElts and LastElt are
// output parameters. The returned value has the type of VecShuffOperand (a
// bitcast is added when the permuted node's type differs).
// NOTE(review): listing line 17205 is absent from this view; it presumably
// held the return type and name of this helper -- confirm against the
// original file.
17206 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
17207 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
17208 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
17209 EVT VecShuffOperandType = VecShuffOperand.getValueType();
17210 // Set up the values for the shuffle vector fixup.
17211 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
17212 // The last element depends on if the input comes from the LHS or RHS.
17213 //
17214 // For example:
17215 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
17216 //
17217 // For the LHS: The last element that comes from the LHS is actually 0, not 3
17218 // because elements 1 and higher of a scalar_to_vector are undefined.
17219 // For the RHS: The last element that comes from the RHS is actually 5, not 7
17220 // because elements 1 and higher of a scalar_to_vector are undefined.
17221 // It is also not 4 because the original scalar_to_vector is wider and
17222 // actually contains two i32 elements.
// When the scalar is no wider than a shuffle element, only FirstElt itself
// is defined.
17223 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
17224 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
17225 : FirstElt;
17226 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
17227 if (SToVPermuted.getValueType() != VecShuffOperandType)
17228 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
17229 return SToVPermuted;
17230}
17231
17232// On little endian subtargets, combine shuffles such as:
17233// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
17234// into:
17235// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
17236// because the latter can be matched to a single instruction merge.
17237// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
17238// to put the value into element zero. Adjust the shuffle mask so that the
17239// vector can remain in permuted form (to prevent a swap prior to a shuffle).
17240// On big endian targets, this is still useful for SCALAR_TO_VECTOR
17241// nodes with elements smaller than doubleword because all the ways
17242// of getting scalar data into a vector register put the value in the
17243// rightmost element of the left half of the vector.
// Whenever no further rewrite applies, the function returns Res, which is
// the original shuffle, its commuted form, or the shuffle rebuilt after the
// SCALAR_TO_VECTOR adjustment.
17244SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
17245 SelectionDAG &DAG) const {
17246 SDValue LHS = SVN->getOperand(0);
17247 SDValue RHS = SVN->getOperand(1);
17248 auto Mask = SVN->getMask();
17249 int NumElts = LHS.getValueType().getVectorNumElements();
17250 SDValue Res(SVN, 0);
17251 SDLoc dl(SVN);
17252 bool IsLittleEndian = Subtarget.isLittleEndian();
17253
17254 // On big endian targets this is only useful for subtargets with direct moves.
17255 // On little endian targets it would be useful for all subtargets with VSX.
17256 // However adding special handling for LE subtargets without direct moves
17257 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
17258 // which includes direct moves.
17259 if (!Subtarget.hasDirectMove())
17260 return Res;
17261
17262 // If this is not a shuffle of a shuffle and the first element comes from
17263 // the second vector, canonicalize to the commuted form. This will make it
17264 // more likely to match one of the single instruction patterns.
17265 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
17266 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
17267 std::swap(LHS, RHS);
17268 Res = DAG.getCommutedVectorShuffle(*SVN);
17269
// Commuting may have produced something that is no longer a shuffle node;
// nothing further can be done with it here.
17270 if (!isa<ShuffleVectorSDNode>(Res))
17271 return Res;
17272
17273 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17274 }
17275
17276 // Adjust the shuffle mask if either input vector comes from a
17277 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
17278 // form (to prevent the need for a swap).
17279 SmallVector<int, 16> ShuffV(Mask);
17280 SDValue SToVLHS = isScalarToVec(LHS);
17281 SDValue SToVRHS = isScalarToVec(RHS);
17282 if (SToVLHS || SToVRHS) {
17283 EVT VT = SVN->getValueType(0);
17284 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
17285 int ShuffleNumElts = ShuffV.size();
17286 int HalfVec = ShuffleNumElts / 2;
17287 // The width of the "valid lane" (i.e. the lane that contains the value that
17288 // is vectorized) needs to be expressed in terms of the number of elements
17289 // of the shuffle. It is thereby the ratio of the values before and after
17290 // any bitcast, which will be set later on if the LHS or RHS are
17291 // SCALAR_TO_VECTOR nodes.
17292 unsigned LHSNumValidElts = HalfVec;
17293 unsigned RHSNumValidElts = HalfVec;
17294
17295 // Initially assume that neither input is permuted. These will be adjusted
17296 // accordingly if either input is. Note, that -1 means that all elements
17297 // are undefined.
17298 int LHSFirstElt = 0;
17299 int RHSFirstElt = ShuffleNumElts;
17300 int LHSLastElt = -1;
17301 int RHSLastElt = -1;
17302
17303 // Get the permuted scalar to vector nodes for the source(s) that come from
17304 // ISD::SCALAR_TO_VECTOR.
17305 // On big endian systems, this only makes sense for element sizes smaller
17306 // than 64 bits since for 64-bit elements, all instructions already put
17307 // the value into element zero. Since scalar size of LHS and RHS may differ
17308 // after isScalarToVec, this should be checked using their own sizes.
17309 int LHSScalarSize = 0;
17310 int RHSScalarSize = 0;
17311 if (SToVLHS) {
17312 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
17313 if (!IsLittleEndian && LHSScalarSize >= 64)
17314 return Res;
17315 }
17316 if (SToVRHS) {
17317 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
17318 if (!IsLittleEndian && RHSScalarSize >= 64)
17319 return Res;
17320 }
17321 if (LHSScalarSize != 0)
// NOTE(review): listing line 17322 is absent from this view; it presumably
// held the head of the call (and assignment) whose arguments follow --
// confirm against the original file.
17323 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
17324 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
17325 if (RHSScalarSize != 0)
// NOTE(review): listing line 17326 is absent from this view; it presumably
// held the head of the corresponding call for the RHS -- confirm.
17327 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
17328 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
17329
17330 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
17331 return Res;
17332
17333 // Fix up the shuffle mask to reflect where the desired element actually is.
17334 // The minimum and maximum indices that correspond to element zero for both
17335 // the LHS and RHS are computed and will control which shuffle mask entries
17336 // are to be changed. For example, if the RHS is permuted, any shuffle mask
17337 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
// NOTE(review): listing line 17338 is absent from this view; it presumably
// held the name of the mask fix-up helper being called with the arguments
// below -- confirm.
17339 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
17340 LHSNumValidElts, RHSNumValidElts, Subtarget);
17341 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17342
17343 // We may have simplified away the shuffle. We won't be able to do anything
17344 // further with it here.
17345 if (!isa<ShuffleVectorSDNode>(Res))
17346 return Res;
17347 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17348 }
17349
// The splat candidate is the RHS on little endian and the LHS on big endian.
17350 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
17351 // The common case after we commuted the shuffle is that the RHS is a splat
17352 // and we have elements coming in from the splat at indices that are not
17353 // conducive to using a merge.
17354 // Example:
17355 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
17356 if (!isSplatBV(TheSplat))
17357 return Res;
17358
17359 // We are looking for a mask such that all even elements are from
17360 // one vector and all odd elements from the other.
17361 if (!isAlternatingShuffMask(Mask, NumElts))
17362 return Res;
17363
17364 // Adjust the mask so we are pulling in the same index from the splat
17365 // as the index from the interesting vector in consecutive elements.
// In every branch below, undefined (negative) entries at the positions being
// rewritten are left as they are.
17366 if (IsLittleEndian) {
17367 // Example (even elements from first vector):
17368 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
17369 if (Mask[0] < NumElts)
17370 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17371 if (ShuffV[i] < 0)
17372 continue;
17373 // If element from non-splat is undef, pick first element from splat.
17374 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
17375 }
17376 // Example (odd elements from first vector):
17377 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
17378 else
17379 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17380 if (ShuffV[i] < 0)
17381 continue;
17382 // If element from non-splat is undef, pick first element from splat.
17383 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
17384 }
17385 } else {
17386 // Example (even elements from first vector):
17387 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
17388 if (Mask[0] < NumElts)
17389 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17390 if (ShuffV[i] < 0)
17391 continue;
17392 // If element from non-splat is undef, pick first element from splat.
17393 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
17394 }
17395 // Example (odd elements from first vector):
17396 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
17397 else
17398 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17399 if (ShuffV[i] < 0)
17400 continue;
17401 // If element from non-splat is undef, pick first element from splat.
17402 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
17403 }
17404 }
17405
17406 // If the RHS has undefs, we need to remove them since we may have created
17407 // a shuffle that adds those instead of the splat value.
17408 SDValue SplatVal =
17409 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
17410 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
17411
// Put the rebuilt splat back on the side it was taken from.
17412 if (IsLittleEndian)
17413 RHS = TheSplat;
17414 else
17415 LHS = TheSplat;
17416 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17417}
17418
17419SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
17420 LSBaseSDNode *LSBase,
17421 DAGCombinerInfo &DCI) const {
17422 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
17423 "Not a reverse memop pattern!");
17424
17425 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
17426 auto Mask = SVN->getMask();
17427 int i = 0;
17428 auto I = Mask.rbegin();
17429 auto E = Mask.rend();
17430
17431 for (; I != E; ++I) {
17432 if (*I != i)
17433 return false;
17434 i++;
17435 }
17436 return true;
17437 };
17438
17439 SelectionDAG &DAG = DCI.DAG;
17440 EVT VT = SVN->getValueType(0);
17441
17442 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
17443 return SDValue();
17444
17445 // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
17446 // See comment in PPCVSXSwapRemoval.cpp.
17447 // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
17448 if (!Subtarget.hasP9Vector())
17449 return SDValue();
17450
17451 if(!IsElementReverse(SVN))
17452 return SDValue();
17453
17454 if (LSBase->getOpcode() == ISD::LOAD) {
17455 // If the load return value 0 has more than one user except the
17456 // shufflevector instruction, it is not profitable to replace the
17457 // shufflevector with a reverse load.
17458 for (SDUse &Use : LSBase->uses())
17459 if (Use.getResNo() == 0 &&
17460 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
17461 return SDValue();
17462
17463 SDLoc dl(LSBase);
17464 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
17465 return DAG.getMemIntrinsicNode(
17466 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
17467 LSBase->getMemoryVT(), LSBase->getMemOperand());
17468 }
17469
17470 if (LSBase->getOpcode() == ISD::STORE) {
17471 // If there are other uses of the shuffle, the swap cannot be avoided.
17472 // Forcing the use of an X-Form (since swapped stores only have
17473 // X-Forms) without removing the swap is unprofitable.
17474 if (!SVN->hasOneUse())
17475 return SDValue();
17476
17477 SDLoc dl(LSBase);
17478 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
17479 LSBase->getBasePtr()};
17480 return DAG.getMemIntrinsicNode(
17481 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
17482 LSBase->getMemoryVT(), LSBase->getMemOperand());
17483 }
17484
17485 llvm_unreachable("Expected a load or store node here");
17486}
17487
17488static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17489 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
17490 if (IntrinsicID == Intrinsic::ppc_stdcx)
17491 StoreWidth = 8;
17492 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17493 StoreWidth = 4;
17494 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17495 StoreWidth = 2;
17496 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17497 StoreWidth = 1;
17498 else
17499 return false;
17500 return true;
17501}
17502
17505 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
17506 // (ADDC (ADDE 0, 0, C), -1) -> C
17507 SDValue LHS = N->getOperand(0);
17508 SDValue RHS = N->getOperand(1);
17509 if (LHS->getOpcode() == PPCISD::ADDE &&
17510 isNullConstant(LHS->getOperand(0)) &&
17511 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
17512 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
17513 }
17514 }
17515 return SDValue();
17516}
17517
17518/// Optimize the bitfloor(X) pattern for PowerPC.
17519/// Transforms: select_cc X, 0, 0, (srl MinSignedValue, (ctlz X)), seteq
17520/// Into: srl MinSignedValue, (ctlz X)
17521///
17522/// This is safe on PowerPC because the srw instruction returns 0 when the
17523/// shift amount is == bitwidth, which matches the behavior we need for X=0.
17525 if (N->getOpcode() != ISD::SELECT_CC)
17526 return SDValue();
17527
17528 // SELECT_CC operands: LHS, RHS, TrueVal, FalseVal, CC
17529 SDValue CmpLHS = N->getOperand(0);
17530 SDValue CmpRHS = N->getOperand(1);
17531 SDValue TrueVal = N->getOperand(2);
17532 SDValue FalseVal = N->getOperand(3);
17533 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
17534
17535 // Check if condition is (X == 0)
17536 if (CC != ISD::SETEQ || !isNullConstant(CmpRHS))
17537 return SDValue();
17538
17539 // Check if TrueVal is constant 0
17540 if (!isNullConstant(TrueVal))
17541 return SDValue();
17542
17543 // This combine is replacing a select_cc with a PPC srl, not an srl with a
17544 // PPC srl. If the original srl had multiple uses it would just remain in the
17545 // code. This is at most a performance consideration.
17546 if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
17547 return SDValue();
17548
17549 SDValue ShiftVal = FalseVal.getOperand(0);
17550 SDValue ShiftAmt = FalseVal.getOperand(1);
17551
17552 // Check if ShiftVal is MinSignedValue
17553 auto *ShiftConst = dyn_cast<ConstantSDNode>(ShiftVal);
17554 if (!ShiftConst || !ShiftConst->getAPIntValue().isMinSignedValue())
17555 return SDValue();
17556
17557 SDValue CtlzArg;
17558 // Check if ShiftAmt is (ctlz CmpLHS) or (truncate (ctlz ...))
17559 if (ShiftAmt.getOpcode() != ISD::CTLZ) {
17560 // Look through truncate if present (for i64 ctlz truncated to i32 shift
17561 // amount)
17562 if (ShiftAmt.getOpcode() != ISD::TRUNCATE)
17563 return SDValue();
17564
17565 // Verify the truncate target type is appropriate for shift amount (i32, not
17566 // i1 or other)
17567 if (ShiftAmt.getValueType() != MVT::i32)
17568 return SDValue();
17569
17570 SDValue CtlzNode = ShiftAmt.getOperand(0);
17571
17572 if (CtlzNode.getOpcode() != ISD::CTLZ)
17573 return SDValue();
17574
17575 CtlzArg = CtlzNode.getOperand(0);
17576 } else {
17577 CtlzArg = ShiftAmt.getOperand(0);
17578 }
17579
17580 // Check if ctlz operates on the same value as the comparison
17581 if (CtlzArg != CmpLHS)
17582 return SDValue();
17583
17584 // Using PPCISD::SRL to ensure well-defined behavior.
17585 // On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
17586 // which is exactly what we need for the bitfloor(0) case.
17587 SDLoc DL(N);
17588 SDValue PPCSrl =
17589 DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(), ShiftVal, ShiftAmt);
17590 return PPCSrl;
17591}
17592
17593// Optimize zero-extension of setcc when the compared value is known to be 0
17594// or 1.
17595//
17596// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
17597// -> zext(xor(Value, 1)) for seteq
17598// -> zext(Value) for setne
17599//
17600// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
17601// by keeping the value in its original i32 type throughout.
17602//
17603// Example:
17604// Before: zext(setcc(test_data_class(...), 0, seteq))
17605// // test_data_class returns 0 or 1 in i32
17606// // setcc converts i32 -> i1
17607// // zext converts i1 -> i64
17608// After: zext(xor(test_data_class(...), 1))
17609// // Stays in i32, then extends to i64
17610//
17611// This is beneficial because:
17612// 1. Eliminates the setcc instruction
17613// 2. Avoids i32 -> i1 truncation
17614// 3. Keeps computation in native integer width
17615
17617 // Check if this is a zero_extend
17618 if (N->getOpcode() != ISD::ZERO_EXTEND)
17619 return SDValue();
17620
17621 SDValue Src = N->getOperand(0);
17622
17623 // Check if the source is a setcc
17624 if (Src.getOpcode() != ISD::SETCC)
17625 return SDValue();
17626
17627 SDValue LHS = Src.getOperand(0);
17628 SDValue RHS = Src.getOperand(1);
17629 ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
17630
17632 return SDValue();
17633
17634 SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS;
17635
17636 auto isZeroOrOne = [=](SDValue &V) {
17637 if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17638 V.getConstantOperandVal(0) == Intrinsic::ppc_test_data_class)
17639 return true;
17640 return false;
17641 };
17642
17643 if (!isZeroOrOne(NonNullConstant))
17644 return SDValue();
17645
17646 // Check for pattern: zext(setcc (Value), 0, seteq)) or
17647 // zext(setcc (Value), 0, setne))
17648 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
17649 // Replace with: zext(xor(Value, 1)) for seteq
17650 // or: zext(Value) for setne
17651 // This keeps the value in i32 instead of converting to i1
17652 SDLoc DL(N);
17653 EVT VType = N->getValueType(0);
17654 SDValue NewNonNullConstant = DAG.getZExtOrTrunc(NonNullConstant, DL, VType);
17655
17656 if (CC == ISD::SETNE)
17657 return NewNonNullConstant;
17658
17659 SDValue One = DAG.getConstant(1, DL, VType);
17660 return DAG.getNode(ISD::XOR, DL, VType, NewNonNullConstant, One);
17661 }
17662
17663 return SDValue();
17664}
17665
17666// Combine XOR patterns with SELECT_CC_I4/I8, for Example:
17667// 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
17668// 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
17669// 1, cc))
17670// 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
17671// 0, 1, cc))
17672// 4. etc
17674 assert(N->getOpcode() == ISD::XOR && "Expected XOR node");
17675
17676 EVT XorVT = N->getValueType(0);
17677 if ((XorVT != MVT::i32 && XorVT != MVT::i64))
17678 return SDValue();
17679
17680 SDValue LHS = N->getOperand(0);
17681 SDValue RHS = N->getOperand(1);
17682
17683 // Check for XOR with constant 1
17685 if (!XorConst || !XorConst->isOne()) {
17686 XorConst = dyn_cast<ConstantSDNode>(LHS);
17687 if (!XorConst || !XorConst->isOne())
17688 return SDValue();
17689 // Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
17690 std::swap(LHS, RHS);
17691 }
17692
17693 // Check if LHS has only one use
17694 if (!LHS.hasOneUse())
17695 return SDValue();
17696
17697 // Handle extensions: ZEXT, ANYEXT
17698 SDValue SelectNode = LHS;
17699
17700 if (LHS.getOpcode() == ISD::ZERO_EXTEND ||
17701 LHS.getOpcode() == ISD::ANY_EXTEND) {
17702 SelectNode = LHS.getOperand(0);
17703
17704 // Check if the extension input has only one use
17705 if (!SelectNode.hasOneUse())
17706 return SDValue();
17707 }
17708
17709 // Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
17710 if (!SelectNode.isMachineOpcode())
17711 return SDValue();
17712
17713 unsigned MachineOpc = SelectNode.getMachineOpcode();
17714
17715 // Handle both SELECT_CC_I4 and SELECT_CC_I8
17716 if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
17717 return SDValue();
17718
17719 // SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
17720 if (SelectNode.getNumOperands() != 4)
17721 return SDValue();
17722
17723 ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(1));
17724 ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(2));
17725
17726 if (!ConstOp1 || !ConstOp2)
17727 return SDValue();
17728
17729 // Only optimize if operands are {0, 1} or {1, 0}
17730 if (!((ConstOp1->isOne() && ConstOp2->isZero()) ||
17731 (ConstOp1->isZero() && ConstOp2->isOne())))
17732 return SDValue();
17733
17734 // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
17735 // eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
17736 // SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
17737 // create SELECT_CC(cond, 1, 0, pred).
17738 SDLoc DL(N);
17739 MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
17740
17741 bool ConstOp1IsOne = ConstOp1->isOne();
17742 return SDValue(
17743 DAG.getMachineNode(MachineOpc, DL, XorVT,
17744 {SelectNode.getOperand(0),
17745 DAG.getConstant(ConstOp1IsOne ? 0 : 1, DL, XorVT),
17746 DAG.getConstant(ConstOp1IsOne ? 1 : 0, DL, XorVT),
17747 SelectNode.getOperand(3)}),
17748 0);
17749}
17750
17752 DAGCombinerInfo &DCI) const {
17753 SelectionDAG &DAG = DCI.DAG;
17754 SDLoc dl(N);
17755 switch (N->getOpcode()) {
17756 default: break;
17757 case ISD::ADD:
17758 return combineADD(N, DCI);
17759 case ISD::AND: {
17760 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17761 // original input as that will prevent us from selecting optimal rotates.
17762 // This only matters if the input to the extend is i32 widened to i64.
17763 SDValue Op1 = N->getOperand(0);
17764 SDValue Op2 = N->getOperand(1);
17765 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17766 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17767 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17768 Op1.getOperand(0).getValueType() != MVT::i32)
17769 break;
17770 SDValue NarrowOp = Op1.getOperand(0);
17771 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17772 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17773 break;
17774
17775 uint64_t Imm = Op2->getAsZExtVal();
17776 // Make sure that the constant is narrow enough to fit in the narrow type.
17777 if (!isUInt<32>(Imm))
17778 break;
17779 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17780 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17781 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17782 }
17783 case ISD::XOR: {
17784 // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
17785 if (SDValue V = combineXorSelectCC(N, DAG))
17786 return V;
17787 break;
17788 }
17789 case ISD::SHL:
17790 return combineSHL(N, DCI);
17791 case ISD::SRA:
17792 return combineSRA(N, DCI);
17793 case ISD::SRL:
17794 return combineSRL(N, DCI);
17795 case ISD::MUL:
17796 return combineMUL(N, DCI);
17797 case ISD::FMA:
17798 case PPCISD::FNMSUB:
17799 return combineFMALike(N, DCI);
17800 case PPCISD::SHL:
17801 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17802 return N->getOperand(0);
17803 break;
17804 case PPCISD::SRL:
17805 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17806 return N->getOperand(0);
17807 break;
17808 case PPCISD::SRA:
17809 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17810 if (C->isZero() || // 0 >>s V -> 0.
17811 C->isAllOnes()) // -1 >>s V -> -1.
17812 return N->getOperand(0);
17813 }
17814 break;
17815 case ISD::SIGN_EXTEND:
17816 if (SDValue SECC = combineSignExtendSetCC(N, DCI))
17817 return SECC;
17818 [[fallthrough]];
17819 case ISD::ZERO_EXTEND:
17820 if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
17821 return RetV;
17822 [[fallthrough]];
17823 case ISD::ANY_EXTEND:
17824 return DAGCombineExtBoolTrunc(N, DCI);
17825 case ISD::TRUNCATE:
17826 return combineTRUNCATE(N, DCI);
17827 case ISD::SETCC:
17828 if (SDValue CSCC = combineSetCC(N, DCI))
17829 return CSCC;
17830 [[fallthrough]];
17831 case ISD::SELECT_CC:
17832 if (SDValue V = combineSELECT_CCBitFloor(N, DAG))
17833 return V;
17834 return DAGCombineTruncBoolExt(N, DCI);
17835 case ISD::SINT_TO_FP:
17836 case ISD::UINT_TO_FP:
17837 return combineFPToIntToFP(N, DCI);
17839 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17840 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17841 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17842 }
17843 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17844 case ISD::STORE: {
17845
17846 EVT Op1VT = N->getOperand(1).getValueType();
17847 unsigned Opcode = N->getOperand(1).getOpcode();
17848
17849 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17850 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17851 SDValue Val = combineStoreFPToInt(N, DCI);
17852 if (Val)
17853 return Val;
17854 }
17855
17856 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17857 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17858 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17859 if (Val)
17860 return Val;
17861 }
17862
17863 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17864 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17865 N->getOperand(1).getNode()->hasOneUse() &&
17866 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17867 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17868
17869 // STBRX can only handle simple types and it makes no sense to store less
17870 // two bytes in byte-reversed order.
17871 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17872 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17873 break;
17874
17875 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17876 // Do an any-extend to 32-bits if this is a half-word input.
17877 if (BSwapOp.getValueType() == MVT::i16)
17878 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17879
17880 // If the type of BSWAP operand is wider than stored memory width
17881 // it need to be shifted to the right side before STBRX.
17882 if (Op1VT.bitsGT(mVT)) {
17883 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17884 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17885 DAG.getConstant(Shift, dl, MVT::i32));
17886 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17887 if (Op1VT == MVT::i64)
17888 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17889 }
17890
17891 SDValue Ops[] = {
17892 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17893 };
17894 return
17895 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17896 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17897 cast<StoreSDNode>(N)->getMemOperand());
17898 }
17899
17900 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17901 // So it can increase the chance of CSE constant construction.
17902 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17903 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17904 // Need to sign-extended to 64-bits to handle negative values.
17905 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17906 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17907 MemVT.getSizeInBits());
17908 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17909
17910 auto *ST = cast<StoreSDNode>(N);
17911 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17912 ST->getBasePtr(), ST->getOffset(), MemVT,
17913 ST->getMemOperand(), ST->getAddressingMode(),
17914 /*IsTruncating=*/true);
17915 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17916 // new store which will change the constant by removing non-demanded bits.
17917 return ST->isUnindexed()
17918 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17919 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17920 }
17921
17922 // For little endian, VSX stores require generating xxswapd/lxvd2x.
17923 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17924 if (Op1VT.isSimple()) {
17925 MVT StoreVT = Op1VT.getSimpleVT();
17926 if (Subtarget.needsSwapsForVSXMemOps() &&
17927 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17928 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17929 return expandVSXStoreForLE(N, DCI);
17930 }
17931 break;
17932 }
17933 case ISD::LOAD: {
17935 EVT VT = LD->getValueType(0);
17936
17937 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17938 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17939 if (VT.isSimple()) {
17940 MVT LoadVT = VT.getSimpleVT();
17941 if (Subtarget.needsSwapsForVSXMemOps() &&
17942 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17943 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17944 return expandVSXLoadForLE(N, DCI);
17945 }
17946
17947 // We sometimes end up with a 64-bit integer load, from which we extract
17948 // two single-precision floating-point numbers. This happens with
17949 // std::complex<float>, and other similar structures, because of the way we
17950 // canonicalize structure copies. However, if we lack direct moves,
17951 // then the final bitcasts from the extracted integer values to the
17952 // floating-point numbers turn into store/load pairs. Even with direct moves,
17953 // just loading the two floating-point numbers is likely better.
17954 auto ReplaceTwoFloatLoad = [&]() {
17955 if (VT != MVT::i64)
17956 return false;
17957
17958 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17959 LD->isVolatile())
17960 return false;
17961
17962 // We're looking for a sequence like this:
17963 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17964 // t16: i64 = srl t13, Constant:i32<32>
17965 // t17: i32 = truncate t16
17966 // t18: f32 = bitcast t17
17967 // t19: i32 = truncate t13
17968 // t20: f32 = bitcast t19
17969
17970 if (!LD->hasNUsesOfValue(2, 0))
17971 return false;
17972
17973 auto UI = LD->user_begin();
17974 while (UI.getUse().getResNo() != 0) ++UI;
17975 SDNode *Trunc = *UI++;
17976 while (UI.getUse().getResNo() != 0) ++UI;
17977 SDNode *RightShift = *UI;
17978 if (Trunc->getOpcode() != ISD::TRUNCATE)
17979 std::swap(Trunc, RightShift);
17980
17981 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17982 Trunc->getValueType(0) != MVT::i32 ||
17983 !Trunc->hasOneUse())
17984 return false;
17985 if (RightShift->getOpcode() != ISD::SRL ||
17986 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17987 RightShift->getConstantOperandVal(1) != 32 ||
17988 !RightShift->hasOneUse())
17989 return false;
17990
17991 SDNode *Trunc2 = *RightShift->user_begin();
17992 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17993 Trunc2->getValueType(0) != MVT::i32 ||
17994 !Trunc2->hasOneUse())
17995 return false;
17996
17997 SDNode *Bitcast = *Trunc->user_begin();
17998 SDNode *Bitcast2 = *Trunc2->user_begin();
17999
18000 if (Bitcast->getOpcode() != ISD::BITCAST ||
18001 Bitcast->getValueType(0) != MVT::f32)
18002 return false;
18003 if (Bitcast2->getOpcode() != ISD::BITCAST ||
18004 Bitcast2->getValueType(0) != MVT::f32)
18005 return false;
18006
18007 if (Subtarget.isLittleEndian())
18008 std::swap(Bitcast, Bitcast2);
18009
18010 // Bitcast has the second float (in memory-layout order) and Bitcast2
18011 // has the first one.
18012
18013 SDValue BasePtr = LD->getBasePtr();
18014 if (LD->isIndexed()) {
18015 assert(LD->getAddressingMode() == ISD::PRE_INC &&
18016 "Non-pre-inc AM on PPC?");
18017 BasePtr =
18018 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
18019 LD->getOffset());
18020 }
18021
18022 auto MMOFlags =
18023 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
18024 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
18025 LD->getPointerInfo(), LD->getAlign(),
18026 MMOFlags, LD->getAAInfo());
18027 SDValue AddPtr =
18028 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
18029 BasePtr, DAG.getIntPtrConstant(4, dl));
18030 SDValue FloatLoad2 = DAG.getLoad(
18031 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
18032 LD->getPointerInfo().getWithOffset(4),
18033 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
18034
18035 if (LD->isIndexed()) {
18036 // Note that DAGCombine should re-form any pre-increment load(s) from
18037 // what is produced here if that makes sense.
18038 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
18039 }
18040
18041 DCI.CombineTo(Bitcast2, FloatLoad);
18042 DCI.CombineTo(Bitcast, FloatLoad2);
18043
18044 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
18045 SDValue(FloatLoad2.getNode(), 1));
18046 return true;
18047 };
18048
18049 if (ReplaceTwoFloatLoad())
18050 return SDValue(N, 0);
18051
18052 EVT MemVT = LD->getMemoryVT();
18053 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
18054 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
18055 if (LD->isUnindexed() && VT.isVector() &&
18056 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
18057 // P8 and later hardware should just use LOAD.
18058 !Subtarget.hasP8Vector() &&
18059 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
18060 VT == MVT::v4f32))) &&
18061 LD->getAlign() < ABIAlignment) {
18062 // This is a type-legal unaligned Altivec load.
18063 SDValue Chain = LD->getChain();
18064 SDValue Ptr = LD->getBasePtr();
18065 bool isLittleEndian = Subtarget.isLittleEndian();
18066
18067 // This implements the loading of unaligned vectors as described in
18068 // the venerable Apple Velocity Engine overview. Specifically:
18069 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
18070 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
18071 //
18072 // The general idea is to expand a sequence of one or more unaligned
18073 // loads into an alignment-based permutation-control instruction (lvsl
18074 // or lvsr), a series of regular vector loads (which always truncate
18075 // their input address to an aligned address), and a series of
18076 // permutations. The results of these permutations are the requested
18077 // loaded values. The trick is that the last "extra" load is not taken
18078 // from the address you might suspect (sizeof(vector) bytes after the
18079 // last requested load), but rather sizeof(vector) - 1 bytes after the
18080 // last requested vector. The point of this is to avoid a page fault if
18081 // the base address happened to be aligned. This works because if the
18082 // base address is aligned, then adding less than a full vector length
18083 // will cause the last vector in the sequence to be (re)loaded.
18084 // Otherwise, the next vector will be fetched as you might suspect was
18085 // necessary.
18086
18087 // We might be able to reuse the permutation generation from
18088 // a different base address offset from this one by an aligned amount.
18089 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
18090 // optimization later.
18091 Intrinsic::ID Intr, IntrLD, IntrPerm;
18092 MVT PermCntlTy, PermTy, LDTy;
18093 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
18094 : Intrinsic::ppc_altivec_lvsl;
18095 IntrLD = Intrinsic::ppc_altivec_lvx;
18096 IntrPerm = Intrinsic::ppc_altivec_vperm;
18097 PermCntlTy = MVT::v16i8;
18098 PermTy = MVT::v4i32;
18099 LDTy = MVT::v4i32;
18100
18101 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
18102
18103 // Create the new MMO for the new base load. It is like the original MMO,
18104 // but represents an area in memory almost twice the vector size centered
18105 // on the original address. If the address is unaligned, we might start
18106 // reading up to (sizeof(vector)-1) bytes below the address of the
18107 // original unaligned load.
18109 MachineMemOperand *BaseMMO =
18110 MF.getMachineMemOperand(LD->getMemOperand(),
18111 -(int64_t)MemVT.getStoreSize()+1,
18112 2*MemVT.getStoreSize()-1);
18113
18114 // Create the new base load.
18115 SDValue LDXIntID =
18116 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
18117 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
18118 SDValue BaseLoad =
18120 DAG.getVTList(PermTy, MVT::Other),
18121 BaseLoadOps, LDTy, BaseMMO);
18122
18123 // Note that the value of IncOffset (which is provided to the next
18124 // load's pointer info offset value, and thus used to calculate the
18125 // alignment), and the value of IncValue (which is actually used to
18126 // increment the pointer value) are different! This is because we
18127 // require the next load to appear to be aligned, even though it
18128 // is actually offset from the base pointer by a lesser amount.
18129 int IncOffset = VT.getSizeInBits() / 8;
18130 int IncValue = IncOffset;
18131
18132 // Walk (both up and down) the chain looking for another load at the real
18133 // (aligned) offset (the alignment of the other load does not matter in
18134 // this case). If found, then do not use the offset reduction trick, as
18135 // that will prevent the loads from being later combined (as they would
18136 // otherwise be duplicates).
18137 if (!findConsecutiveLoad(LD, DAG))
18138 --IncValue;
18139
18141 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
18142 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
18143
18144 MachineMemOperand *ExtraMMO =
18145 MF.getMachineMemOperand(LD->getMemOperand(),
18146 1, 2*MemVT.getStoreSize()-1);
18147 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
18148 SDValue ExtraLoad =
18150 DAG.getVTList(PermTy, MVT::Other),
18151 ExtraLoadOps, LDTy, ExtraMMO);
18152
18153 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18154 BaseLoad.getValue(1), ExtraLoad.getValue(1));
18155
18156 // Because vperm has a big-endian bias, we must reverse the order
18157 // of the input vectors and complement the permute control vector
18158 // when generating little endian code. We have already handled the
18159 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
18160 // and ExtraLoad here.
18161 SDValue Perm;
18162 if (isLittleEndian)
18163 Perm = BuildIntrinsicOp(IntrPerm,
18164 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
18165 else
18166 Perm = BuildIntrinsicOp(IntrPerm,
18167 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
18168
18169 if (VT != PermTy)
18170 Perm = Subtarget.hasAltivec()
18171 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
18172 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
18173 DAG.getTargetConstant(1, dl, MVT::i64));
18174 // second argument is 1 because this rounding
18175 // is always exact.
18176
18177 // The output of the permutation is our loaded result, the TokenFactor is
18178 // our new chain.
18179 DCI.CombineTo(N, Perm, TF);
18180 return SDValue(N, 0);
18181 }
18182 }
18183 break;
18185 bool isLittleEndian = Subtarget.isLittleEndian();
18186 unsigned IID = N->getConstantOperandVal(0);
18187 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
18188 : Intrinsic::ppc_altivec_lvsl);
18189 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
18190 SDValue Add = N->getOperand(1);
18191
18192 int Bits = 4 /* 16 byte alignment */;
18193
18194 if (DAG.MaskedValueIsZero(Add->getOperand(1),
18195 APInt::getAllOnes(Bits /* alignment */)
18196 .zext(Add.getScalarValueSizeInBits()))) {
18197 SDNode *BasePtr = Add->getOperand(0).getNode();
18198 for (SDNode *U : BasePtr->users()) {
18199 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18200 U->getConstantOperandVal(0) == IID) {
18201 // We've found another LVSL/LVSR, and this address is an aligned
18202 // multiple of that one. The results will be the same, so use the
18203 // one we've just found instead.
18204
18205 return SDValue(U, 0);
18206 }
18207 }
18208 }
18209
18210 if (isa<ConstantSDNode>(Add->getOperand(1))) {
18211 SDNode *BasePtr = Add->getOperand(0).getNode();
18212 for (SDNode *U : BasePtr->users()) {
18213 if (U->getOpcode() == ISD::ADD &&
18214 isa<ConstantSDNode>(U->getOperand(1)) &&
18215 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
18216 (1ULL << Bits) ==
18217 0) {
18218 SDNode *OtherAdd = U;
18219 for (SDNode *V : OtherAdd->users()) {
18220 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18221 V->getConstantOperandVal(0) == IID) {
18222 return SDValue(V, 0);
18223 }
18224 }
18225 }
18226 }
18227 }
18228 }
18229
18230 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
18231 // Expose the vabsduw/h/b opportunity for down stream
18232 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
18233 (IID == Intrinsic::ppc_altivec_vmaxsw ||
18234 IID == Intrinsic::ppc_altivec_vmaxsh ||
18235 IID == Intrinsic::ppc_altivec_vmaxsb)) {
18236 SDValue V1 = N->getOperand(1);
18237 SDValue V2 = N->getOperand(2);
18238 if ((V1.getSimpleValueType() == MVT::v4i32 ||
18239 V1.getSimpleValueType() == MVT::v8i16 ||
18240 V1.getSimpleValueType() == MVT::v16i8) &&
18241 V1.getSimpleValueType() == V2.getSimpleValueType()) {
18242 // (0-a, a)
18243 if (V1.getOpcode() == ISD::SUB &&
18244 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
18245 V1.getOperand(1) == V2) {
18246 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
18247 }
18248 // (a, 0-a)
18249 if (V2.getOpcode() == ISD::SUB &&
18251 V2.getOperand(1) == V1) {
18252 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
18253 }
18254 // (x-y, y-x)
18255 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
18256 V1.getOperand(0) == V2.getOperand(1) &&
18257 V1.getOperand(1) == V2.getOperand(0)) {
18258 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
18259 }
18260 }
18261 }
18262 }
18263
18264 break;
18266 switch (N->getConstantOperandVal(1)) {
18267 default:
18268 break;
18269 case Intrinsic::ppc_altivec_vsum4sbs:
18270 case Intrinsic::ppc_altivec_vsum4shs:
18271 case Intrinsic::ppc_altivec_vsum4ubs: {
18272 // These sum-across intrinsics only have a chain due to the side effect
18273 // that they may set the SAT bit. If we know the SAT bit will not be set
18274 // for some inputs, we can replace any uses of their chain with the
18275 // input chain.
18276 if (BuildVectorSDNode *BVN =
18277 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
18278 APInt APSplatBits, APSplatUndef;
18279 unsigned SplatBitSize;
18280 bool HasAnyUndefs;
18281 bool BVNIsConstantSplat = BVN->isConstantSplat(
18282 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
18283 !Subtarget.isLittleEndian());
18284 // If the constant splat vector is 0, the SAT bit will not be set.
18285 if (BVNIsConstantSplat && APSplatBits == 0)
18286 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
18287 }
18288 return SDValue();
18289 }
18290 case Intrinsic::ppc_vsx_lxvw4x:
18291 case Intrinsic::ppc_vsx_lxvd2x:
18292 // For little endian, VSX loads require generating lxvd2x/xxswapd.
18293 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
18294 if (Subtarget.needsSwapsForVSXMemOps())
18295 return expandVSXLoadForLE(N, DCI);
18296 break;
18297 }
18298 break;
18300 // For little endian, VSX stores require generating xxswapd/stxvd2x.
18301 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
18302 if (Subtarget.needsSwapsForVSXMemOps()) {
18303 switch (N->getConstantOperandVal(1)) {
18304 default:
18305 break;
18306 case Intrinsic::ppc_vsx_stxvw4x:
18307 case Intrinsic::ppc_vsx_stxvd2x:
18308 return expandVSXStoreForLE(N, DCI);
18309 }
18310 }
18311 break;
18312 case ISD::BSWAP: {
18313 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
18314 // For subtargets without LDBRX, we can still do better than the default
18315 // expansion even for 64-bit BSWAP (LOAD).
18316 bool Is64BitBswapOn64BitTgt =
18317 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
18318 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
18319 N->getOperand(0).hasOneUse();
18320 if (IsSingleUseNormalLd &&
18321 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
18322 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
18323 SDValue Load = N->getOperand(0);
18324 LoadSDNode *LD = cast<LoadSDNode>(Load);
18325 // Create the byte-swapping load.
18326 SDValue Ops[] = {
18327 LD->getChain(), // Chain
18328 LD->getBasePtr(), // Ptr
18329 DAG.getValueType(N->getValueType(0)) // VT
18330 };
18331 SDValue BSLoad =
18332 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
18333 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
18334 MVT::i64 : MVT::i32, MVT::Other),
18335 Ops, LD->getMemoryVT(), LD->getMemOperand());
18336
18337 // If this is an i16 load, insert the truncate.
18338 SDValue ResVal = BSLoad;
18339 if (N->getValueType(0) == MVT::i16)
18340 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
18341
18342 // First, combine the bswap away. This makes the value produced by the
18343 // load dead.
18344 DCI.CombineTo(N, ResVal);
18345
18346 // Next, combine the load away, we give it a bogus result value but a real
18347 // chain result. The result value is dead because the bswap is dead.
18348 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
18349
18350 // Return N so it doesn't get rechecked!
18351 return SDValue(N, 0);
18352 }
18353 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
18354 // before legalization so that the BUILD_PAIR is handled correctly.
18355 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
18356 !IsSingleUseNormalLd)
18357 return SDValue();
18358 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
18359
18360 // Can't split volatile or atomic loads.
18361 if (!LD->isSimple())
18362 return SDValue();
18363 SDValue BasePtr = LD->getBasePtr();
18364 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
18365 LD->getPointerInfo(), LD->getAlign());
18366 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
18367 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
18368 DAG.getIntPtrConstant(4, dl));
18370 LD->getMemOperand(), 4, 4);
18371 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
18372 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
18373 SDValue Res;
18374 if (Subtarget.isLittleEndian())
18375 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
18376 else
18377 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
18378 SDValue TF =
18379 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18380 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
18381 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
18382 return Res;
18383 }
18384 case PPCISD::VCMP:
18385 // If a VCMP_rec node already exists with exactly the same operands as this
18386 // node, use its result instead of this node (VCMP_rec computes both a CR6
18387 // and a normal output).
18388 //
18389 if (!N->getOperand(0).hasOneUse() &&
18390 !N->getOperand(1).hasOneUse() &&
18391 !N->getOperand(2).hasOneUse()) {
18392
18393 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
18394 SDNode *VCMPrecNode = nullptr;
18395
18396 SDNode *LHSN = N->getOperand(0).getNode();
18397 for (SDNode *User : LHSN->users())
18398 if (User->getOpcode() == PPCISD::VCMP_rec &&
18399 User->getOperand(1) == N->getOperand(1) &&
18400 User->getOperand(2) == N->getOperand(2) &&
18401 User->getOperand(0) == N->getOperand(0)) {
18402 VCMPrecNode = User;
18403 break;
18404 }
18405
18406 // If there is no VCMP_rec node, or if the flag value has a single use,
18407 // don't transform this.
18408 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
18409 break;
18410
18411 // Look at the (necessarily single) use of the flag value. If it has a
18412 // chain, this transformation is more complex. Note that multiple things
18413 // could use the value result, which we should ignore.
18414 SDNode *FlagUser = nullptr;
18415 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
18416 FlagUser == nullptr; ++UI) {
18417 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
18418 SDNode *User = UI->getUser();
18419 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
18420 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
18421 FlagUser = User;
18422 break;
18423 }
18424 }
18425 }
18426
18427 // If the user is a MFOCRF instruction, we know this is safe.
18428 // Otherwise we give up for right now.
18429 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
18430 return SDValue(VCMPrecNode, 0);
18431 }
18432 break;
18433 case ISD::BR_CC: {
18434 // If this is a branch on an altivec predicate comparison, lower this so
18435 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
18436 // lowering is done pre-legalize, because the legalizer lowers the predicate
18437 // compare down to code that is difficult to reassemble.
18438 // This code also handles branches that depend on the result of a store
18439 // conditional.
18440 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18441 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
18442
18443 int CompareOpc;
18444 bool isDot;
18445
18446 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
18447 break;
18448
18449 // Since we are doing this pre-legalize, the RHS can be a constant of
18450 // arbitrary bitwidth which may cause issues when trying to get the value
18451 // from the underlying APInt.
18452 auto RHSAPInt = RHS->getAsAPIntVal();
18453 if (!RHSAPInt.isIntN(64))
18454 break;
18455
18456 unsigned Val = RHSAPInt.getZExtValue();
18457 auto isImpossibleCompare = [&]() {
18458 // If this is a comparison against something other than 0/1, then we know
18459 // that the condition is never/always true.
18460 if (Val != 0 && Val != 1) {
18461 if (CC == ISD::SETEQ) // Cond never true, remove branch.
18462 return N->getOperand(0);
18463 // Always !=, turn it into an unconditional branch.
18464 return DAG.getNode(ISD::BR, dl, MVT::Other,
18465 N->getOperand(0), N->getOperand(4));
18466 }
18467 return SDValue();
18468 };
18469 // Combine branches fed by store conditional instructions (st[bhwd]cx).
18470 unsigned StoreWidth = 0;
18471 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
18472 isStoreConditional(LHS, StoreWidth)) {
18473 if (SDValue Impossible = isImpossibleCompare())
18474 return Impossible;
18475 PPC::Predicate CompOpc;
18476 // eq 0 => ne
18477 // ne 0 => eq
18478 // eq 1 => eq
18479 // ne 1 => ne
18480 if (Val == 0)
18481 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
18482 else
18483 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
18484
18485 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
18486 DAG.getConstant(StoreWidth, dl, MVT::i32)};
18487 auto *MemNode = cast<MemSDNode>(LHS);
18488 SDValue ConstSt = DAG.getMemIntrinsicNode(
18489 PPCISD::STORE_COND, dl,
18490 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
18491 MemNode->getMemoryVT(), MemNode->getMemOperand());
18492
18493 SDValue InChain;
18494 // Unchain the branch from the original store conditional.
18495 if (N->getOperand(0) == LHS.getValue(1))
18496 InChain = LHS.getOperand(0);
18497 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
18498 SmallVector<SDValue, 4> InChains;
18499 SDValue InTF = N->getOperand(0);
18500 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
18501 if (InTF.getOperand(i) != LHS.getValue(1))
18502 InChains.push_back(InTF.getOperand(i));
18503 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
18504 }
18505
18506 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
18507 DAG.getConstant(CompOpc, dl, MVT::i32),
18508 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
18509 ConstSt.getValue(2));
18510 }
18511
18512 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18513 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
18514 assert(isDot && "Can't compare against a vector result!");
18515
18516 if (SDValue Impossible = isImpossibleCompare())
18517 return Impossible;
18518
18519 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
18520 // Create the PPCISD altivec 'dot' comparison node.
18521 SDValue Ops[] = {
18522 LHS.getOperand(2), // LHS of compare
18523 LHS.getOperand(3), // RHS of compare
18524 DAG.getConstant(CompareOpc, dl, MVT::i32)
18525 };
18526 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
18527 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
18528
18529 // Unpack the result based on how the target uses it.
18530 PPC::Predicate CompOpc;
18531 switch (LHS.getConstantOperandVal(1)) {
18532 default: // Can't happen, don't crash on invalid number though.
18533 case 0: // Branch on the value of the EQ bit of CR6.
18534 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
18535 break;
18536 case 1: // Branch on the inverted value of the EQ bit of CR6.
18537 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
18538 break;
18539 case 2: // Branch on the value of the LT bit of CR6.
18540 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
18541 break;
18542 case 3: // Branch on the inverted value of the LT bit of CR6.
18543 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
18544 break;
18545 }
18546
18547 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
18548 DAG.getConstant(CompOpc, dl, MVT::i32),
18549 DAG.getRegister(PPC::CR6, MVT::i32),
18550 N->getOperand(4), CompNode.getValue(1));
18551 }
18552 break;
18553 }
18554 case ISD::BUILD_VECTOR:
18555 return DAGCombineBuildVector(N, DCI);
18556 case PPCISD::ADDC:
18557 return DAGCombineAddc(N, DCI);
18558
18559 case ISD::BITCAST:
18560 return DAGCombineBitcast(N, DCI);
18561 }
18562
18563 return SDValue();
18564}
18565
/// Fold a signed division by a power-of-two (or negated power-of-two)
/// constant into a PPCISD::SRA_ADDZE node, followed by a negation (0 - x)
/// when the divisor is a negated power of two.
///
/// Returns an empty SDValue (no fold) when the result type is i64 on a
/// subtarget that is not PPC64, when the result type is neither i32 nor i64,
/// or when Divisor is neither a power of two nor a negated power of two.
/// Every node built here is appended to \p Created.
// NOTE(review): the line that carries the function name and the leading
// parameters (listing line 18567) is absent from this listing; N and Divisor
// are presumably declared there -- confirm against the upstream file.
18566SDValue
18568 SelectionDAG &DAG,
18569 SmallVectorImpl<SDNode *> &Created) const {
18570 // fold (sdiv X, pow2)
18571 EVT VT = N->getValueType(0);
// 64-bit division is only handled when 64-bit registers are available.
18572 if (VT == MVT::i64 && !Subtarget.isPPC64())
18573 return SDValue();
18574 if ((VT != MVT::i32 && VT != MVT::i64) ||
18575 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
18576 return SDValue();
18577
18578 SDLoc DL(N);
18579 SDValue N0 = N->getOperand(0);
18580
18581 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
// The shift amount is the number of trailing zero bits of |Divisor|.
18582 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
18583 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
18584
18585 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
18586 Created.push_back(Op.getNode());
18587
// A negated power-of-two divisor is handled by negating the quotient.
18588 if (IsNegPow2) {
18589 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
18590 Created.push_back(Op.getNode());
18591 }
18592
18593 return Op;
18594}
18595
18596//===----------------------------------------------------------------------===//
18597// Inline Assembly Support
18598//===----------------------------------------------------------------------===//
18599
// Reports known-zero bits for a handful of PPC-specific nodes. Known is
// reset first; only the cases below then set Known.Zero, and every other
// opcode leaves Known fully unknown. DemandedElts, DAG and Depth are not
// referenced in the visible body.
// NOTE(review): the first line of this signature (listing line 18600) is
// absent from this listing; the function name and the Op parameter are
// presumably declared there -- confirm against the upstream file.
18601 KnownBits &Known,
18602 const APInt &DemandedElts,
18603 const SelectionDAG &DAG,
18604 unsigned Depth) const {
18605 Known.resetAll();
18606 switch (Op.getOpcode()) {
18607 default: break;
18608 case PPCISD::LBRX: {
18609 // lhbrx is known to have the top bits cleared out.
// Operand 2 is the VT operand; only the i16 form sets the mask.
18610 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
18611 Known.Zero = 0xFFFF0000;
18612 break;
18613 }
18614 case PPCISD::ADDE: {
// Only result 0 is analysed; other results stay unknown.
18615 if (Op.getResNo() == 0) {
18616 // (0|1), _ = ADDE 0, 0, CARRY
18617 SDValue LHS = Op.getOperand(0);
18618 SDValue RHS = Op.getOperand(1);
18619 if (isNullConstant(LHS) && isNullConstant(RHS))
18620 Known.Zero = ~1ULL;
18621 }
18622 break;
18623 }
// NOTE(review): the case label opening this group (listing line 18624) is
// absent from this listing. The intrinsic ID is read from operand 0, which is
// how INTRINSIC_WO_CHAIN nodes are queried elsewhere in this file -- confirm.
18625 switch (Op.getConstantOperandVal(0)) {
18626 default: break;
18627 case Intrinsic::ppc_altivec_vcmpbfp_p:
18628 case Intrinsic::ppc_altivec_vcmpeqfp_p:
18629 case Intrinsic::ppc_altivec_vcmpequb_p:
18630 case Intrinsic::ppc_altivec_vcmpequh_p:
18631 case Intrinsic::ppc_altivec_vcmpequw_p:
18632 case Intrinsic::ppc_altivec_vcmpequd_p:
18633 case Intrinsic::ppc_altivec_vcmpequq_p:
18634 case Intrinsic::ppc_altivec_vcmpgefp_p:
18635 case Intrinsic::ppc_altivec_vcmpgtfp_p:
18636 case Intrinsic::ppc_altivec_vcmpgtsb_p:
18637 case Intrinsic::ppc_altivec_vcmpgtsh_p:
18638 case Intrinsic::ppc_altivec_vcmpgtsw_p:
18639 case Intrinsic::ppc_altivec_vcmpgtsd_p:
18640 case Intrinsic::ppc_altivec_vcmpgtsq_p:
18641 case Intrinsic::ppc_altivec_vcmpgtub_p:
18642 case Intrinsic::ppc_altivec_vcmpgtuh_p:
18643 case Intrinsic::ppc_altivec_vcmpgtuw_p:
18644 case Intrinsic::ppc_altivec_vcmpgtud_p:
18645 case Intrinsic::ppc_altivec_vcmpgtuq_p:
// NOTE(review): this mask is written as ~1U whereas the ADDE case above uses
// ~1ULL; presumably the predicate intrinsics always produce an i32 result --
// confirm.
18646 Known.Zero = ~1U; // All bits but the low one are known to be zero.
18647 break;
18648 }
18649 break;
18650 }
// NOTE(review): the case label opening this group (listing line 18651) is
// absent from this listing. The intrinsic ID is read from operand 1, which is
// how INTRINSIC_W_CHAIN nodes are queried elsewhere in this file -- confirm.
18652 switch (Op.getConstantOperandVal(1)) {
18653 default:
18654 break;
18655 case Intrinsic::ppc_load2r:
18656 // Top bits are cleared for load2r (which is the same as lhbrx).
18657 Known.Zero = 0xFFFF0000;
18658 break;
18659 }
18660 break;
18661 }
18662 }
18663}
18664
// Chooses a preferred alignment for the machine loop ML based on the
// subtarget's CPU directive. For the directives listed below it returns
// Align(32) for an innermost loop nested inside another loop (subject to a
// guard on a line missing from this listing) and for loops whose total
// instruction size is more than 16 and at most 32 bytes. All other cases fall
// through to the end of the function.
// NOTE(review): the signature (listing line 18665), the guard that opens the
// brace closed at listing line 18690 (listing line 18683) and the final
// return statement (listing line 18711) are absent from this listing --
// confirm against the upstream file.
18666 switch (Subtarget.getCPUDirective()) {
18667 default: break;
18668 case PPC::DIR_970:
18669 case PPC::DIR_PWR4:
18670 case PPC::DIR_PWR5:
18671 case PPC::DIR_PWR5X:
18672 case PPC::DIR_PWR6:
18673 case PPC::DIR_PWR6X:
18674 case PPC::DIR_PWR7:
18675 case PPC::DIR_PWR8:
18676 case PPC::DIR_PWR9:
18677 case PPC::DIR_PWR10:
18678 case PPC::DIR_PWR11:
18679 case PPC::DIR_PWR_FUTURE: {
// Without a loop there is nothing to measure; use the fall-through result.
18680 if (!ML)
18681 break;
18682
18684 // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
18685 // so that we can decrease cache misses and branch-prediction misses.
18686 // Actual alignment of the loop will depend on the hotness check and other
18687 // logic in alignBlocks.
18688 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
18689 return Align(32);
18690 }
18691
18692 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
18693
18694 // For small loops (between 5 and 8 instructions), align to a 32-byte
18695 // boundary so that the entire loop fits in one instruction-cache line.
18696 uint64_t LoopSize = 0;
// Sum the byte size of every instruction in every block of the loop. The
// break below leaves only the inner (per-instruction) loop, so each remaining
// block still contributes its first instruction before breaking again; the
// total stays above 32 either way.
18697 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
18698 for (const MachineInstr &J : **I) {
18699 LoopSize += TII->getInstSizeInBytes(J);
18700 if (LoopSize > 32)
18701 break;
18702 }
18703
18704 if (LoopSize > 16 && LoopSize <= 32)
18705 return Align(32);
18706
18707 break;
18708 }
18709 }
18710
18712}
18713
18714/// getConstraintType - Given a constraint, return the type of
18715/// constraint it is for this target.
///
/// Single-letter constraints b, r, f, d, v and y are register-class
/// constraints and Z is a memory constraint. The two-letter constraints wc,
/// wa, wd, wf, ws, wi and ww are register-class constraints. Anything else is
/// delegated to TargetLowering::getConstraintType.
// NOTE(review): the signature lines (listing lines 18716-18717) are absent
// from this listing; the Constraint parameter is presumably declared there.
18718 if (Constraint.size() == 1) {
18719 switch (Constraint[0]) {
18720 default: break;
18721 case 'b':
18722 case 'r':
18723 case 'f':
18724 case 'd':
18725 case 'v':
18726 case 'y':
18727 return C_RegisterClass;
18728 case 'Z':
18729 // FIXME: While Z does indicate a memory constraint, it specifically
18730 // indicates an r+r address (used in conjunction with the 'y' modifier
18731 // in the replacement string). Currently, we're forcing the base
18732 // register to be r0 in the asm printer (which is interpreted as zero)
18733 // and forming the complete address in the second register. This is
18734 // suboptimal.
18735 return C_Memory;
18736 }
18737 } else if (Constraint == "wc") { // individual CR bits.
18738 return C_RegisterClass;
18739 } else if (Constraint == "wa" || Constraint == "wd" ||
18740 Constraint == "wf" || Constraint == "ws" ||
18741 Constraint == "wi" || Constraint == "ww") {
18742 return C_RegisterClass; // VSX registers.
18743 }
// Unrecognised single letters and all other strings use the generic handling.
18744 return TargetLowering::getConstraintType(Constraint);
18745}
18746
18747/// Examine constraint type and operand type and determine a weight value.
18748/// This object must already have been set up with the operand type
18749/// and the current alternative constraint selected.
///
/// Returns CW_Default when the operand has no IR value. The two-letter VSX/CR
/// constraints are matched first against the operand's IR type; otherwise the
/// first character of the constraint selects the weight.
// NOTE(review): several lines are absent from this listing: the start of the
// signature (listing lines 18750-18751), the declaration that initialises
// `weight` (listing line 18753) and the statement of the `default:` case
// (listing line 18778) -- confirm against the upstream file.
18752 AsmOperandInfo &info, const char *constraint) const {
18754 Value *CallOperandVal = info.CallOperandVal;
18755 // If we don't have a value, we can't do a match,
18756 // but allow it at the lowest weight.
18757 if (!CallOperandVal)
18758 return CW_Default;
18759 Type *type = CallOperandVal->getType();
18760
18761 // Look at the constraint type.
18762 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
18763 return CW_Register; // an individual CR bit.
18764 else if ((StringRef(constraint) == "wa" ||
18765 StringRef(constraint) == "wd" ||
18766 StringRef(constraint) == "wf") &&
18767 type->isVectorTy())
18768 return CW_Register;
18769 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
18770 return CW_Register; // just hold 64-bit integers data.
18771 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18772 return CW_Register;
18773 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18774 return CW_Register;
18775
// Single-letter constraints: most are accepted as register operands only when
// the operand type matches; 'y' is accepted for any type and 'Z' is a memory
// operand.
18776 switch (*constraint) {
18777 default:
18779 break;
18780 case 'b':
18781 if (type->isIntegerTy())
18782 weight = CW_Register;
18783 break;
18784 case 'f':
18785 if (type->isFloatTy())
18786 weight = CW_Register;
18787 break;
18788 case 'd':
18789 if (type->isDoubleTy())
18790 weight = CW_Register;
18791 break;
18792 case 'v':
18793 if (type->isVectorTy())
18794 weight = CW_Register;
18795 break;
18796 case 'y':
18797 weight = CW_Register;
18798 break;
18799 case 'Z':
18800 weight = CW_Memory;
18801 break;
18802 }
18803 return weight;
18804}
18805
/// Map an inline-asm constraint string and value type to a (physical
/// register, register class) pair. A register number of 0 means "any register
/// of the returned class".
///
/// Handling order: single-letter GCC RS6000 constraints, the two-letter
/// w*/lr constraints, explicitly named {vsN} and {fN} registers, and finally
/// the result stored in R, which is then adjusted for 64-bit GPRs and the
/// {cc} alias.
// NOTE(review): the line carrying the function name and first parameter
// (listing line 18807) and the initialiser of R (listing line 18906) are
// absent from this listing; TRI, used below, is not declared in the visible
// lines either -- confirm against the upstream file.
18806std::pair<unsigned, const TargetRegisterClass *>
18808 StringRef Constraint,
18809 MVT VT) const {
18810 if (Constraint.size() == 1) {
18811 // GCC RS6000 Constraint Letters
18812 switch (Constraint[0]) {
18813 case 'b': // R1-R31
18814 if (VT == MVT::i64 && Subtarget.isPPC64())
18815 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18816 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18817 case 'r': // R0-R31
18818 if (VT == MVT::i64 && Subtarget.isPPC64())
18819 return std::make_pair(0U, &PPC::G8RCRegClass);
18820 return std::make_pair(0U, &PPC::GPRCRegClass);
18821 // 'd' and 'f' constraints are both defined to be "the floating point
18822 // registers", where one is for 32-bit and the other for 64-bit. We don't
18823 // really care overly much here so just give them all the same reg classes.
18824 case 'd':
18825 case 'f':
// With SPE, 32-bit values use GPRC and 64-bit values use SPERC; otherwise the
// F4RC/F8RC classes are used. Other value types fall out of the switch.
18826 if (Subtarget.hasSPE()) {
18827 if (VT == MVT::f32 || VT == MVT::i32)
18828 return std::make_pair(0U, &PPC::GPRCRegClass);
18829 if (VT == MVT::f64 || VT == MVT::i64)
18830 return std::make_pair(0U, &PPC::SPERCRegClass);
18831 } else {
18832 if (VT == MVT::f32 || VT == MVT::i32)
18833 return std::make_pair(0U, &PPC::F4RCRegClass);
18834 if (VT == MVT::f64 || VT == MVT::i64)
18835 return std::make_pair(0U, &PPC::F8RCRegClass);
18836 }
18837 break;
18838 case 'v':
18839 if (Subtarget.hasAltivec() && VT.isVector())
18840 return std::make_pair(0U, &PPC::VRRCRegClass);
18841 else if (Subtarget.hasVSX())
18842 // Scalars in Altivec registers only make sense with VSX.
18843 return std::make_pair(0U, &PPC::VFRCRegClass);
18844 break;
18845 case 'y': // crrc
18846 return std::make_pair(0U, &PPC::CRRCRegClass);
18847 }
18848 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18849 // An individual CR bit.
18850 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18851 } else if ((Constraint == "wa" || Constraint == "wd" ||
18852 Constraint == "wf" || Constraint == "wi") &&
18853 Subtarget.hasVSX()) {
18854 // A VSX register for either a scalar (FP) or vector. There is no
18855 // support for single precision scalars on subtargets prior to Power8.
18856 if (VT.isVector())
18857 return std::make_pair(0U, &PPC::VSRCRegClass);
18858 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18859 return std::make_pair(0U, &PPC::VSSRCRegClass);
18860 return std::make_pair(0U, &PPC::VSFRCRegClass);
18861 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18862 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18863 return std::make_pair(0U, &PPC::VSSRCRegClass);
18864 else
18865 return std::make_pair(0U, &PPC::VSFRCRegClass);
18866 } else if (Constraint == "lr") {
18867 if (VT == MVT::i64)
18868 return std::make_pair(0U, &PPC::LR8RCRegClass);
18869 else
18870 return std::make_pair(0U, &PPC::LRRCRegClass);
18871 }
18872
18873 // Handle special cases of physical registers that are not properly handled
18874 // by the base class.
18875 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18876 // If we name a VSX register, we can't defer to the base class because it
18877 // will not recognize the correct register (their names will be VSL{0-31}
18878 // and V{0-31} so they won't match). So we match them here.
18879 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
// NOTE(review): atoi reads from the raw StringRef buffer and stops at the
// first non-digit (normally the closing '}'); the range check is an assert
// only, so presumably nothing validates VSNum in release builds -- confirm.
18880 int VSNum = atoi(Constraint.data() + 3);
18881 assert(VSNum >= 0 && VSNum <= 63 &&
18882 "Attempted to access a vsr out of range");
// Numbers 0-31 map to VSL0.., numbers 32-63 map to V0...
18883 if (VSNum < 32)
18884 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18885 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18886 }
18887
18888 // For float registers, we can't defer to the base class as it will match
18889 // the SPILLTOVSRRC class.
18890 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18891 int RegNum = atoi(Constraint.data() + 2);
18892 if (RegNum > 31 || RegNum < 0)
18893 report_fatal_error("Invalid floating point register number");
18894 if (VT == MVT::f32 || VT == MVT::i32)
18895 return Subtarget.hasSPE()
18896 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18897 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18898 if (VT == MVT::f64 || VT == MVT::i64)
18899 return Subtarget.hasSPE()
18900 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18901 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18902 }
18903 }
18904
18905 std::pair<unsigned, const TargetRegisterClass *> R =
18907
18908 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18909 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18910 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18911 // register.
18912 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18913 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18914 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18915 PPC::GPRCRegClass.contains(R.first))
18916 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18917 PPC::sub_32, &PPC::G8RCRegClass),
18918 &PPC::G8RCRegClass);
18919
18920 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18921 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18922 R.first = PPC::CR0;
18923 R.second = &PPC::CRRCRegClass;
18924 }
18925 // FIXME: This warning should ideally be emitted in the front end.
// NOTE(review): the condition tests V20..V31 / VF20..VF31, while the message
// text says "20 to 32" -- confirm which is intended.
18926 const auto &TM = getTargetMachine();
18927 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18928 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18929 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18930 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18931 errs() << "warning: vector registers 20 to 32 are reserved in the "
18932 "default AIX AltiVec ABI and cannot be used\n";
18933 }
18934
18935 return R;
18936}
18937
18938/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18939/// vector. If it is invalid, don't add anything to Ops.
///
/// Only single-character constraints are handled here. For the immediate
/// constraints I..P the operand must be a constant (CST); when its
/// sign-extended value satisfies the letter's predicate, an i64 target
/// constant is pushed onto Ops.
// NOTE(review): several lines are absent from this listing: the start of the
// signature (listing line 18940), the definition of CST (listing line 18961),
// the predicates guarding the 'J' and 'L' cases (listing lines 18974 and
// 18978) and the statement after "Handle standard constraint letters"
// (listing line 19012) -- confirm against the upstream file.
18941 StringRef Constraint,
18942 std::vector<SDValue> &Ops,
18943 SelectionDAG &DAG) const {
18944 SDValue Result;
18945
18946 // Only support length 1 constraints.
18947 if (Constraint.size() > 1)
18948 return;
18949
18950 char Letter = Constraint[0];
18951 switch (Letter) {
18952 default: break;
18953 case 'I':
18954 case 'J':
18955 case 'K':
18956 case 'L':
18957 case 'M':
18958 case 'N':
18959 case 'O':
18960 case 'P': {
18962 if (!CST) return; // Must be an immediate to match.
18963 SDLoc dl(Op);
18964 int64_t Value = CST->getSExtValue();
18965 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18966 // numbers are printed as such.
// Result stays empty when the value does not satisfy the letter's predicate.
18967 switch (Letter) {
18968 default: llvm_unreachable("Unknown constraint letter!");
18969 case 'I': // "I" is a signed 16-bit constant.
18970 if (isInt<16>(Value))
18971 Result = DAG.getTargetConstant(Value, dl, TCVT);
18972 break;
18973 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
18975 Result = DAG.getTargetConstant(Value, dl, TCVT);
18976 break;
18977 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
18979 Result = DAG.getTargetConstant(Value, dl, TCVT);
18980 break;
18981 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18982 if (isUInt<16>(Value))
18983 Result = DAG.getTargetConstant(Value, dl, TCVT);
18984 break;
18985 case 'M': // "M" is a constant that is greater than 31.
18986 if (Value > 31)
18987 Result = DAG.getTargetConstant(Value, dl, TCVT);
18988 break;
18989 case 'N': // "N" is a positive constant that is an exact power of two.
18990 if (Value > 0 && isPowerOf2_64(Value))
18991 Result = DAG.getTargetConstant(Value, dl, TCVT);
18992 break;
18993 case 'O': // "O" is the constant zero.
18994 if (Value == 0)
18995 Result = DAG.getTargetConstant(Value, dl, TCVT);
18996 break;
18997 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
// NOTE(review): -Value overflows when Value is the minimum int64_t; confirm
// whether such an operand can reach this point.
18998 if (isInt<16>(-Value))
18999 Result = DAG.getTargetConstant(Value, dl, TCVT);
19000 break;
19001 }
19002 break;
19003 }
19004 }
19005
19006 if (Result.getNode()) {
19007 Ops.push_back(Result);
19008 return;
19009 }
19010
19011 // Handle standard constraint letters.
19013}
19014
// Appends the call's !annotation metadata (as an MDNode operand) to Ops for
// the trap intrinsics ppc_tdw, ppc_tw, ppc_trapd and ppc_trap. Nothing is
// added when I has at most one operand, when Ops[1] is not a constant, when
// the intrinsic is not one of those four, or when no annotation is attached.
// NOTE(review): the leading signature lines (listing lines 19015-19016) are
// absent from this listing; I and Ops are presumably declared there.
19017 SelectionDAG &DAG) const {
19018 if (I.getNumOperands() <= 1)
19019 return;
19020 if (!isa<ConstantSDNode>(Ops[1].getNode()))
19021 return;
// Ops[1] holds the intrinsic ID as a constant.
19022 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
19023 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
19024 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
19025 return;
19026
19027 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
19028 Ops.push_back(DAG.getMDNode(MDN));
19029}
19030
19031// isLegalAddressingMode - Return true if the addressing mode represented
19032// by AM is legal for this target, for a load/store of the specified type.
//
// Rejected: vector types with a non-zero offset before Power9 vector support,
// offsets outside the range tested below, any global base, r+r+i, a scale of
// 2 combined with a base register or offset, and any scale other than 0, 1
// or 2. AS and I are not referenced in the visible body.
// NOTE(review): the line carrying the return type and function name (listing
// line 19033) is absent from this listing.
19034 const AddrMode &AM, Type *Ty,
19035 unsigned AS,
19036 Instruction *I) const {
19037 // Vector type r+i form is supported since power9 as DQ form. We don't check
19038 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
19039 // imm form is preferred and the offset can be adjusted to use imm form later
19040 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
19041 // max offset to check legal addressing mode, we should be a little aggressive
19042 // to contain other offsets for that LSRUse.
19043 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
19044 return false;
19045
19046 // PPC allows a sign-extended 16-bit immediate field.
// NOTE(review): this test accepts offsets from -65535 to 65534, which is
// wider than a signed 16-bit field (-32768..32767). It may be deliberate,
// given the "be a little aggressive" remark above -- confirm.
19047 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
19048 return false;
19049
19050 // No global is ever allowed as a base.
19051 if (AM.BaseGV)
19052 return false;
19053
19054 // PPC only support r+r,
19055 switch (AM.Scale) {
19056 case 0: // "r+i" or just "i", depending on HasBaseReg.
19057 break;
19058 case 1:
19059 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
19060 return false;
19061 // Otherwise we have r+r or r+i.
19062 break;
19063 case 2:
19064 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
19065 return false;
19066 // Allow 2*r as r+r.
19067 break;
19068 default:
19069 // No other scales are supported.
19070 return false;
19071 }
19072
19073 return true;
19074}
19075
/// Lower a return-address query. Operand 0 of Op is the constant frame depth.
/// The function is marked as taking its return address and as requiring the
/// LR store. For Depth > 0 the return address is loaded from a frame address
/// plus the frame lowering's return-save offset; for Depth == 0 it is loaded
/// through the return-address frame index.
// NOTE(review): several lines are absent from this listing: the declaration
// of MF (listing line 19078) and the trailing arguments of the three getLoad
// calls (listing lines 19097, 19103 and 19109) -- confirm against the
// upstream file.
19076SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
19077 SelectionDAG &DAG) const {
19079 MachineFrameInfo &MFI = MF.getFrameInfo();
19080 MFI.setReturnAddressIsTaken(true);
19081
19082 SDLoc dl(Op);
19083 unsigned Depth = Op.getConstantOperandVal(0);
19084
19085 // Make sure the function does not optimize away the store of the RA to
19086 // the stack.
19087 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
19088 FuncInfo->setLRStoreRequired();
19089 auto PtrVT = getPointerTy(MF.getDataLayout());
19090
19091 if (Depth > 0) {
19092 // The link register (return address) is saved in the caller's frame
19093 // not the callee's stack frame. So we must get the caller's frame
19094 // address and load the return address at the LR offset from there.
19095 SDValue FrameAddr =
19096 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
19098 SDValue Offset =
19099 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
19100 Subtarget.getScalarIntVT());
19101 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
19102 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
19104 }
19105
19106 // Just load the return address off the stack.
19107 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
19108 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
19110}
19111
/// Lower a frame-address query. Operand 0 of Op is the constant frame depth.
/// The function is marked as taking its frame address. The result starts as
/// a copy from the frame register and is then replaced Depth times by a load
/// through the current address, so each step follows one link of the chain
/// stored at the frame address.
19112SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
19113 SelectionDAG &DAG) const {
19114 SDLoc dl(Op);
19115 unsigned Depth = Op.getConstantOperandVal(0);
19116
19117 MachineFunction &MF = DAG.getMachineFunction();
19118 MachineFrameInfo &MFI = MF.getFrameInfo();
19119 MFI.setFrameAddressIsTaken(true);
19120
19121 EVT PtrVT = getPointerTy(MF.getDataLayout());
// The 64-bit register variants are chosen from the pointer width.
19122 bool isPPC64 = PtrVT == MVT::i64;
19123
19124 // Naked functions never have a frame pointer, and so we use r1. For all
19125 // other functions, this decision must be delayed until during PEI.
19126 unsigned FrameReg;
19127 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
19128 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
19129 else
19130 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
19131
19132 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
19133 PtrVT);
// Each load is chained to the entry node, not to the previous load.
19134 while (Depth--)
19135 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
19136 FrameAddr, MachinePointerInfo());
19137 return FrameAddr;
19138}
19139
19140#define GET_REGISTER_MATCHER
19141#include "PPCGenAsmMatcher.inc"
19142
// Resolves a register name for a register global variable. Only 32-bit
// scalars, or 64-bit scalars on PPC64, are accepted; any other type is a
// fatal error. An unresolved (null) Reg is returned unchanged. R0, and R2 on
// PPC64, are rejected with a fatal error. For a 64-bit value whose name
// starts with "r" (case-insensitive), the 32-bit GPR is converted to the
// corresponding X register. MF is not referenced in the visible body.
// NOTE(review): the first signature line (listing line 19143) and the
// statement that defines Reg from RegName (listing line 19151) are absent
// from this listing -- confirm against the upstream file.
19144 const MachineFunction &MF) const {
19145 bool IsPPC64 = Subtarget.isPPC64();
19146
19147 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
19148 if (!Is64Bit && VT != LLT::scalar(32))
19149 report_fatal_error("Invalid register global variable type");
19150
19152 if (!Reg)
19153 return Reg;
19154
19155 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
19156 // Need followup investigation as to why.
19157 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
19158 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
19159 StringRef(RegName) + "\"."));
19160
19161 // Convert GPR to GP8R register for 64bit.
// The conversion keeps the register index: Rn becomes Xn.
19162 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
19163 Reg = Reg.id() - PPC::R0 + PPC::X0;
19164
19165 return Reg;
19166}
19167
// Decides whether an address is reached indirectly through the GOT/TOC.
// NOTE(review): the declaration line, the definition of CModel, and the
// conditions guarding the last two `return` statements are absent from this
// listing; the existing comments are the only evidence for those guards.
19169 // 32-bit SVR4 ABI access everything as got-indirect.
19170 if (Subtarget.is32BitELFABI())
19171 return true;
19172
19173 // AIX accesses everything indirectly through the TOC, which is similar to
19174 // the GOT.
19175 if (Subtarget.isAIXABI())
19176 return true;
19177
19179 // If it is small or large code model, module locals are accessed
19180 // indirectly by loading their address from .toc/.got.
19181 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
19182 return true;
19183
19184 // JumpTable and BlockAddress are accessed as got-indirect.
19186 return true;
19187
// For a global address (G), defer to the subtarget's indirect-symbol query.
19189 return Subtarget.isGVIndirectSymbol(G->getGlobal());
19190
19191 return false;
19192}
19193
// Unconditionally answers false.
// NOTE(review): the declarator line (name and parameters) is absent from this
// listing.
19194bool
19196 // The PowerPC target isn't yet aware of offsets.
19197 return false;
19198}
19199
// Describes the memory behaviour of PowerPC intrinsics that touch memory.
// For each recognized intrinsic an IntrinsicInfo record is filled in and
// appended to Infos; unrecognized intrinsics append nothing.
// NOTE(review): the declaration line and the `Info.flags` assignments of the
// atomic and store-conditional cases are absent from this listing.
19202 MachineFunction &MF, unsigned Intrinsic) const {
19203 IntrinsicInfo Info;
19204 switch (Intrinsic) {
// 128-bit atomic read-modify-write / compare-exchange: pointer is argument 0,
// i128 memory type, 16-byte alignment.
19205 case Intrinsic::ppc_atomicrmw_xchg_i128:
19206 case Intrinsic::ppc_atomicrmw_add_i128:
19207 case Intrinsic::ppc_atomicrmw_sub_i128:
19208 case Intrinsic::ppc_atomicrmw_nand_i128:
19209 case Intrinsic::ppc_atomicrmw_and_i128:
19210 case Intrinsic::ppc_atomicrmw_or_i128:
19211 case Intrinsic::ppc_atomicrmw_xor_i128:
19212 case Intrinsic::ppc_cmpxchg_i128:
19213 Info.opc = ISD::INTRINSIC_W_CHAIN;
19214 Info.memVT = MVT::i128;
19215 Info.ptrVal = I.getArgOperand(0);
19216 Info.offset = 0;
19217 Info.align = Align(16);
19220 Infos.push_back(Info);
19221 return;
// 128-bit atomic load: pointer is argument 0.
19222 case Intrinsic::ppc_atomic_load_i128:
19223 Info.opc = ISD::INTRINSIC_W_CHAIN;
19224 Info.memVT = MVT::i128;
19225 Info.ptrVal = I.getArgOperand(0);
19226 Info.offset = 0;
19227 Info.align = Align(16);
19229 Infos.push_back(Info);
19230 return;
// 128-bit atomic store: produces no value; the pointer is argument 2.
19231 case Intrinsic::ppc_atomic_store_i128:
19232 Info.opc = ISD::INTRINSIC_VOID;
19233 Info.memVT = MVT::i128;
19234 Info.ptrVal = I.getArgOperand(2);
19235 Info.offset = 0;
19236 Info.align = Align(16);
19238 Infos.push_back(Info);
19239 return;
// Altivec/VSX vector loads: pointer is argument 0.
19240 case Intrinsic::ppc_altivec_lvx:
19241 case Intrinsic::ppc_altivec_lvxl:
19242 case Intrinsic::ppc_altivec_lvebx:
19243 case Intrinsic::ppc_altivec_lvehx:
19244 case Intrinsic::ppc_altivec_lvewx:
19245 case Intrinsic::ppc_vsx_lxvd2x:
19246 case Intrinsic::ppc_vsx_lxvw4x:
19247 case Intrinsic::ppc_vsx_lxvd2x_be:
19248 case Intrinsic::ppc_vsx_lxvw4x_be:
19249 case Intrinsic::ppc_vsx_lxvl:
19250 case Intrinsic::ppc_vsx_lxvll: {
// Pick the memory type: element loads use the scalar element type, the
// lxvd2x forms use v2f64, everything else is treated as v4i32.
19251 EVT VT;
19252 switch (Intrinsic) {
19253 case Intrinsic::ppc_altivec_lvebx:
19254 VT = MVT::i8;
19255 break;
19256 case Intrinsic::ppc_altivec_lvehx:
19257 VT = MVT::i16;
19258 break;
19259 case Intrinsic::ppc_altivec_lvewx:
19260 VT = MVT::i32;
19261 break;
19262 case Intrinsic::ppc_vsx_lxvd2x:
19263 case Intrinsic::ppc_vsx_lxvd2x_be:
19264 VT = MVT::v2f64;
19265 break;
19266 default:
19267 VT = MVT::v4i32;
19268 break;
19269 }
19270
19271 Info.opc = ISD::INTRINSIC_W_CHAIN;
19272 Info.memVT = VT;
19273 Info.ptrVal = I.getArgOperand(0);
// The described window starts (store size - 1) bytes before the pointer and
// spans (2 * store size - 1) bytes, with alignment 1.
// NOTE(review): presumably conservative because the instruction may access an
// address other than the exact pointer value - confirm against the ISA.
19274 Info.offset = -VT.getStoreSize()+1;
19275 Info.size = 2*VT.getStoreSize()-1;
19276 Info.align = Align(1);
19277 Info.flags = MachineMemOperand::MOLoad;
19278 Infos.push_back(Info);
19279 return;
19280 }
// Altivec/VSX vector stores: mirror of the load case, pointer is argument 1.
19281 case Intrinsic::ppc_altivec_stvx:
19282 case Intrinsic::ppc_altivec_stvxl:
19283 case Intrinsic::ppc_altivec_stvebx:
19284 case Intrinsic::ppc_altivec_stvehx:
19285 case Intrinsic::ppc_altivec_stvewx:
19286 case Intrinsic::ppc_vsx_stxvd2x:
19287 case Intrinsic::ppc_vsx_stxvw4x:
19288 case Intrinsic::ppc_vsx_stxvd2x_be:
19289 case Intrinsic::ppc_vsx_stxvw4x_be:
19290 case Intrinsic::ppc_vsx_stxvl:
19291 case Intrinsic::ppc_vsx_stxvll: {
19292 EVT VT;
19293 switch (Intrinsic) {
19294 case Intrinsic::ppc_altivec_stvebx:
19295 VT = MVT::i8;
19296 break;
19297 case Intrinsic::ppc_altivec_stvehx:
19298 VT = MVT::i16;
19299 break;
19300 case Intrinsic::ppc_altivec_stvewx:
19301 VT = MVT::i32;
19302 break;
19303 case Intrinsic::ppc_vsx_stxvd2x:
19304 case Intrinsic::ppc_vsx_stxvd2x_be:
19305 VT = MVT::v2f64;
19306 break;
19307 default:
19308 VT = MVT::v4i32;
19309 break;
19310 }
19311
19312 Info.opc = ISD::INTRINSIC_VOID;
19313 Info.memVT = VT;
19314 Info.ptrVal = I.getArgOperand(1);
// Same window computation as for the vector loads above.
19315 Info.offset = -VT.getStoreSize()+1;
19316 Info.size = 2*VT.getStoreSize()-1;
19317 Info.align = Align(1);
19318 Info.flags = MachineMemOperand::MOStore;
19319 Infos.push_back(Info);
19320 return;
19321 }
// Store-conditional intrinsics: memory type and alignment follow the access
// width (8/4/2/1 bytes); pointer is argument 0.
19322 case Intrinsic::ppc_stdcx:
19323 case Intrinsic::ppc_stwcx:
19324 case Intrinsic::ppc_sthcx:
19325 case Intrinsic::ppc_stbcx: {
19326 EVT VT;
// Align(8) is the initial value and is kept for the stdcx case.
19327 auto Alignment = Align(8);
19328 switch (Intrinsic) {
19329 case Intrinsic::ppc_stdcx:
19330 VT = MVT::i64;
19331 break;
19332 case Intrinsic::ppc_stwcx:
19333 VT = MVT::i32;
19334 Alignment = Align(4);
19335 break;
19336 case Intrinsic::ppc_sthcx:
19337 VT = MVT::i16;
19338 Alignment = Align(2);
19339 break;
19340 case Intrinsic::ppc_stbcx:
19341 VT = MVT::i8;
19342 Alignment = Align(1);
19343 break;
19344 }
19345 Info.opc = ISD::INTRINSIC_W_CHAIN;
19346 Info.memVT = VT;
19347 Info.ptrVal = I.getArgOperand(0);
19348 Info.offset = 0;
19349 Info.align = Alignment;
19351 Infos.push_back(Info);
19352 return;
19353 }
// Any other intrinsic: nothing is appended to Infos.
19354 default:
19355 break;
19356 }
19357}
19358
// Chooses the value type used to expand a memory operation.
// NOTE(review): the declaration line is absent from this listing. Every
// return visible below yields a concrete MVT (v8i16, v4i32, i64 or i32); no
// visible path returns Other, so the sentence below may be stale - confirm.
19359 /// It returns EVT::Other if the type should be determined using generic
19360 /// target-independent logic.
19362 LLVMContext &Context, const MemOp &Op,
19363 const AttributeList &FuncAttributes) const {
// Vector types are considered only when optimizing.
19364 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
19365 // We should use Altivec/VSX loads and stores when available. For unaligned
19366 // addresses, unaligned VSX loads are only fast starting with the P8.
19367 if (Subtarget.hasAltivec() && Op.size() >= 16) {
19368 if (Op.isMemset() && Subtarget.hasVSX()) {
// Bytes left over after the last full 16-byte chunk.
19369 uint64_t TailSize = Op.size() % 16;
19370 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
19371 // element if vector element type matches tail store. For tail size
19372 // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
19373 if (TailSize > 2 && TailSize <= 4) {
19374 return MVT::v8i16;
19375 }
19376 return MVT::v4i32;
19377 }
// Non-memset (or no VSX): use the vector type only for 16-byte aligned
// operations or when the P8 vector feature is present.
19378 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
19379 return MVT::v4i32;
19380 }
19381 }
19382
// Scalar fallback: the native integer width.
19383 if (Subtarget.isPPC64()) {
19384 return MVT::i64;
19385 }
19386
19387 return MVT::i32;
19388}
19389
19390 /// Returns true if it is beneficial to convert a load of a constant
19391 /// to just the constant itself.
// NOTE(review): the first line of the declaration is absent from this
// listing. Ty must be an integer type (asserted); the answer is true for
// widths from 1 to 64 bits inclusive.
19393 Type *Ty) const {
19394 assert(Ty->isIntegerTy());
19395
19396 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19397 return !(BitSize == 0 || BitSize > 64);
19398}
19399
// IR-type query on (Ty1, Ty2): true only when both are integer types and the
// pair is exactly 64-bit -> 32-bit.
// NOTE(review): the declaration line is absent from this listing.
19401 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19402 return false;
19403 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
19404 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
19405 return NumBits1 == 64 && NumBits2 == 32;
19406}
19407
// Value-type query on (VT1, VT2): true only when both are integer types and
// the pair is exactly 64-bit -> 32-bit.
// NOTE(review): the declaration line is absent from this listing.
19409 if (!VT1.isInteger() || !VT2.isInteger())
19410 return false;
19411 unsigned NumBits1 = VT1.getSizeInBits();
19412 unsigned NumBits2 = VT2.getSizeInBits();
19413 return NumBits1 == 64 && NumBits2 == 32;
19414}
19415
// Reports whether zero-extending Val to VT2 is free.
// NOTE(review): the declaration line is absent from this listing.
19417 // Generally speaking, zexts are not free, but they are free when they can be
19418 // folded with other operations.
19419 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
19420 EVT MemVT = LD->getMemoryVT();
// Free for i1/i8/i16 loads (and i32 on PPC64) that are either plain or
// already zero-extending.
19421 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
19422 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
19423 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
19424 LD->getExtensionType() == ISD::ZEXTLOAD))
19425 return true;
19426 }
19427
19428 // FIXME: Add other cases...
19429 // - 32-bit shifts with a zext to i64
19430 // - zext after ctlz, bswap, etc.
19431 // - zext after and by a constant mask
19432
// Everything else is decided by the generic implementation.
19433 return TargetLowering::isZExtFree(Val, VT2);
19434}
19435
19436bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
19437 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
19438 "invalid fpext types");
19439 // Extending to float128 is not free.
19440 if (DestVT == MVT::f128)
19441 return false;
19442 return true;
19443}
19444
// True when Imm fits in 16 bits, interpreted either as signed or as unsigned.
// NOTE(review): the declaration line is absent from this listing.
19446 return isInt<16>(Imm) || isUInt<16>(Imm);
19447}
19448
// True when Imm fits in 16 bits, interpreted either as signed or as unsigned.
// NOTE(review): the declaration line is absent from this listing.
19450 return isInt<16>(Imm) || isUInt<16>(Imm);
19451}
19452
// Reports whether a misaligned access of type VT is allowed; on success *Fast
// is set to 1 when the caller supplied a non-null Fast pointer.
// NOTE(review): the first declaration lines and the condition guarding the
// first `return false` are absent from this listing.
19455 unsigned *Fast) const {
19457 return false;
19458
19459 // PowerPC supports unaligned memory access for simple non-vector types.
19460 // Although accessing unaligned addresses is not as efficient as accessing
19461 // aligned addresses, it is generally more efficient than manual expansion,
19462 // and generally only traps for software emulation when crossing page
19463 // boundaries.
19464
19465 if (!VT.isSimple())
19466 return false;
19467
// Scalar FP accesses additionally depend on a subtarget capability.
19468 if (VT.isFloatingPoint() && !VT.isVector() &&
19469 !Subtarget.allowsUnalignedFPAccess())
19470 return false;
19471
// Vectors: only the four listed 128-bit types, and only with VSX.
19472 if (VT.getSimpleVT().isVector()) {
19473 if (Subtarget.hasVSX()) {
19474 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
19475 VT != MVT::v4f32 && VT != MVT::v4i32)
19476 return false;
19477 } else {
19478 return false;
19479 }
19480 }
19481
19482 if (VT == MVT::ppcf128)
19483 return false;
19484
19485 if (Fast)
19486 *Fast = 1;
19487
19488 return true;
19489}
19490
// Decides whether a multiply by the constant C should be decomposed into
// shift/add/sub form. Only scalar integer types with a constant that fits in
// 64 signed bits are considered.
// NOTE(review): the first line of the declaration is absent from this listing.
19492 SDValue C) const {
19493 // Check integral scalar types.
19494 if (!VT.isScalarInteger())
19495 return false;
19496 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
19497 if (!ConstNode->getAPIntValue().isSignedIntN(64))
19498 return false;
19499 // This transformation will generate >= 2 operations. But the following
19500 // cases will generate <= 2 instructions during ISEL. So exclude them.
19501 // 1. If the constant multiplier fits 16 bits, it can be handled by one
19502 // HW instruction, ie. MULLI
19503 // 2. If the multiplier after shifted fits 16 bits, an extra shift
19504 // instruction is needed than case 1, ie. MULLI and RLDICR
19505 int64_t Imm = ConstNode->getSExtValue();
// Drop trailing zero bits before testing the 16-bit immediate range.
19506 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
19507 Imm >>= Shift;
19508 if (isInt<16>(Imm))
19509 return false;
// Accept when the shifted constant or its negation is one away from a power
// of two (unsigned arithmetic, so negation wraps).
19510 uint64_t UImm = static_cast<uint64_t>(Imm);
19511 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
19512 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
19513 return true;
19514 }
19515 return false;
19516}
19517
19523
// IR-type query: never true with SPE or soft-float; otherwise true for float
// and double scalar types, and for fp128 only with the P9 vector feature.
// NOTE(review): the first line of the declaration is absent from this listing.
19525 Type *Ty) const {
19526 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
19527 return false;
// The decision is made on the scalar (element) type of Ty.
19528 switch (Ty->getScalarType()->getTypeID()) {
19529 case Type::FloatTyID:
19530 case Type::DoubleTyID:
19531 return true;
19532 case Type::FP128TyID:
19533 return Subtarget.hasP9Vector();
19534 default:
19535 return false;
19536 }
19537}
19538
// Reports whether hoisting instruction I is profitable. Instructions with
// more than one use are always considered profitable; single-use FMul and
// Load instructions are inspected together with their user.
// NOTE(review): the declaration line, the definition of Options, and one
// operand line of the FMul return expression are absent from this listing.
19539 // FIXME: add more patterns which are not profitable to hoist.
19541 if (!I->hasOneUse())
19542 return true;
19543
19544 Instruction *User = I->user_back();
19545 assert(User && "A single use instruction with no uses.");
19546
19547 switch (I->getOpcode()) {
19548 case Instruction::FMul: {
19549 // Don't break FMA, PowerPC prefers FMA.
19550 if (User->getOpcode() != Instruction::FSub &&
19551 User->getOpcode() != Instruction::FAdd)
19552 return true;
19553
19555 const Function *F = I->getFunction();
19556 const DataLayout &DL = F->getDataLayout();
19557 Type *Ty = User->getOperand(0)->getType();
// Contraction must be permitted on both the multiply and its user.
19558 bool AllowContract = I->getFastMathFlags().allowContract() &&
19559 User->getFastMathFlags().allowContract();
19560
// Not profitable when the multiply and its user could be fused into an FMA.
19561 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
19563 (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
19564 }
19565 case Instruction::Load: {
19566 // Don't break "store (load float*)" pattern, this pattern will be combined
19567 // to "store (load int32)" in later InstCombine pass. See function
19568 // combineLoadToOperationType. On PowerPC, loading a float point takes more
19569 // cycles than loading a 32 bit integer.
19570 LoadInst *LI = cast<LoadInst>(I);
19571 // For the loads that combineLoadToOperationType does nothing, like
19572 // ordered load, it should be profitable to hoist them.
19573 // For swifterror load, it can only be used for pointer to pointer type, so
19574 // later type check should get rid of this case.
19575 if (!LI->isUnordered())
19576 return true;
19577
19578 if (User->getOpcode() != Instruction::Store)
19579 return true;
19580
19581 if (I->getType()->getTypeID() != Type::FloatTyID)
19582 return true;
19583
// Unordered float load whose only user is a store: keep it in place.
19584 return false;
19585 }
19586 default:
19587 return true;
19588 }
// Every switch arm above returns, so this statement is not reached.
19589 return true;
19590}
19591
// Returns a pointer to a static, zero-terminated register list containing
// X12, LR8 and CTR8.
// NOTE(review): the declarator line (name and parameters) is absent from this
// listing.
19592const MCPhysReg *
19594 // LR is a callee-save register, but we must treat it as clobbered by any call
19595 // site. Hence we include LR in the scratch registers, which are in turn added
19596 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
19597 // to CTR, which is used by any indirect call.
19598 static const MCPhysReg ScratchRegs[] = {
19599 PPC::X12, PPC::LR8, PPC::CTR8, 0
19600 };
19601
19602 return ScratchRegs;
19603}
19604
// Returns X3 on PPC64 and R3 otherwise; PersonalityFn is not consulted in the
// visible body.
// NOTE(review): the first line of the declaration is absent from this listing.
19606 const Constant *PersonalityFn) const {
19607 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
19608}
19609
// Returns X4 on PPC64 and R4 otherwise; PersonalityFn is not consulted in the
// visible body.
// NOTE(review): the first line of the declaration is absent from this listing.
19611 const Constant *PersonalityFn) const {
19612 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
19613}
19614
// Query on (VT, DefinedValues): v2i64 follows the direct-move feature; any
// other type answers true when VSX is available.
// NOTE(review): the declarator line and the final return statement (the
// non-VSX fallback) are absent from this listing.
19615bool
19617 EVT VT , unsigned DefinedValues) const {
19618 if (VT == MVT::v2i64)
19619 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
19620
19621 if (Subtarget.hasVSX())
19622 return true;
19623
19625}
19626
// Returns Sched::ILP unless the ILP preference is disabled or the subtarget
// enables the machine scheduler.
// NOTE(review): the declaration line and the statement controlled by the `if`
// below are absent from this listing; as printed, the `if` would govern the
// final return.
19628 if (DisableILPPref || Subtarget.enableMachineScheduler())
19630
19631 return Sched::ILP;
19632}
19633
19634 // Create a fast isel object.
// Forwards all three arguments unchanged to PPC::createFastISel.
// NOTE(review): the first line of the declaration is absent from this listing.
19636 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
19637 const LibcallLoweringInfo *LibcallLowering) const {
19638 return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
19639}
19640
19641// 'Inverted' means the FMA opcode after negating one multiplicand.
19642// For example, (fma -a b c) = (fnmsub a b c)
19643static unsigned invertFMAOpcode(unsigned Opc) {
19644 switch (Opc) {
19645 default:
19646 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19647 case ISD::FMA:
19648 return PPCISD::FNMSUB;
19649 case PPCISD::FNMSUB:
19650 return ISD::FMA;
19651 }
19652}
19653
// Builds a negated form of Op for the PPC-specific FNMSUB node and reports
// its cost through Cost; every other opcode is delegated to the generic
// TargetLowering implementation.
// NOTE(review): the declaration lines, the condition guarding the first
// `return SDValue()`, and the declarations of N0Cost/N1Cost/N2Cost are absent
// from this listing.
19655 bool LegalOps, bool OptForSize,
19657 unsigned Depth) const {
19659 return SDValue();
19660
19661 unsigned Opc = Op.getOpcode();
19662 EVT VT = Op.getValueType();
19663 SDNodeFlags Flags = Op.getNode()->getFlags();
19664
19665 switch (Opc) {
19666 case PPCISD::FNMSUB:
// Only single-use nodes of a legal type are rewritten.
19667 if (!Op.hasOneUse() || !isTypeLegal(VT))
19668 break;
19669
19670 SDValue N0 = Op.getOperand(0);
19671 SDValue N1 = Op.getOperand(1);
19672 SDValue N2 = Op.getOperand(2);
19673 SDLoc Loc(Op);
19674
// Every rewrite below needs the negated addend, so give up without it.
19676 SDValue NegN2 =
19677 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
19678
19679 if (!NegN2)
19680 return SDValue();
19681
19682 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
19683 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
19684 // These transformations may change sign of zeroes. For example,
19685 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
19686 if (Flags.hasNoSignedZeros()) {
19687 // Try and choose the cheaper one to negate.
19689 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
19690 N0Cost, Depth + 1);
19691
19693 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
19694 N1Cost, Depth + 1);
19695
// Prefer negating operand 0 on a cost tie.
19696 if (NegN0 && N0Cost <= N1Cost) {
19697 Cost = std::min(N0Cost, N2Cost);
19698 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
19699 } else if (NegN1) {
19700 Cost = std::min(N1Cost, N2Cost);
19701 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
19702 }
19703 }
19704
19705 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
19706 if (isOperationLegal(ISD::FMA, VT)) {
19707 Cost = N2Cost;
19708 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
19709 }
19710
19711 break;
19712 }
19713
19714 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
19715 Cost, Depth);
19716}
19717
19718 // Override to enable LOAD_STACK_GUARD lowering on Linux.
// Also answers true when the module's stack-protector guard is "tls",
// regardless of the target OS.
// NOTE(review): the declaration line and the fallback return statement are
// absent from this listing.
19720 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19721 return true;
19723}
19724
// Reports whether the floating-point immediate Imm of type VT is legal.
// Requires a simple value type and VSX; only f32, f64 and ppcf128 can be
// accepted.
// NOTE(review): the first line of the declaration is absent from this listing.
19726 bool ForCodeSize) const {
19727 if (!VT.isSimple() || !Subtarget.hasVSX())
19728 return false;
19729
19730 switch(VT.getSimpleVT().SimpleTy) {
19731 default:
19732 // For FP types that are currently not supported by PPC backend, return
19733 // false. Examples: f16, f80.
19734 return false;
19735 case MVT::f32:
19736 case MVT::f64: {
19737 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19738 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
19739 return true;
19740 }
19741 bool IsExact;
// 16-bit integer result; the second argument (isUnsigned) is false.
19742 APSInt IntResult(16, false);
19743 // The rounding mode doesn't really matter because we only care about floats
19744 // that can be converted to integers exactly.
19745 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
19746 // For exact values in the range [-16, 15] we can materialize the float.
19747 if (IsExact && IntResult <= 15 && IntResult >= -16)
19748 return true;
// Otherwise only a zero immediate is accepted.
19749 return Imm.isZero();
19750 }
19751 case MVT::ppcf128:
19752 return Imm.isPosZero();
19753 }
19754}
19755
19756 // For vector shift operation op, fold
19757 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
// NOTE(review): the code below matches a mask equal to (element bits - 1),
// e.g. 31 for 32-bit elements, not ((1 << numbits(x)) - 1) as written above -
// confirm which formulation is intended.
// NOTE(review): the first line of the declaration is absent from this
// listing. Returns an empty SDValue when the pattern does not match.
19759 SelectionDAG &DAG) {
19760 SDValue N0 = N->getOperand(0);
19761 SDValue N1 = N->getOperand(1);
19762 EVT VT = N0.getValueType();
19763 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19764 unsigned Opcode = N->getOpcode();
19765 unsigned TargetOpcode;
19766
// Map the generic shift opcode to its PPC-specific counterpart.
19767 switch (Opcode) {
19768 default:
19769 llvm_unreachable("Unexpected shift operation");
19770 case ISD::SHL:
19771 TargetOpcode = PPCISD::SHL;
19772 break;
19773 case ISD::SRL:
19774 TargetOpcode = PPCISD::SRL;
19775 break;
19776 case ISD::SRA:
19777 TargetOpcode = PPCISD::SRA;
19778 break;
19779 }
19780
// Only legal vector shifts whose amount is (and y, splat(element bits - 1))
// are rewritten; the AND is dropped and y is used directly.
19781 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19782 N1->getOpcode() == ISD::AND)
19783 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19784 if (Mask->getZExtValue() == OpSizeInBits - 1)
19785 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19786
19787 return SDValue();
19788}
19789
// Combines a legal vector SHL/SRL/SRA whose shift amount is a constant splat.
// Two rewrites are visible:
//   * shift by (element bits - 1) -> PPC-specific shift by an all-ones splat;
//   * v2i64-style SHL by 1        -> (add x, x).
// Only i32 and i64 element types are handled. Returns an empty SDValue when
// nothing applies.
// NOTE(review): the second half of the VADD_SPLAT condition is absent from
// this listing.
19790SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
19791 DAGCombinerInfo &DCI) const {
19792 EVT VT = N->getValueType(0);
19793 assert(VT.isVector() && "Vector type expected.");
19794
19795 unsigned Opc = N->getOpcode();
19796 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
19797 "Unexpected opcode.");
19798
19799 if (!isOperationLegal(Opc, VT))
19800 return SDValue();
19801
19802 EVT EltTy = VT.getScalarType();
19803 unsigned EltBits = EltTy.getSizeInBits();
19804 if (EltTy != MVT::i64 && EltTy != MVT::i32)
19805 return SDValue();
19806
// Determine the splatted shift amount, either from a VADD_SPLAT node
// (operand 0) or from a constant-splat BUILD_VECTOR.
19807 SDValue N1 = N->getOperand(1);
19808 uint64_t SplatBits = 0;
19809 bool AddSplatCase = false;
19810 unsigned OpcN1 = N1.getOpcode();
19811 if (OpcN1 == PPCISD::VADD_SPLAT &&
19813 AddSplatCase = true;
19814 SplatBits = N1.getConstantOperandVal(0);
19815 }
19816
19817 if (!AddSplatCase) {
19818 if (OpcN1 != ISD::BUILD_VECTOR)
19819 return SDValue();
19820
19821 unsigned SplatBitSize;
19822 bool HasAnyUndefs;
19823 APInt APSplatBits, APSplatUndef;
19824 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
19825 bool BVNIsConstantSplat =
19826 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
19827 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
// The splat must repeat at exactly the element width.
19828 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
19829 return SDValue();
19830 SplatBits = APSplatBits.getZExtValue();
19831 }
19832
19833 SDLoc DL(N);
19834 SDValue N0 = N->getOperand(0);
19835 // PPC vector shifts by word/double look at only the low 5/6 bits of the
19836 // shift vector, which means the max value is 31/63. A shift vector of all
19837 // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
19838 // -16 to 15 range.
19839 if (SplatBits == (EltBits - 1)) {
// The asserts at the top guarantee Opc is one of the three cases, so NewOpc
// is always assigned.
19840 unsigned NewOpc;
19841 switch (Opc) {
19842 case ISD::SHL:
19843 NewOpc = PPCISD::SHL;
19844 break;
19845 case ISD::SRL:
19846 NewOpc = PPCISD::SRL;
19847 break;
19848 case ISD::SRA:
19849 NewOpc = PPCISD::SRA;
19850 break;
19851 }
19852 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
19853 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
19854 }
19855
// The remaining rewrite applies only to SHL and needs a legal vector ADD.
19856 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
19857 return SDValue();
19858
19859 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
19860 // before the BUILD_VECTOR is replaced by a load.
19861 if (EltTy != MVT::i64 || SplatBits != 1)
19862 return SDValue();
19863
19864 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
19865}
19866
19867SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19868 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19869 return Value;
19870
19871 if (N->getValueType(0).isVector())
19872 return combineVectorShift(N, DCI);
19873
19874 SDValue N0 = N->getOperand(0);
19875 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19876 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19877 N0.getOpcode() != ISD::SIGN_EXTEND ||
19878 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19879 N->getValueType(0) != MVT::i64)
19880 return SDValue();
19881
19882 // We can't save an operation here if the value is already extended, and
19883 // the existing shift is easier to combine.
19884 SDValue ExtsSrc = N0.getOperand(0);
19885 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19886 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19887 return SDValue();
19888
19889 SDLoc DL(N0);
19890 SDValue ShiftBy = SDValue(CN1, 0);
19891 // We want the shift amount to be i32 on the extswli, but the shift could
19892 // have an i64.
19893 if (ShiftBy.getValueType() == MVT::i64)
19894 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19895
19896 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19897 ShiftBy);
19898}
19899
19900SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19901 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19902 return Value;
19903
19904 if (N->getValueType(0).isVector())
19905 return combineVectorShift(N, DCI);
19906
19907 return SDValue();
19908}
19909
19910SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19911 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19912 return Value;
19913
19914 if (N->getValueType(0).isVector())
19915 return combineVectorShift(N, DCI);
19916
19917 return SDValue();
19918}
19919
19920 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19921 // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19922 // When C is zero, the equation (addi Z, -C) can be simplified to Z
19923 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
// Only attempted on PPC64. Returns an empty SDValue when the pattern does not
// match or the comparison is neither SETNE nor SETEQ.
// NOTE(review): the first line of the declaration is absent from this listing.
19925 const PPCSubtarget &Subtarget) {
19926 if (!Subtarget.isPPC64())
19927 return SDValue();
19928
19929 SDValue LHS = N->getOperand(0);
19930 SDValue RHS = N->getOperand(1);
19931
// Matches a single-use i64 zext of a single-use setcc that compares an i64
// value against a constant C with -C representable in 16 signed bits.
19932 auto isZextOfCompareWithConstant = [](SDValue Op) {
19933 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19934 Op.getValueType() != MVT::i64)
19935 return false;
19936
19937 SDValue Cmp = Op.getOperand(0);
19938 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19939 Cmp.getOperand(0).getValueType() != MVT::i64)
19940 return false;
19941
19942 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19943 int64_t NegConstant = 0 - Constant->getSExtValue();
19944 // Due to the limitations of the addi instruction,
19945 // -C is required to be [-32768, 32767].
19946 return isInt<16>(NegConstant);
19947 }
19948
19949 return false;
19950 };
19951
19952 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19953 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19954
19955 // If there is a pattern, canonicalize a zext operand to the RHS.
19956 if (LHSHasPattern && !RHSHasPattern)
19957 std::swap(LHS, RHS);
19958 else if (!LHSHasPattern && !RHSHasPattern)
19959 return SDValue();
19960
// From here on RHS is the zext-of-compare and LHS plays the role of X.
19961 SDLoc DL(N);
19962 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19963 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19964 SDValue Cmp = RHS.getOperand(0);
19965 SDValue Z = Cmp.getOperand(0);
19966 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19967 int64_t NegConstant = 0 - Constant->getSExtValue();
19968
19969 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19970 default: break;
19971 case ISD::SETNE: {
19972 // when C == 0
19973 // --> addze X, (addic Z, -1).carry
19974 // /
19975 // add X, (zext(setne Z, C))--
19976 // \ when -32768 <= -C <= 32767 && C != 0
19977 // --> addze X, (addic (addi Z, -C), -1).carry
19978 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19979 DAG.getConstant(NegConstant, DL, MVT::i64));
19980 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
// Adding all-ones produces a carry-out exactly when (Z - C) is non-zero.
19981 SDValue Addc =
19982 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19983 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19984 DAG.getConstant(0, DL, CarryType));
19985 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19986 DAG.getConstant(0, DL, MVT::i64),
19987 SDValue(Addc.getNode(), 1));
19988 }
19989 case ISD::SETEQ: {
19990 // when C == 0
19991 // --> addze X, (subfic Z, 0).carry
19992 // /
19993 // add X, (zext(sete Z, C))--
19994 // \ when -32768 <= -C <= 32767 && C != 0
19995 // --> addze X, (subfic (addi Z, -C), 0).carry
19996 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19997 DAG.getConstant(NegConstant, DL, MVT::i64));
19998 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
// 0 - (Z - C) borrows exactly when (Z - C) is non-zero; the borrow flag is
// inverted below so the value fed to the final add is 1 when Z == C.
19999 SDValue Subc =
20000 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
20001 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
20002 DAG.getConstant(0, DL, CarryType));
20003 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
20004 DAG.getConstant(1UL, DL, CarryType));
20005 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
20006 DAG.getConstant(0, DL, MVT::i64), Invert);
20007 }
20008 }
20009
20010 return SDValue();
20011}
20012
20013 // Transform
20014 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
20015 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
20016 // In this case both C1 and C2 must be known constants.
20017 // C1+C2 must fit into a 34 bit signed integer.
// Only attempted when the subtarget uses PC-relative calls. Returns an empty
// SDValue when the pattern does not match.
// NOTE(review): the first line of the declaration and the two statements that
// define GSDN and ConstNode are absent from this listing.
20019 const PPCSubtarget &Subtarget) {
20020 if (!Subtarget.isUsingPCRelativeCalls())
20021 return SDValue();
20022
20023 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
20024 // If we find that node try to cast the Global Address and the Constant.
20025 SDValue LHS = N->getOperand(0);
20026 SDValue RHS = N->getOperand(1);
20027
// Canonicalize so that the MAT_PCREL_ADDR node, if present, ends up in LHS.
20028 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
20029 std::swap(LHS, RHS);
20030
20031 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
20032 return SDValue();
20033
20034 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
20037
20038 // Check that both casts succeeded.
20039 if (!GSDN || !ConstNode)
20040 return SDValue();
20041
20042 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
20043 SDLoc DL(GSDN);
20044
20045 // The signed int offset needs to fit in 34 bits.
20046 if (!isInt<34>(NewOffset))
20047 return SDValue();
20048
20049 // The new global address is a copy of the old global address except
20050 // that it has the updated Offset.
20051 SDValue GA =
20052 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
20053 NewOffset, GSDN->getTargetFlags());
20054 SDValue MatPCRel =
20055 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
20056 return MatPCRel;
20057}
20058
20059 // Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
20060 // XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
20061 // Mathematical identity: X + 1 = X - (-1)
20062 // Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
20063 // Requirement: VSX feature for efficient xxleqv generation
// Only operand 1 of the ADD is inspected for the all-ones BUILD_VECTOR; the
// operands are not swapped. Returns an empty SDValue when nothing applies.
// NOTE(review): the first line of the declaration is absent from this listing.
20065 const PPCSubtarget &Subtarget) {
20066
20067 EVT VT = N->getValueType(0);
20068 if (!Subtarget.hasVSX())
20069 return SDValue();
20070
20071 // Handle v2i64, v4i32, v8i16 and v16i8 types
20072 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
20073 VT == MVT::v2i64))
20074 return SDValue();
20075
20076 SDValue LHS = N->getOperand(0);
20077 SDValue RHS = N->getOperand(1);
20078
20079 // Check if RHS is BUILD_VECTOR
20080 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
20081 return SDValue();
20082
20083 // Check if all the elements are 1
// Any non-constant (including undef) element rejects the transform.
20084 unsigned NumOfEles = RHS.getNumOperands();
20085 for (unsigned i = 0; i < NumOfEles; ++i) {
20086 auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
20087 if (!CN || CN->getSExtValue() != 1)
20088 return SDValue();
20089 }
20090 SDLoc DL(N);
20091
// Build the all-ones value as v4i32 regardless of VT; all bits set is -1 in
// every element width, so a bitcast to VT gives the needed splat of -1.
20092 SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
20093 SmallVector<SDValue, 4> Ops(4, MinusOne);
20094 SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
20095
20096 // Bitcast to the target vector type
20097 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
20098
20099 return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
20100}
20101
20102SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
20103 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
20104 return Value;
20105
20106 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
20107 return Value;
20108
20109 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
20110 return Value;
20111 return SDValue();
20112}
20113
20114// Detect TRUNCATE operations on bitcasts of float128 values.
20115// What we are looking for here is the situtation where we extract a subset
20116// of bits from a 128 bit float.
20117// This can be of two forms:
20118// 1) BITCAST of f128 feeding TRUNCATE
20119// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
20120// The reason this is required is because we do not have a legal i128 type
20121// and so we want to prevent having to store the f128 and then reload part
20122// of it.
20123SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
20124 DAGCombinerInfo &DCI) const {
20125 // If we are using CRBits then try that first.
20126 if (Subtarget.useCRBits()) {
20127 // Check if CRBits did anything and return that if it did.
20128 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
20129 return CRTruncValue;
20130 }
20131
20132 SDLoc dl(N);
20133 SDValue Op0 = N->getOperand(0);
20134
20135 // Looking for a truncate of i128 to i64.
20136 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
20137 return SDValue();
20138
20139 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
20140
20141 // SRL feeding TRUNCATE.
20142 if (Op0.getOpcode() == ISD::SRL) {
20143 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
20144 // The right shift has to be by 64 bits.
20145 if (!ConstNode || ConstNode->getZExtValue() != 64)
20146 return SDValue();
20147
20148 // Switch the element number to extract.
20149 EltToExtract = EltToExtract ? 0 : 1;
20150 // Update Op0 past the SRL.
20151 Op0 = Op0.getOperand(0);
20152 }
20153
20154 // BITCAST feeding a TRUNCATE possibly via SRL.
20155 if (Op0.getOpcode() == ISD::BITCAST &&
20156 Op0.getValueType() == MVT::i128 &&
20157 Op0.getOperand(0).getValueType() == MVT::f128) {
20158 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
20159 return DCI.DAG.getNode(
20160 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
20161 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
20162 }
20163 return SDValue();
20164}
20165
20166SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
20167 SelectionDAG &DAG = DCI.DAG;
20168
20169 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
20170 if (!ConstOpOrElement)
20171 return SDValue();
20172
20173 // An imul is usually smaller than the alternative sequence for legal type.
20175 isOperationLegal(ISD::MUL, N->getValueType(0)))
20176 return SDValue();
20177
20178 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
20179 switch (this->Subtarget.getCPUDirective()) {
20180 default:
20181 // TODO: enhance the condition for subtarget before pwr8
20182 return false;
20183 case PPC::DIR_PWR8:
20184 // type mul add shl
20185 // scalar 4 1 1
20186 // vector 7 2 2
20187 return true;
20188 case PPC::DIR_PWR9:
20189 case PPC::DIR_PWR10:
20190 case PPC::DIR_PWR11:
20192 // type mul add shl
20193 // scalar 5 2 2
20194 // vector 7 2 2
20195
20196 // The cycle RATIO of the related operations is shown in the table above.
20197 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
20198 // scalar and vector type. For 2 instrs patterns, add/sub + shl
20199 // are 4, it is always profitable; but for 3 instrs patterns
20200 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
20201 // So we should only do it for vector type.
20202 return IsAddOne && IsNeg ? VT.isVector() : true;
20203 }
20204 };
20205
20206 EVT VT = N->getValueType(0);
20207 SDLoc DL(N);
20208
20209 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
20210 bool IsNeg = MulAmt.isNegative();
20211 APInt MulAmtAbs = MulAmt.abs();
20212
20213 if ((MulAmtAbs - 1).isPowerOf2()) {
20214 // (mul x, 2^N + 1) => (add (shl x, N), x)
20215 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
20216
20217 if (!IsProfitable(IsNeg, true, VT))
20218 return SDValue();
20219
20220 SDValue Op0 = N->getOperand(0);
20221 SDValue Op1 =
20222 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
20223 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
20224 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
20225
20226 if (!IsNeg)
20227 return Res;
20228
20229 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
20230 } else if ((MulAmtAbs + 1).isPowerOf2()) {
20231 // (mul x, 2^N - 1) => (sub (shl x, N), x)
20232 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
20233
20234 if (!IsProfitable(IsNeg, false, VT))
20235 return SDValue();
20236
20237 SDValue Op0 = N->getOperand(0);
20238 SDValue Op1 =
20239 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
20240 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
20241
20242 if (!IsNeg)
20243 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
20244 else
20245 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
20246
20247 } else {
20248 return SDValue();
20249 }
20250}
20251
20252// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
20253// in combiner since we need to check SD flags and other subtarget features.
20254SDValue PPCTargetLowering::combineFMALike(SDNode *N,
20255 DAGCombinerInfo &DCI) const {
20256 SDValue N0 = N->getOperand(0);
20257 SDValue N1 = N->getOperand(1);
20258 SDValue N2 = N->getOperand(2);
20259 SDNodeFlags Flags = N->getFlags();
20260 EVT VT = N->getValueType(0);
20261 SelectionDAG &DAG = DCI.DAG;
20262 unsigned Opc = N->getOpcode();
20264 bool LegalOps = !DCI.isBeforeLegalizeOps();
20265 SDLoc Loc(N);
20266
20267 if (!isOperationLegal(ISD::FMA, VT))
20268 return SDValue();
20269
20270 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
20271 // since (fnmsub a b c)=-0 while c-ab=+0.
20272 if (!Flags.hasNoSignedZeros())
20273 return SDValue();
20274
20275 // (fma (fneg a) b c) => (fnmsub a b c)
20276 // (fnmsub (fneg a) b c) => (fma a b c)
20277 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
20278 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
20279
20280 // (fma a (fneg b) c) => (fnmsub a b c)
20281 // (fnmsub a (fneg b) c) => (fma a b c)
20282 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
20283 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
20284
20285 return SDValue();
20286}
20287
20288bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20289 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
20290 if (!Subtarget.is64BitELFABI())
20291 return false;
20292
20293 // If not a tail call then no need to proceed.
20294 if (!CI->isTailCall())
20295 return false;
20296
20297 // If sibling calls have been disabled and tail-calls aren't guaranteed
20298 // there is no reason to duplicate.
20299 auto &TM = getTargetMachine();
20300 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
20301 return false;
20302
20303 // Can't tail call a function called indirectly, or if it has variadic args.
20304 const Function *Callee = CI->getCalledFunction();
20305 if (!Callee || Callee->isVarArg())
20306 return false;
20307
20308 // Make sure the callee and caller calling conventions are eligible for tco.
20309 const Function *Caller = CI->getParent()->getParent();
20310 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
20311 CI->getCallingConv()))
20312 return false;
20313
20314 // If the function is local then we have a good chance at tail-calling it
20315 return getTargetMachine().shouldAssumeDSOLocal(Callee);
20316}
20317
20318bool PPCTargetLowering::
20319isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
20320 const Value *Mask = AndI.getOperand(1);
20321 // If the mask is suitable for andi. or andis. we should sink the and.
20322 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
20323 // Can't handle constants wider than 64-bits.
20324 if (CI->getBitWidth() > 64)
20325 return false;
20326 int64_t ConstVal = CI->getZExtValue();
20327 return isUInt<16>(ConstVal) ||
20328 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
20329 }
20330
20331 // For non-constant masks, we can always use the record-form and.
20332 return true;
20333}
20334
20335/// getAddrModeForFlags - Based on the set of address flags, select the most
20336/// optimal instruction format to match by.
20337PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
20338 // This is not a node we should be handling here.
20339 if (Flags == PPC::MOF_None)
20340 return PPC::AM_None;
20341 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
20342 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
20343 if ((Flags & FlagSet) == FlagSet)
20344 return PPC::AM_DForm;
20345 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
20346 if ((Flags & FlagSet) == FlagSet)
20347 return PPC::AM_DSForm;
20348 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
20349 if ((Flags & FlagSet) == FlagSet)
20350 return PPC::AM_DQForm;
20351 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
20352 if ((Flags & FlagSet) == FlagSet)
20353 return PPC::AM_PrefixDForm;
20354 // If no other forms are selected, return an X-Form as it is the most
20355 // general addressing mode.
20356 return PPC::AM_XForm;
20357}
20358
20359/// Set alignment flags based on whether or not the Frame Index is aligned.
20360/// Utilized when computing flags for address computation when selecting
20361/// load and store instructions.
20362static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
20363 SelectionDAG &DAG) {
20364 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
20365 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
20366 if (!FI)
20367 return;
20369 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
20370 // If this is (add $FI, $S16Imm), the alignment flags are already set
20371 // based on the immediate. We just need to clear the alignment flags
20372 // if the FI alignment is weaker.
20373 if ((FrameIndexAlign % 4) != 0)
20374 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
20375 if ((FrameIndexAlign % 16) != 0)
20376 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
20377 // If the address is a plain FrameIndex, set alignment flags based on
20378 // FI alignment.
20379 if (!IsAdd) {
20380 if ((FrameIndexAlign % 4) == 0)
20381 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
20382 if ((FrameIndexAlign % 16) == 0)
20383 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
20384 }
20385}
20386
20387/// Given a node, compute flags that are used for address computation when
20388/// selecting load and store instructions. The flags computed are stored in
20389/// FlagSet. This function takes into account whether the node is a constant,
20390/// an ADD or provably disjoint OR, or neither, and sets the flags accordingly.
20391static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
20392 SelectionDAG &DAG) {
20393 // Set the alignment flags for the node depending on if the node is
20394 // 4-byte or 16-byte aligned.
20395 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
20396 if ((Imm & 0x3) == 0)
20397 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
20398 if ((Imm & 0xf) == 0)
20399 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
20400 };
20401
20403 // All 32-bit constants can be computed as LIS + Disp.
20404 const APInt &ConstImm = CN->getAPIntValue();
20405 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
20406 FlagSet |= PPC::MOF_AddrIsSImm32;
20407 SetAlignFlagsForImm(ConstImm.getZExtValue());
20408 setAlignFlagsForFI(N, FlagSet, DAG);
20409 }
20410 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
20411 FlagSet |= PPC::MOF_RPlusSImm34;
20412 else // Let constant materialization handle large constants.
20413 FlagSet |= PPC::MOF_NotAddNorCst;
20414 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
20415 // This address can be represented as an addition of:
20416 // - Register + Imm16 (possibly a multiple of 4/16)
20417 // - Register + Imm34
20418 // - Register + PPCISD::Lo
20419 // - Register + Register
20420 // In any case, we won't have to match this as Base + Zero.
20421 SDValue RHS = N.getOperand(1);
20423 const APInt &ConstImm = CN->getAPIntValue();
20424 if (ConstImm.isSignedIntN(16)) {
20425 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
20426 SetAlignFlagsForImm(ConstImm.getZExtValue());
20427 setAlignFlagsForFI(N, FlagSet, DAG);
20428 }
20429 if (ConstImm.isSignedIntN(34))
20430 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
20431 else
20432 FlagSet |= PPC::MOF_RPlusR; // Register.
20433 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
20434 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
20435 else
20436 FlagSet |= PPC::MOF_RPlusR;
20437 } else { // The address computation is not a constant or an addition.
20438 setAlignFlagsForFI(N, FlagSet, DAG);
20439 FlagSet |= PPC::MOF_NotAddNorCst;
20440 }
20441}
20442
20443static bool isPCRelNode(SDValue N) {
20444 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
20449}
20450
20451/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
20452/// the address flags of the load/store instruction that is to be matched.
20453unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
20454 SelectionDAG &DAG) const {
20455 unsigned FlagSet = PPC::MOF_None;
20456
20457 // Compute subtarget flags.
20458 if (!Subtarget.hasP9Vector())
20459 FlagSet |= PPC::MOF_SubtargetBeforeP9;
20460 else
20461 FlagSet |= PPC::MOF_SubtargetP9;
20462
20463 if (Subtarget.hasPrefixInstrs())
20464 FlagSet |= PPC::MOF_SubtargetP10;
20465
20466 if (Subtarget.hasSPE())
20467 FlagSet |= PPC::MOF_SubtargetSPE;
20468
20469 // Check if we have a PCRel node and return early.
20470 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
20471 return FlagSet;
20472
20473 // If the node is the paired load/store intrinsics, compute flags for
20474 // address computation and return early.
20475 unsigned ParentOp = Parent->getOpcode();
20476 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
20477 (ParentOp == ISD::INTRINSIC_VOID))) {
20478 unsigned ID = Parent->getConstantOperandVal(1);
20479 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
20480 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
20481 ? Parent->getOperand(2)
20482 : Parent->getOperand(3);
20483 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
20484 FlagSet |= PPC::MOF_Vector;
20485 return FlagSet;
20486 }
20487 }
20488
20489 // Mark this as something we don't want to handle here if it is atomic
20490 // or pre-increment instruction.
20491 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
20492 if (LSB->isIndexed())
20493 return PPC::MOF_None;
20494
20495 // Compute in-memory type flags. This is based on if there are scalars,
20496 // floats or vectors.
20497 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
20498 assert(MN && "Parent should be a MemSDNode!");
20499 EVT MemVT = MN->getMemoryVT();
20500 unsigned Size = MemVT.getSizeInBits();
20501 if (MemVT.isScalarInteger()) {
20502 assert(Size <= 128 &&
20503 "Not expecting scalar integers larger than 16 bytes!");
20504 if (Size < 32)
20505 FlagSet |= PPC::MOF_SubWordInt;
20506 else if (Size == 32)
20507 FlagSet |= PPC::MOF_WordInt;
20508 else
20509 FlagSet |= PPC::MOF_DoubleWordInt;
20510 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
20511 if (Size == 128)
20512 FlagSet |= PPC::MOF_Vector;
20513 else if (Size == 256) {
20514 assert(Subtarget.pairedVectorMemops() &&
20515 "256-bit vectors are only available when paired vector memops is "
20516 "enabled!");
20517 FlagSet |= PPC::MOF_Vector;
20518 } else
20519 llvm_unreachable("Not expecting illegal vectors!");
20520 } else { // Floating point type: can be scalar, f128 or vector types.
20521 if (Size == 32 || Size == 64)
20522 FlagSet |= PPC::MOF_ScalarFloat;
20523 else if (MemVT == MVT::f128 || MemVT.isVector())
20524 FlagSet |= PPC::MOF_Vector;
20525 else
20526 llvm_unreachable("Not expecting illegal scalar floats!");
20527 }
20528
20529 // Compute flags for address computation.
20530 computeFlagsForAddressComputation(N, FlagSet, DAG);
20531
20532 // Compute type extension flags.
20533 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
20534 switch (LN->getExtensionType()) {
20535 case ISD::SEXTLOAD:
20536 FlagSet |= PPC::MOF_SExt;
20537 break;
20538 case ISD::EXTLOAD:
20539 case ISD::ZEXTLOAD:
20540 FlagSet |= PPC::MOF_ZExt;
20541 break;
20542 case ISD::NON_EXTLOAD:
20543 FlagSet |= PPC::MOF_NoExt;
20544 break;
20545 }
20546 } else
20547 FlagSet |= PPC::MOF_NoExt;
20548
20549 // For integers, no extension is the same as zero extension.
20550 // We set the extension mode to zero extension so we don't have
20551 // to add separate entries in AddrModesMap for loads and stores.
20552 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
20553 FlagSet |= PPC::MOF_ZExt;
20554 FlagSet &= ~PPC::MOF_NoExt;
20555 }
20556
20557 // If we don't have prefixed instructions, 34-bit constants should be
20558 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
20559 bool IsNonP1034BitConst =
20561 FlagSet) == PPC::MOF_RPlusSImm34;
20562 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
20563 IsNonP1034BitConst)
20564 FlagSet |= PPC::MOF_NotAddNorCst;
20565
20566 return FlagSet;
20567}
20568
20569/// SelectForceXFormMode - Given the specified address, force it to be
20570/// represented as an indexed [r+r] operation (an XForm instruction).
20572 SDValue &Base,
20573 SelectionDAG &DAG) const {
20574
20576 int16_t ForceXFormImm = 0;
20577 if (provablyDisjointOr(DAG, N) &&
20578 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
20579 Disp = N.getOperand(0);
20580 Base = N.getOperand(1);
20581 return Mode;
20582 }
20583
20584 // If the address is the result of an add, we will utilize the fact that the
20585 // address calculation includes an implicit add. However, we can reduce
20586 // register pressure if we do not materialize a constant just for use as the
20587 // index register. We only get rid of the add if it is not an add of a
20588 // value and a 16-bit signed constant and both have a single use.
20589 if (N.getOpcode() == ISD::ADD &&
20590 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
20591 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
20592 Disp = N.getOperand(0);
20593 Base = N.getOperand(1);
20594 return Mode;
20595 }
20596
20597 // Otherwise, use R0 as the base register.
20598 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20599 N.getValueType());
20600 Base = N;
20601
20602 return Mode;
20603}
20604
20606 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20607 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20608 EVT ValVT = Val.getValueType();
20609 // If we are splitting a scalar integer into f64 parts (i.e. so they
20610 // can be placed into VFRC registers), we need to zero extend and
20611 // bitcast the values. This will ensure the value is placed into a
20612 // VSR using direct moves or stack operations as needed.
20613 if (PartVT == MVT::f64 &&
20614 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
20615 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
20616 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
20617 Parts[0] = Val;
20618 return true;
20619 }
20620 return false;
20621}
20622
20623SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
20624 SelectionDAG &DAG) const {
20625 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20627 EVT RetVT = Op.getValueType();
20628 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
20629 SDValue Callee =
20630 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
20631 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
20633 for (const SDValue &N : Op->op_values()) {
20634 EVT ArgVT = N.getValueType();
20635 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
20636 TargetLowering::ArgListEntry Entry(N, ArgTy);
20637 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
20638 Entry.IsZExt = !Entry.IsSExt;
20639 Args.push_back(Entry);
20640 }
20641
20642 SDValue InChain = DAG.getEntryNode();
20643 SDValue TCChain = InChain;
20644 const Function &F = DAG.getMachineFunction().getFunction();
20645 bool isTailCall =
20646 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
20647 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
20648 if (isTailCall)
20649 InChain = TCChain;
20650 CLI.setDebugLoc(SDLoc(Op))
20651 .setChain(InChain)
20652 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
20653 .setTailCall(isTailCall)
20654 .setSExtResult(SignExtend)
20655 .setZExtResult(!SignExtend)
20657 return TLI.LowerCallTo(CLI).first;
20658}
20659
20660SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20661 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20662 SelectionDAG &DAG) const {
20663 if (Op.getValueType() == MVT::f32)
20664 return lowerToLibCall(LibCallFloatName, Op, DAG);
20665
20666 if (Op.getValueType() == MVT::f64)
20667 return lowerToLibCall(LibCallDoubleName, Op, DAG);
20668
20669 return SDValue();
20670}
20671
20672bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20673 SDNodeFlags Flags = Op.getNode()->getFlags();
20674 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20675 Flags.hasNoNaNs() && Flags.hasNoInfs();
20676}
20677
20678bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20679 return Op.getNode()->getFlags().hasApproximateFuncs();
20680}
20681
20682bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20684}
20685
20686SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20687 const char *LibCallFloatName,
20688 const char *LibCallDoubleNameFinite,
20689 const char *LibCallFloatNameFinite,
20690 SDValue Op,
20691 SelectionDAG &DAG) const {
20692 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20693 return SDValue();
20694
20695 if (!isLowringToMASSFiniteSafe(Op))
20696 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20697 DAG);
20698
20699 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
20700 LibCallDoubleNameFinite, Op, DAG);
20701}
20702
20703SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20704 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
20705 "__xl_powf_finite", Op, DAG);
20706}
20707
20708SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20709 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
20710 "__xl_sinf_finite", Op, DAG);
20711}
20712
20713SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20714 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
20715 "__xl_cosf_finite", Op, DAG);
20716}
20717
20718SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20719 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
20720 "__xl_logf_finite", Op, DAG);
20721}
20722
20723SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20724 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
20725 "__xl_log10f_finite", Op, DAG);
20726}
20727
20728SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20729 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
20730 "__xl_expf_finite", Op, DAG);
20731}
20732
20733// If we happen to match to an aligned D-Form, check if the Frame Index is
20734// adequately aligned. If it is not, reset the mode to match to X-Form.
20735static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20738 return;
20739 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
20742}
20743
20744/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
20745/// compute the address flags of the node, get the optimal address mode based
20746/// on the flags, and set the Base and Disp based on the address mode.
20748 SDValue N, SDValue &Disp,
20749 SDValue &Base,
20750 SelectionDAG &DAG,
20751 MaybeAlign Align) const {
20752 SDLoc DL(Parent);
20753
20754 // Compute the address flags.
20755 unsigned Flags = computeMOFlags(Parent, N, DAG);
20756
20757 // Get the optimal address mode based on the Flags.
20758 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
20759
20760 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
20761 // Select an X-Form load if it is not.
20762 setXFormForUnalignedFI(N, Flags, Mode);
20763
20764 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
20765 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
20766 assert(Subtarget.isUsingPCRelativeCalls() &&
20767 "Must be using PC-Relative calls when a valid PC-Relative node is "
20768 "present!");
20769 Mode = PPC::AM_PCRel;
20770 }
20771
20772 // Set Base and Disp accordingly depending on the address mode.
20773 switch (Mode) {
20774 case PPC::AM_DForm:
20775 case PPC::AM_DSForm:
20776 case PPC::AM_DQForm: {
20777 // This is a register plus a 16-bit immediate. The base will be the
20778 // register and the displacement will be the immediate unless it
20779 // isn't sufficiently aligned.
20780 if (Flags & PPC::MOF_RPlusSImm16) {
20781 SDValue Op0 = N.getOperand(0);
20782 SDValue Op1 = N.getOperand(1);
20783 int16_t Imm = Op1->getAsZExtVal();
20784 if (!Align || isAligned(*Align, Imm)) {
20785 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
20786 Base = Op0;
20788 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20789 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20790 }
20791 break;
20792 }
20793 }
20794 // This is a register plus the @lo relocation. The base is the register
20795 // and the displacement is the global address.
20796 else if (Flags & PPC::MOF_RPlusLo) {
20797 Disp = N.getOperand(1).getOperand(0); // The global address.
20802 Base = N.getOperand(0);
20803 break;
20804 }
20805 // This is a constant address at most 32 bits. The base will be
20806 // zero or load-immediate-shifted and the displacement will be
20807 // the low 16 bits of the address.
20808 else if (Flags & PPC::MOF_AddrIsSImm32) {
20809 auto *CN = cast<ConstantSDNode>(N);
20810 EVT CNType = CN->getValueType(0);
20811 uint64_t CNImm = CN->getZExtValue();
20812 // If this address fits entirely in a 16-bit sext immediate field, codegen
20813 // this as "d, 0".
20814 int16_t Imm;
20815 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20816 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20817 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20818 CNType);
20819 break;
20820 }
20821 // Handle 32-bit sext immediate with LIS + Addr mode.
20822 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20823 (!Align || isAligned(*Align, CNImm))) {
20824 int32_t Addr = (int32_t)CNImm;
20825 // Otherwise, break this down into LIS + Disp.
20826 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20827 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20828 MVT::i32);
20829 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20830 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
20831 break;
20832 }
20833 }
20834 // Otherwise, the PPC::MOF_NotAddNorCst flag is set. Load/Store is Non-foldable.
20835 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
20837 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20838 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20839 } else
20840 Base = N;
20841 break;
20842 }
20843 case PPC::AM_PrefixDForm: {
20844 int64_t Imm34 = 0;
20845 unsigned Opcode = N.getOpcode();
20846 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20847 (isIntS34Immediate(N.getOperand(1), Imm34))) {
20848 // N is an Add/OR Node, and its second operand is a 34-bit signed immediate.
20849 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20850 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20851 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20852 else
20853 Base = N.getOperand(0);
20854 } else if (isIntS34Immediate(N, Imm34)) {
20855 // The address is a 34-bit signed immediate.
20856 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20857 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20858 }
20859 break;
20860 }
20861 case PPC::AM_PCRel: {
20862 // When selecting PC-Relative instructions, "Base" is not utilized as
20863 // we select the address as [PC+imm].
20864 Disp = N;
20865 break;
20866 }
20867 case PPC::AM_None:
20868 break;
20869 default: { // By default, X-Form is always available to be selected.
20870 // When a frame index is not aligned, we also match by XForm.
20872 Base = FI ? N : N.getOperand(1);
20873 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20874 N.getValueType())
20875 : N.getOperand(0);
20876 break;
20877 }
20878 }
20879 return Mode;
20880}
20881
20883 bool Return,
20884 bool IsVarArg) const {
20885 switch (CC) {
20886 case CallingConv::Cold:
20887 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20888 default:
20889 return CC_PPC64_ELF;
20890 }
20891}
20892
20894 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20895}
20896
20899 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20900 if (shouldInlineQuadwordAtomics() && Size == 128)
20902
20903 switch (AI->getOperation()) {
20909 default:
20911 }
20912
20913 llvm_unreachable("unreachable atomicrmw operation");
20914}
20915
20924
20925static Intrinsic::ID
20927 switch (BinOp) {
20928 default:
20929 llvm_unreachable("Unexpected AtomicRMW BinOp");
20931 return Intrinsic::ppc_atomicrmw_xchg_i128;
20932 case AtomicRMWInst::Add:
20933 return Intrinsic::ppc_atomicrmw_add_i128;
20934 case AtomicRMWInst::Sub:
20935 return Intrinsic::ppc_atomicrmw_sub_i128;
20936 case AtomicRMWInst::And:
20937 return Intrinsic::ppc_atomicrmw_and_i128;
20938 case AtomicRMWInst::Or:
20939 return Intrinsic::ppc_atomicrmw_or_i128;
20940 case AtomicRMWInst::Xor:
20941 return Intrinsic::ppc_atomicrmw_xor_i128;
20943 return Intrinsic::ppc_atomicrmw_nand_i128;
20944 }
20945}
20946
20948 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20949 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20950 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20951 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20952 Type *ValTy = Incr->getType();
20953 assert(ValTy->getPrimitiveSizeInBits() == 128);
20954 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20955 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20956 Value *IncrHi =
20957 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20958 Value *LoHi = Builder.CreateIntrinsic(
20960 {AlignedAddr, IncrLo, IncrHi});
20961 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20962 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20963 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20964 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20965 return Builder.CreateOr(
20966 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20967}
20968
20970 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20971 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20972 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20973 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20974 Type *ValTy = CmpVal->getType();
20975 assert(ValTy->getPrimitiveSizeInBits() == 128);
20976 Function *IntCmpXchg =
20977 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20978 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20979 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20980 Value *CmpHi =
20981 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20982 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20983 Value *NewHi =
20984 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20985 emitLeadingFence(Builder, CI, Ord);
20986 Value *LoHi =
20987 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20988 emitTrailingFence(Builder, CI, Ord);
20989 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20990 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20991 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20992 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20993 return Builder.CreateOr(
20994 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20995}
20996
20998 return Subtarget.useCRBits();
20999}
21000
21001/// Shuffle masks for vectors of bits are not legal as such vectors are
21002/// reserved for MMA/DM.
21003bool PPCTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const {
21004 if (VT.getScalarType() == MVT::i1)
21005 return false;
21006 return TargetLowering::isShuffleMaskLegal(Mask, VT);
21007}
21008
21009// Optimize the following patterns using vbpermq/vbpermd:
21010// i16 = bitcast(v16i1 truncate(v16i8))
21011// i8 = bitcast(v8i1 truncate(v8i16))
21012// i8 = bitcast(v8i1 truncate(v8i8))
21013SDValue PPCTargetLowering::DAGCombineBitcast(SDNode *N,
21014 DAGCombinerInfo &DCI) const {
21015 SDValue Op0 = N->getOperand(0);
21016 if (Op0.getOpcode() != ISD::TRUNCATE)
21017 return SDValue();
21018 SDValue Src = Op0.getOperand(0);
21019 EVT ResVT = N->getValueType(0);
21020 EVT TruncResVT = Op0.getValueType();
21021 EVT SrcVT = Src.getValueType();
21022 SDLoc dl(N);
21023 SelectionDAG &DAG = DCI.DAG;
21024 bool IsLittleEndian = Subtarget.isLittleEndian();
21025
21026 if (ResVT != MVT::i16 && ResVT != MVT::i8)
21027 return SDValue();
21028 SDValue VBPerm =
21029 GenerateVBPERM(DAG, dl, Src, SrcVT, TruncResVT, IsLittleEndian);
21030 if (!VBPerm)
21031 return SDValue();
21032 SDValue ForExtract = DAG.getBitcast(MVT::v4i32, VBPerm);
21033 SDValue Extracted =
21034 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, ForExtract,
21035 DAG.getIntPtrConstant(IsLittleEndian ? 2 : 1, dl));
21036 return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Extracted);
21037}
21038
21039SDValue PPCTargetLowering::GenerateVBPERM(SelectionDAG &DAG, SDLoc dl,
21040 SDValue Src, EVT SrcVT, EVT ResVT,
21041 bool IsLE) const {
21042 bool IsV16i8 = (ResVT == MVT::v16i1 && SrcVT == MVT::v16i8);
21043 bool IsV8i16 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i16);
21044 bool IsV8i8 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i8);
21045
21046 if (!IsV16i8 && !IsV8i16 && !IsV8i8)
21047 return SDValue();
21048
21049 if (IsV8i8) {
21050 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i8,
21051 DAG.getUNDEF(MVT::v16i8), Src,
21052 DAG.getIntPtrConstant(0, dl));
21053 }
21054 SmallVector<int, 16> BitIndices(16, 128);
21055 unsigned NumElts = SrcVT.getVectorNumElements();
21056 unsigned EltSize = SrcVT.getScalarType().getSizeInBits();
21057 for (int Idx = 0, End = SrcVT.getVectorNumElements(); Idx < End; Idx++) {
21058 BitIndices[Idx] = EltSize * (NumElts - Idx) - 1;
21059 if (IsV8i8 && IsLE)
21060 BitIndices[Idx] += 64;
21061 }
21062 if (!IsLE)
21063 std::reverse(BitIndices.begin(), BitIndices.end());
21065 for (auto Idx : BitIndices)
21066 BVOps.push_back(DAG.getConstant(Idx, dl, MVT::i8));
21067 SDValue VRB = DAG.getBuildVector(MVT::v16i8, dl, BVOps);
21068 return DAG.getNode(
21069 ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
21070 DAG.getConstant(Intrinsic::ppc_altivec_vbpermq, dl, MVT::i32),
21071 DAG.getBitcast(MVT::v16i8, Src), VRB);
21072}
21073
21074// For Power8/9, optimize vec splats of small FP values that can be
21075// represented as integers. Use vspltisw + xvcvsxwdp/xvcvsxwsp instead of
21076// loading from constant pool.
21077SDValue PPCTargetLowering::LowerVecSplatSmallFP(SDValue Op, SelectionDAG &DAG,
21078 bool BVNIsConstantSplat,
21079 unsigned SplatBitSize) const {
21080
21081 if (!BVNIsConstantSplat || !Subtarget.hasVSX() || !Subtarget.hasP8Vector() ||
21082 Subtarget.hasP10Vector())
21083 return SDValue();
21084
21085 EVT VT = Op->getValueType(0);
21086 if (!((SplatBitSize == 64 && VT == MVT::v2f64) ||
21087 (SplatBitSize == 32 && VT == MVT::v4f32)))
21088 return SDValue();
21089
21090 auto *CN = dyn_cast<ConstantFPSDNode>(Op.getOperand(0));
21091 if (!CN)
21092 return SDValue();
21093
21094 APFloat APFloatVal = CN->getValueAPF();
21095 bool IsExact;
21096 APSInt IntResult(16, false);
21097 APFloatVal.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
21098
21099 if (!(IsExact && IntResult <= 15 && IntResult >= -16 && !APFloatVal.isZero()))
21100 return SDValue();
21101
21102 int64_t IntVal = IntResult.getSExtValue();
21103
21104 SDLoc dl(Op);
21105 SDValue IntSplat = getCanonicalConstSplat(IntVal, 4, MVT::v4i32, DAG, dl);
21106
21107 if (SplatBitSize == 64)
21108 return DAG.getNode(
21109 ISD::INTRINSIC_WO_CHAIN, dl, MVT::v2f64,
21110 DAG.getConstant(Intrinsic::ppc_vsx_xvcvsxwdp, dl, MVT::i32), IntSplat);
21111
21112 return DAG.getNode(PPCISD::XVCVSXWSP, dl, MVT::v4f32, IntSplat);
21113}
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
MachineInstr unsigned OpIdx
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static MachineBasicBlock * emitAtomicCmpSwapSoftware(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit software-emulated atomic compare-and-swap for I8/I16 without hardware partword atomic support.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool IsSelect(unsigned Opcode, bool CheckOnlyCC=false)
Check if the opcode is a SELECT or SELECT_CC variant.
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static void prepareOutOfLineGlueCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool IsSelectCC(unsigned Opcode)
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS, bool IsPPC64)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG)
Optimize the bitfloor(X) pattern for PowerPC.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static MachineBasicBlock * emitSelect(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit SELECT instruction, using ISEL if available, otherwise use branch-based control flow.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool canConvertSETCCToXori(SDNode *N)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static void createAtomicLoopBlocks(MachineFunction *F, MachineBasicBlock *BB, MachineBasicBlock *&loop1MBB, MachineBasicBlock *&loop2MBB, MachineBasicBlock *&exitMBB, MachineInstr &MI, MachineFunction::iterator It)
Helper function to create basic blocks for atomic compare-and-swap.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
static MachineBasicBlock * emitAtomicCmpSwapHardware(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit hardware-supported atomic compare-and-swap for I32/I64 and I8/I16 with partword atomic support.
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static void signExtendOperandIfUnknown(MachineInstr &MI, MachineBasicBlock *BB, unsigned OpIdx, bool IsByte, const PPCInstrInfo *TII)
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5912
bool isDenormal() const
Definition APFloat.h:1539
bool isZero() const
Definition APFloat.h:1534
APInt bitcastToAPInt() const
Definition APFloat.h:1430
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1391
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
APInt abs() const
Get the absolute value.
Definition APInt.h:1818
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1745
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:358
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:759
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:771
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
arg_iterator arg_begin()
Definition Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:354
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
const Argument * const_arg_iterator
Definition Function.h:74
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:659
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:200
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1069
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
AtomicOrdering getFailureOrdering() const
For cmpxchg atomic operations, return the atomic ordering requirements when store does not occur.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID for this memory operation.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
MCRegister getGlueCodeDescriptorRegister() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: `sub y, (xor x, -1)` and `add (add x, 1), y`. The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
Get the SDNode which holds the desired result.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align DstAlign, Align SrcAlign, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of the largest number of comparisons to generate a BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:180
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:326
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:273
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ TargetConstantPool
Definition ISDOpcodes.h:189
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:979
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ TargetExternalSymbol
Definition ISDOpcodes.h:190
@ BR
Control flow instructions. These all have token chains.
@ TargetJumpTable
Definition ISDOpcodes.h:188
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:974
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:186
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:155
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:148
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:196
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:199
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:174
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:205
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:156
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:123
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:152
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:202
@ MO_TPREL_HA
Definition PPC.h:181
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:115
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:190
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:142
@ MO_TPREL_LO
Definition PPC.h:180
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:177
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:168
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:193
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:137
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:162
@ MO_HA
Definition PPC.h:178
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:119
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering)
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
LLVM_ABI bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:31
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
LLVM_ABI std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:150
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:58
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.