1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
105static cl::opt<bool> DisableP10StoreForward(
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
133static cl::opt<bool> DisableAutoPairedVecSt(
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
148 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
171
172// FIXME: Remove this once the bug has been fixed!
174
175PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
176 const PPCSubtarget &STI)
177 : TargetLowering(TM, STI), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting load and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186 const MVT RegVT = Subtarget.getScalarIntVT();
187
188 // Set up the register classes.
189 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
190 if (!useSoftFloat()) {
191 if (hasSPE()) {
192 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
193 // EFPU2 APU only supports f32
194 if (!Subtarget.hasEFPU2())
195 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
196 } else {
197 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
198 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
199 }
200 }
201
204
205 // PowerPC uses addo_carry, subo_carry to propagate carry.
208
209 // On P10, the default lowering generates better code using the
210 // setbc instruction.
211 if (!Subtarget.hasP10Vector()) {
214 if (isPPC64) {
217 }
218 }
219
220 // Match BITREVERSE to customized fast code sequence in the td file.
223
224 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
225 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
226
227 // Custom lower inline assembly to check for special registers.
228 setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
229 setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
230
231 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
232 for (MVT VT : MVT::integer_valuetypes()) {
235 }
236
237 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
238 setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
239
240 if (Subtarget.isISA3_0()) {
241 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
242 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
243 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
244 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
245 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
246 } else {
247 // No extending loads from f16 or HW conversions back and forth.
248 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
249 setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
250 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
251 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
252 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
253 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
254 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
255 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
256 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
257 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
258 }
259
260 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
261
262 // PowerPC has pre-inc loads and stores.
273 if (!Subtarget.hasSPE()) {
278 }
279
280 if (Subtarget.useCRBits()) {
282
283 if (isPPC64 || Subtarget.hasFPCVT()) {
288
290 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
292 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
293
298
300 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
302 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
303 } else {
308 }
309
310 // PowerPC does not support direct load/store of condition registers.
311 setOperationAction(ISD::LOAD, MVT::i1, Custom);
312 setOperationAction(ISD::STORE, MVT::i1, Custom);
313
314 // FIXME: Remove this once the ANDI glue bug is fixed:
315 if (ANDIGlueBug)
317
318 for (MVT VT : MVT::integer_valuetypes()) {
321 setTruncStoreAction(VT, MVT::i1, Expand);
322 }
323
324 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
325 }
326
327 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
328 // PPC (the libcall is not available).
333
334 // We do not currently implement these libm ops for PowerPC.
335 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
336 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
337 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
338 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
339 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
340 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
341
342 // PowerPC has no SREM/UREM instructions unless we are on P9
343 // On P9 we may use a hardware instruction to compute the remainder.
344 // When the result of both the remainder and the division is required it is
345 // more efficient to compute the remainder from the result of the division
346 // rather than use the remainder instruction. The instructions are legalized
347 // directly because the DivRemPairsPass performs the transformation at the IR
348 // level.
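  // Illustrative IR-level rewrite (value names made up) that DivRemPairsPass
  // applies when both the quotient and the remainder of the same operands are
  // needed, so only the division instruction is emitted:
  //   %q = sdiv i32 %a, %b
  //   %t = mul i32 %q, %b
  //   %r = sub i32 %a, %t    ; instead of %r = srem i32 %a, %b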
349 if (Subtarget.isISA3_0()) {
354 } else {
359 }
360
361 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
370
371 // Handle constrained floating-point operations of scalars.
372 // TODO: Handle SPE-specific operations.
378
383
384 if (!Subtarget.hasSPE()) {
387 }
388
389 if (Subtarget.hasVSX()) {
392 }
393
394 if (Subtarget.hasFSQRT()) {
397 }
398
399 if (Subtarget.hasFPRND()) {
404
409 }
410
411 // We don't support sin/cos/sqrt/fmod/pow
412 setOperationAction(ISD::FSIN , MVT::f64, Expand);
413 setOperationAction(ISD::FCOS , MVT::f64, Expand);
414 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
416 setOperationAction(ISD::FPOW , MVT::f64, Expand);
417 setOperationAction(ISD::FSIN , MVT::f32, Expand);
418 setOperationAction(ISD::FCOS , MVT::f32, Expand);
419 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
421 setOperationAction(ISD::FPOW , MVT::f32, Expand);
422
423 // MASS transformation for LLVM intrinsics with replicating fast-math flags,
424 // to be consistent with the PPCGenScalarMASSEntries pass.
425 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
426 setOperationAction(ISD::FSIN , MVT::f64, Custom);
427 setOperationAction(ISD::FCOS , MVT::f64, Custom);
428 setOperationAction(ISD::FPOW , MVT::f64, Custom);
429 setOperationAction(ISD::FLOG, MVT::f64, Custom);
430 setOperationAction(ISD::FLOG10, MVT::f64, Custom);
431 setOperationAction(ISD::FEXP, MVT::f64, Custom);
432 setOperationAction(ISD::FSIN , MVT::f32, Custom);
433 setOperationAction(ISD::FCOS , MVT::f32, Custom);
434 setOperationAction(ISD::FPOW , MVT::f32, Custom);
435 setOperationAction(ISD::FLOG, MVT::f32, Custom);
436 setOperationAction(ISD::FLOG10, MVT::f32, Custom);
437 setOperationAction(ISD::FEXP, MVT::f32, Custom);
438 }
439
440 if (Subtarget.hasSPE()) {
443 } else {
444 setOperationAction(ISD::FMA , MVT::f64, Legal);
445 setOperationAction(ISD::FMA , MVT::f32, Legal);
447 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
448 }
449
450 if (Subtarget.hasSPE())
451 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
452
453 // If we're enabling GP optimizations, use hardware square root
454 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
455 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
456
457 if (!Subtarget.hasFSQRT() &&
458 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
459 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
460
461 if (Subtarget.hasFCPSGN()) {
464 } else {
467 }
468
469 if (Subtarget.hasFPRND()) {
470 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
471 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
472 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
473 setOperationAction(ISD::FROUND, MVT::f64, Legal);
474
475 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
476 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
477 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
478 setOperationAction(ISD::FROUND, MVT::f32, Legal);
479 }
480
481 // Prior to P10, PowerPC does not have BSWAP, but we can use the vector BSWAP
482 // instruction xxbrd to speed up scalar BSWAP64.
483 if (Subtarget.isISA3_1()) {
486 } else {
489 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
490 }
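  // Illustrative instruction sequence for the pre-P10 custom path mentioned
  // above (register choices are up to selection and allocation):
  //   mtvsrd vs0, r3    ; move the 64-bit GPR value into a VSX register
  //   xxbrd  vs0, vs0   ; byte-reverse the doubleword
  //   mfvsrd r3, vs0    ; move the swapped value back to a GPR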
491
492 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
493 if (Subtarget.isISA3_0()) {
494 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
495 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
496 } else {
497 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
498 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
499 }
500
501 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
504 } else {
507 }
508
509 // PowerPC does not have ROTR
512
513 if (!Subtarget.useCRBits()) {
514 // PowerPC does not have Select
519 }
520
521 // PowerPC wants to turn select_cc of FP into fsel when possible.
524
525 // PowerPC wants to optimize integer setcc a bit
526 if (!Subtarget.useCRBits())
528
529 if (Subtarget.hasFPU()) {
533
537 }
538
539 // PowerPC does not have BRCOND, which requires SetCC.
540 if (!Subtarget.useCRBits())
541 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
542
543 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
544
545 if (Subtarget.hasSPE()) {
546 // SPE has built-in conversions
553
554 // SPE supports signaling compare of f32/f64.
557 } else {
558 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
561
562 // PowerPC does not have [U|S]INT_TO_FP
567 }
568
569 if (Subtarget.hasDirectMove() && isPPC64) {
570 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
571 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
572 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
573 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
574
583 } else {
584 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
585 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
586 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
587 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
588 }
589
590 // We cannot sextinreg(i1). Expand to shifts.
592
593 // Custom handling for PowerPC ucmp instruction
595 setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
596
597 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
598 // SjLj exception handling, but rather a lightweight setjmp/longjmp
599 // replacement used for continuations, user-level threading, etc. As a
600 // result, no other SjLj exception interfaces are implemented; please don't
601 // build your own exception handling on top of them.
602 // LLVM/Clang supports zero-cost DWARF exception handling.
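  // For example, __builtin_setjmp is emitted by the frontend as
  //   %r = call i32 @llvm.eh.sjlj.setjmp(ptr %buf)
  // and reaches this backend as an EH_SJLJ_SETJMP node; only that narrow
  // setjmp/longjmp use case is supported.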
605
606 // We want to legalize GlobalAddress and ConstantPool nodes into the
607 // appropriate instructions to materialize the address.
618
619 // TRAP is legal.
620 setOperationAction(ISD::TRAP, MVT::Other, Legal);
621
622 // TRAMPOLINE is custom lowered.
623 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
624 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
625
626 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
627 setOperationAction(ISD::VASTART , MVT::Other, Custom);
628
629 if (Subtarget.is64BitELFABI()) {
630 // VAARG always uses double-word chunks, so promote anything smaller.
631 setOperationAction(ISD::VAARG, MVT::i1, Promote);
632 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
633 setOperationAction(ISD::VAARG, MVT::i8, Promote);
634 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
635 setOperationAction(ISD::VAARG, MVT::i16, Promote);
636 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
637 setOperationAction(ISD::VAARG, MVT::i32, Promote);
638 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
639 setOperationAction(ISD::VAARG, MVT::Other, Expand);
640 } else if (Subtarget.is32BitELFABI()) {
641 // VAARG is custom lowered with the 32-bit SVR4 ABI.
642 setOperationAction(ISD::VAARG, MVT::Other, Custom);
643 setOperationAction(ISD::VAARG, MVT::i64, Custom);
644 } else
645 setOperationAction(ISD::VAARG, MVT::Other, Expand);
646
647 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
648 if (Subtarget.is32BitELFABI())
649 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
650 else
651 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
652
653 // Use the default implementation.
654 setOperationAction(ISD::VAEND , MVT::Other, Expand);
655 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
656 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
657 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
658 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
659 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
660 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
663
664 if (Subtarget.isISA3_0() && isPPC64) {
665 setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
666 setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
667 setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
668 setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
669 setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
670 setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
671 setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
672 setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
673 }
674
675 // We want to custom lower some of our intrinsics.
681
682 // To handle counter-based loop conditions.
684
689
690 // Comparisons that require checking two conditions.
691 if (Subtarget.hasSPE()) {
696 }
709
712
713 if (Subtarget.has64BitSupport()) {
714 // They also have instructions for converting between i64 and fp.
723 // This is just the low 32 bits of a (signed) fp->i64 conversion.
724 // We cannot do this with Promote because i64 is not a legal type.
727
728 if (Subtarget.hasLFIWAX() || isPPC64) {
731 }
732 } else {
733 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
734 if (Subtarget.hasSPE()) {
737 } else {
740 }
741 }
742
743 // With the instructions enabled under FPCVT, we can do everything.
744 if (Subtarget.hasFPCVT()) {
745 if (Subtarget.has64BitSupport()) {
754 }
755
764 }
765
766 if (Subtarget.use64BitRegs()) {
767 // 64-bit PowerPC implementations can support i64 types directly
768 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
769 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
771 // 64-bit PowerPC wants to expand i128 shifts itself.
775 } else {
776 // 32-bit PowerPC wants to expand i64 shifts itself.
780 }
781
782 // PowerPC has better expansions for funnel shifts than the generic
783 // TargetLowering::expandFunnelShift.
784 if (Subtarget.has64BitSupport()) {
787 }
790
791 if (Subtarget.hasVSX()) {
792 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
793 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
794 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
795 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
798 }
799
800 if (Subtarget.hasAltivec()) {
801 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
806 }
807 // First set operation action for all vector types to expand. Then we
808 // will selectively turn on ones that can be effectively codegen'd.
810 // add/sub are legal for all supported vector VT's.
813
814 // For v2i64, these are only valid with P8Vector. This is corrected after
815 // the loop.
816 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
821 }
822 else {
827 }
828
829 if (Subtarget.hasVSX()) {
830 setOperationAction(ISD::FMAXNUM, VT, Legal);
831 setOperationAction(ISD::FMINNUM, VT, Legal);
832 }
833
834 // Vector instructions introduced in P8
835 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
838 }
839 else {
842 }
843
844 // Vector instructions introduced in P9
845 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
847 else
849
850 // We promote all shuffles to v16i8.
852 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
853
854 // We promote all non-typed operations to v4i32.
856 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
858 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
860 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
861 setOperationAction(ISD::LOAD , VT, Promote);
862 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
864 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
867 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
868 setOperationAction(ISD::STORE, VT, Promote);
869 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
870
871 // No other operations are legal.
879 setOperationAction(ISD::FNEG, VT, Expand);
880 setOperationAction(ISD::FSQRT, VT, Expand);
881 setOperationAction(ISD::FLOG, VT, Expand);
882 setOperationAction(ISD::FLOG10, VT, Expand);
883 setOperationAction(ISD::FLOG2, VT, Expand);
884 setOperationAction(ISD::FEXP, VT, Expand);
885 setOperationAction(ISD::FEXP2, VT, Expand);
886 setOperationAction(ISD::FSIN, VT, Expand);
887 setOperationAction(ISD::FCOS, VT, Expand);
888 setOperationAction(ISD::FABS, VT, Expand);
889 setOperationAction(ISD::FFLOOR, VT, Expand);
890 setOperationAction(ISD::FCEIL, VT, Expand);
891 setOperationAction(ISD::FTRUNC, VT, Expand);
892 setOperationAction(ISD::FRINT, VT, Expand);
893 setOperationAction(ISD::FLDEXP, VT, Expand);
894 setOperationAction(ISD::FNEARBYINT, VT, Expand);
905 setOperationAction(ISD::FPOW, VT, Expand);
910
911 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
912 setTruncStoreAction(VT, InnerVT, Expand);
915 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
916 }
917 }
919 if (!Subtarget.hasP8Vector()) {
920 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
921 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
922 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
923 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
924 }
925
926 // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
927 // with merges, splats, etc.
929
930 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
931 // are cheap, so handle them before they get expanded to scalar.
937
938 setOperationAction(ISD::AND , MVT::v4i32, Legal);
939 setOperationAction(ISD::OR , MVT::v4i32, Legal);
940 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
941 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
943 Subtarget.useCRBits() ? Legal : Expand);
944 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
953 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
954 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
955 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
956 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
957
958 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
959 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
960 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
961 if (Subtarget.hasAltivec())
962 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
964 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
965 if (Subtarget.hasP8Altivec())
966 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
967
968 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
969 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
970 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
971 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
972
973 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
974 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
975
976 if (Subtarget.hasVSX()) {
977 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
978 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
980 }
981
982 if (Subtarget.hasP8Altivec())
983 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
984 else
985 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
986
987 if (Subtarget.isISA3_1()) {
988 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
989 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
990 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
991 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
992 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
993 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
994 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
995 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
996 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
997 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
998 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
999 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
1000 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
1001 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
1002 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
1003 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
1004 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
1005 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
1006 }
1007
1008 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1009 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1010
1013 // LE is P8+/64-bit so direct moves are supported and these operations
1014 // are legal. The custom transformation requires 64-bit since we need a
1015 // pair of stores that will cover a 128-bit load for P10.
1016 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1020 }
1021
1026
1027 // Altivec does not contain unordered floating-point compare instructions
1028 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1029 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1030 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1031 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1032
1033 if (Subtarget.hasVSX()) {
1036 if (Subtarget.hasP8Vector()) {
1039 }
1040 if (Subtarget.hasDirectMove() && isPPC64) {
1049 }
1051
1052 // The nearbyint variants are not allowed to raise the inexact exception
1053 // so we can only code-gen them with fpexcept.ignore.
1056
1057 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1058 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1059 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1060 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1061 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1062 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1063 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1064 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1065
1066 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1067 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1068 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1069 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1070 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1071
1072 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1073 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1074
1075 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1076 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1077
1078 // Share the Altivec comparison restrictions.
1079 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1080 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1081 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1082 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1083
1084 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1085 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1086
1088
1089 if (Subtarget.hasP8Vector())
1090 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1091
1092 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1093
1094 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1095 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1096 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1097
1098 if (Subtarget.hasP8Altivec()) {
1099 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1100 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1101 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1102
1103 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1104 // SRL, but not for SRA because of the instructions available:
1105 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1106 // doing.
1107 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1108 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1109 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1110
1111 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1112 }
1113 else {
1114 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1115 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1116 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1117
1118 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1119
1120 // VSX v2i64 only supports non-arithmetic operations.
1121 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1122 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1123 }
1124
1125 if (Subtarget.isISA3_1())
1126 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1127 else
1128 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1129
1130 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1131 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1132 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1133 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1134
1136
1145
1146 // Custom handling for partial vectors of integers converted to
1147 // floating point. We already have optimal handling for v2i32 through
1148 // the DAG combine, so those aren't necessary.
1165
1166 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1167 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1168 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1169 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1172
1175
1176 // Handle constrained floating-point operations on vectors.
1177 // The predicate is `hasVSX` because Altivec instructions raise no exceptions
1178 // but VSX vector instructions do.
1192
1206
1207 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1208 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1209
1210 for (MVT FPT : MVT::fp_valuetypes())
1211 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1212
1213 // Expand the SELECT to SELECT_CC
1215
1216 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1217 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1218
1219 // No implementation for these ops for PowerPC.
1220 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
1221 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1222 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1223 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1224 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1225 setOperationAction(ISD::FREM, MVT::f128, Expand);
1226 }
1227
1228 if (Subtarget.hasP8Altivec()) {
1229 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1230 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1231 }
1232
1233 if (Subtarget.hasP9Vector()) {
1236
1237 // Test data class instructions store results in CR bits.
1238 if (Subtarget.useCRBits()) {
1243 }
1244
1245 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1246 // SRL, but not for SRA because of the instructions available:
1247 // VS{RL} and VS{RL}O.
1248 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1249 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1250 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1251
1252 setOperationAction(ISD::FADD, MVT::f128, Legal);
1253 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1254 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1255 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1256 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1257
1258 setOperationAction(ISD::FMA, MVT::f128, Legal);
1265
1266 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1267 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1268 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1269 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1270 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1271 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1272
1275 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1276
1277 // Handle constrained floating-point operations of fp128
1293 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1294 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1295 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1296 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1297 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1298 } else if (Subtarget.hasVSX()) {
1299 setOperationAction(ISD::LOAD, MVT::f128, Promote);
1300 setOperationAction(ISD::STORE, MVT::f128, Promote);
1301
1302 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1303 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1304
1305 // Set FADD/FSUB as libcall to keep the legalizer from expanding the
1306 // fp_to_uint and int_to_fp.
1309
1310 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1311 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1312 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1313 setOperationAction(ISD::FABS, MVT::f128, Expand);
1314 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1315 setOperationAction(ISD::FMA, MVT::f128, Expand);
1317
1318 // Expand the fp_extend if the target type is fp128.
1319 setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1321
1322 // Expand the fp_round if the source type is fp128.
1323 for (MVT VT : {MVT::f32, MVT::f64}) {
1326 }
1327
1331 setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1332
1333 // Lower the following f128 select_cc pattern:
1334 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1336
1337 // We need to handle f128 SELECT_CC with integer result type.
1339 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1340 }
1341
1342 if (Subtarget.hasP9Altivec()) {
1343 if (Subtarget.isISA3_1()) {
1348 } else {
1351 }
1359
1360 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1361 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1362 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1363 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1364 }
1365
1366 if (Subtarget.hasP10Vector()) {
1368 }
1369 }
1370
1371 if (Subtarget.pairedVectorMemops()) {
1372 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1373 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1374 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1375 }
1376 if (Subtarget.hasMMA()) {
1377 if (Subtarget.isISAFuture()) {
1378 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1379 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1380 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1381 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1382 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1383 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1384 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1385 } else {
1386 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1387 }
1388 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1389 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1391 }
1392
1393 if (Subtarget.has64BitSupport())
1394 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1395
1396 if (Subtarget.isISA3_1())
1397 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1398
1399 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1400
1401 if (!isPPC64) {
1402 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1403 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1404 }
1405
1407 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1408 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1410 }
1411
1413
1414 if (Subtarget.hasAltivec()) {
1415 // Altivec instructions set fields to all zeros or all ones.
1417 }
1418
1421 else if (isPPC64)
1423 else
1425
1426 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1427
1428 // We have target-specific dag combine patterns for the following nodes:
1431 if (Subtarget.hasFPCVT())
1433 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1434 if (Subtarget.useCRBits())
1435 setTargetDAGCombine(ISD::BRCOND);
1438
1440
1442
1443 if (Subtarget.useCRBits()) {
1445 }
1446
1447 // With 32 condition bits, we don't need to sink (and duplicate) compares
1448 // aggressively in CodeGenPrep.
1449 if (Subtarget.useCRBits()) {
1451 }
1452
1453 // TODO: The default entry number is set to 64. This stops most jump table
1454 // generation on PPC. But it is good for current PPC HWs because an indirect
1455 // branch via mtctr to the jump table may lead to poor branch prediction.
1456 // Re-evaluate this value on future HWs that can do better with mtctr.
1458
1459 // The default minimum for the largest number of comparisons in a BitTest cluster is 3.
1461
1463 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1464
1465 auto CPUDirective = Subtarget.getCPUDirective();
1466 switch (CPUDirective) {
1467 default: break;
1468 case PPC::DIR_970:
1469 case PPC::DIR_A2:
1470 case PPC::DIR_E500:
1471 case PPC::DIR_E500mc:
1472 case PPC::DIR_E5500:
1473 case PPC::DIR_PWR4:
1474 case PPC::DIR_PWR5:
1475 case PPC::DIR_PWR5X:
1476 case PPC::DIR_PWR6:
1477 case PPC::DIR_PWR6X:
1478 case PPC::DIR_PWR7:
1479 case PPC::DIR_PWR8:
1480 case PPC::DIR_PWR9:
1481 case PPC::DIR_PWR10:
1482 case PPC::DIR_PWR11:
1486 break;
1487 }
1488
1489 if (Subtarget.enableMachineScheduler())
1491 else
1493
1495
1496 // The Freescale cores do better with aggressive inlining of memcpy and
1497 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1498 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1499 MaxStoresPerMemset = 32;
1501 MaxStoresPerMemcpy = 32;
1505 } else if (CPUDirective == PPC::DIR_A2) {
1506 // The A2 also benefits from (very) aggressive inlining of memcpy and
1507 // friends. The overhead of a function call, even when warm, can be
1508 // over one hundred cycles.
1509 MaxStoresPerMemset = 128;
1510 MaxStoresPerMemcpy = 128;
1511 MaxStoresPerMemmove = 128;
1512 MaxLoadsPerMemcmp = 128;
1513 } else {
1516 }
1517
1518 // Enable generation of STXVP instructions by default for mcpu=future.
1519 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1520 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1521 DisableAutoPairedVecSt = false;
1522
1523 IsStrictFPEnabled = true;
1524
1525 // Let the subtarget (CPU) decide if a predictable select is more expensive
1526 // than the corresponding branch. This information is used in CGP to decide
1527 // when to convert selects into branches.
1528 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1529
1531}
1532
1533// *********************************** NOTE ************************************
1534// For selecting load and store instructions, the addressing modes are defined
1535// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1536 // patterns to match the load and store instructions.
1537//
1538// The TD definitions for the addressing modes correspond to their respective
1539// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1540// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1541// address mode flags of a particular node. Afterwards, the computed address
1542// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1543// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1544// accordingly, based on the preferred addressing mode.
1545//
1546// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1547// MemOpFlags contains all the possible flags that can be used to compute the
1548// optimal addressing mode for load and store instructions.
1549// AddrMode contains all the possible load and store addressing modes available
1550// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1551//
1552// When adding new load and store instructions, it is possible that new address
1553// flags may need to be added into MemOpFlags, and a new addressing mode will
1554 // need to be added to AddrMode. An entry for the new addressing mode
1555 // (consisting of the minimal and main distinguishing address flags for the new
1556 // load/store instructions) will need to be added to initializeAddrModeMap()
1557 // below. Finally, when adding new addressing modes, getAddrModeForFlags()
1558 // will need to be updated to account for selecting the optimal addressing mode.
1559// *****************************************************************************
1560/// Initialize the map that relates the different addressing modes of the load
1561/// and store instructions to a set of flags. This ensures the load/store
1562/// instruction is correctly matched during instruction selection.
1563void PPCTargetLowering::initializeAddrModeMap() {
1564 AddrModesMap[PPC::AM_DForm] = {
1565 // LWZ, STW
1570 // LBZ, LHZ, STB, STH
1575 // LHA
1580 // LFS, LFD, STFS, STFD
1585 };
1586 AddrModesMap[PPC::AM_DSForm] = {
1587 // LWA
1591 // LD, STD
1595 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1599 };
1600 AddrModesMap[PPC::AM_DQForm] = {
1601 // LXV, STXV
1605 };
1606 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1608 // TODO: Add mapping for quadword load/store.
1609}
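// Sketch of how an additional mapping could be registered; the mode and flag
// names below are placeholders, not existing enumerators:
//   AddrModesMap[PPC::AM_NewForm] = {
//       PPC::MOF_SomeOffsetKind | PPC::MOF_SomeSubtargetFeature,
//   };
// getAddrModeForFlags() would then need to return PPC::AM_NewForm when a
// computed flag set matches one of these entries.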
1610
1611/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1612/// the desired ByVal argument alignment.
1613static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1614 if (MaxAlign == MaxMaxAlign)
1615 return;
1616 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1617 if (MaxMaxAlign >= 32 &&
1618 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1619 MaxAlign = Align(32);
1620 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1621 MaxAlign < 16)
1622 MaxAlign = Align(16);
1623 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1624 Align EltAlign;
1625 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1626 if (EltAlign > MaxAlign)
1627 MaxAlign = EltAlign;
1628 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1629 for (auto *EltTy : STy->elements()) {
1630 Align EltAlign;
1631 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1632 if (EltAlign > MaxAlign)
1633 MaxAlign = EltAlign;
1634 if (MaxAlign == MaxMaxAlign)
1635 break;
1636 }
1637 }
1638}
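// Worked example (illustrative): for a struct containing a <4 x i32> member,
// the Altivec caller below passes MaxMaxAlign = Align(16); the 128-bit vector
// raises MaxAlign to Align(16), which equals MaxMaxAlign, so the walk over the
// remaining struct elements stops early.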
1639
1640/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1641/// function arguments in the caller parameter area.
1642Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1643 const DataLayout &DL) const {
1644 // 16-byte and wider vectors are passed on a 16-byte boundary.
1645 // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1646 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1647 if (Subtarget.hasAltivec())
1648 getMaxByValAlign(Ty, Alignment, Align(16));
1649 return Alignment;
1650}
1651
1652bool PPCTargetLowering::useSoftFloat() const {
1653 return Subtarget.useSoftFloat();
1654}
1655
1656bool PPCTargetLowering::hasSPE() const {
1657 return Subtarget.hasSPE();
1658}
1659
1661 return VT.isScalarInteger();
1662}
1663
1665 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1666 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1667 return false;
1668
1669 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1670 if (VTy->getScalarType()->isIntegerTy()) {
1671 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1672 if (ElemSizeInBits == 32) {
1673 Index = Subtarget.isLittleEndian() ? 2 : 1;
1674 return true;
1675 }
1676 if (ElemSizeInBits == 64) {
1677 Index = Subtarget.isLittleEndian() ? 1 : 0;
1678 return true;
1679 }
1680 }
1681 }
1682 return false;
1683}
1684
1686 EVT VT) const {
1687 if (!VT.isVector())
1688 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1689
1691}
1692
1694 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1695 return true;
1696}
1697
1698//===----------------------------------------------------------------------===//
1699// Node matching predicates, for use by the tblgen matching code.
1700//===----------------------------------------------------------------------===//
1701
1702/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1703static bool isFloatingPointZero(SDValue Op) {
1704 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1705 return CFP->getValueAPF().isZero();
1706 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1707 // Maybe this has already been legalized into the constant pool?
1708 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1709 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1710 return CFP->getValueAPF().isZero();
1711 }
1712 return false;
1713}
1714
1715/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1716/// true if Op is undef or if it matches the specified value.
1717static bool isConstantOrUndef(int Op, int Val) {
1718 return Op < 0 || Op == Val;
1719}
1720
1721/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1722/// VPKUHUM instruction.
1723/// The ShuffleKind distinguishes between big-endian operations with
1724/// two different inputs (0), either-endian operations with two identical
1725/// inputs (1), and little-endian operations with two different inputs (2).
1726/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1727bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1728 SelectionDAG &DAG) {
1729 bool IsLE = DAG.getDataLayout().isLittleEndian();
1730 if (ShuffleKind == 0) {
1731 if (IsLE)
1732 return false;
1733 for (unsigned i = 0; i != 16; ++i)
1734 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1735 return false;
1736 } else if (ShuffleKind == 2) {
1737 if (!IsLE)
1738 return false;
1739 for (unsigned i = 0; i != 16; ++i)
1740 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1741 return false;
1742 } else if (ShuffleKind == 1) {
1743 unsigned j = IsLE ? 0 : 1;
1744 for (unsigned i = 0; i != 8; ++i)
1745 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1746 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1747 return false;
1748 }
1749 return true;
1750}
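// Example: on a big-endian target with ShuffleKind == 0, the accepted mask is
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>
// i.e. element i maps to i*2+1, the low-order byte of each halfword.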
1751
1752/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1753/// VPKUWUM instruction.
1754/// The ShuffleKind distinguishes between big-endian operations with
1755/// two different inputs (0), either-endian operations with two identical
1756/// inputs (1), and little-endian operations with two different inputs (2).
1757/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1758bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1759 SelectionDAG &DAG) {
1760 bool IsLE = DAG.getDataLayout().isLittleEndian();
1761 if (ShuffleKind == 0) {
1762 if (IsLE)
1763 return false;
1764 for (unsigned i = 0; i != 16; i += 2)
1765 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1766 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1767 return false;
1768 } else if (ShuffleKind == 2) {
1769 if (!IsLE)
1770 return false;
1771 for (unsigned i = 0; i != 16; i += 2)
1772 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1773 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1774 return false;
1775 } else if (ShuffleKind == 1) {
1776 unsigned j = IsLE ? 0 : 2;
1777 for (unsigned i = 0; i != 8; i += 2)
1778 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1779 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1780 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1781 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1782 return false;
1783 }
1784 return true;
1785}
1786
1787/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1788/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1789/// current subtarget.
1790///
1791/// The ShuffleKind distinguishes between big-endian operations with
1792/// two different inputs (0), either-endian operations with two identical
1793/// inputs (1), and little-endian operations with two different inputs (2).
1794/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1795bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1796 SelectionDAG &DAG) {
1797 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1798 if (!Subtarget.hasP8Vector())
1799 return false;
1800
1801 bool IsLE = DAG.getDataLayout().isLittleEndian();
1802 if (ShuffleKind == 0) {
1803 if (IsLE)
1804 return false;
1805 for (unsigned i = 0; i != 16; i += 4)
1806 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1807 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1808 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1809 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1810 return false;
1811 } else if (ShuffleKind == 2) {
1812 if (!IsLE)
1813 return false;
1814 for (unsigned i = 0; i != 16; i += 4)
1815 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1816 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1817 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1818 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1819 return false;
1820 } else if (ShuffleKind == 1) {
1821 unsigned j = IsLE ? 0 : 4;
1822 for (unsigned i = 0; i != 8; i += 4)
1823 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1824 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1825 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1826 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1827 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1828 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1829 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1830 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1831 return false;
1832 }
1833 return true;
1834}
1835
1836/// isVMerge - Common function, used to match vmrg* shuffles.
1837///
1838static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1839 unsigned LHSStart, unsigned RHSStart) {
1840 if (N->getValueType(0) != MVT::v16i8)
1841 return false;
1842 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1843 "Unsupported merge size!");
1844
1845 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1846 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1847 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1848 LHSStart+j+i*UnitSize) ||
1849 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1850 RHSStart+j+i*UnitSize))
1851 return false;
1852 }
1853 return true;
1854}
1855
1856/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1857/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1858/// The ShuffleKind distinguishes between big-endian merges with two
1859/// different inputs (0), either-endian merges with two identical inputs (1),
1860/// and little-endian merges with two different inputs (2). For the latter,
1861/// the input operands are swapped (see PPCInstrAltivec.td).
1862bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1863 unsigned ShuffleKind, SelectionDAG &DAG) {
1864 if (DAG.getDataLayout().isLittleEndian()) {
1865 if (ShuffleKind == 1) // unary
1866 return isVMerge(N, UnitSize, 0, 0);
1867 else if (ShuffleKind == 2) // swapped
1868 return isVMerge(N, UnitSize, 0, 16);
1869 else
1870 return false;
1871 } else {
1872 if (ShuffleKind == 1) // unary
1873 return isVMerge(N, UnitSize, 8, 8);
1874 else if (ShuffleKind == 0) // normal
1875 return isVMerge(N, UnitSize, 8, 24);
1876 else
1877 return false;
1878 }
1879}
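// Example: a big-endian vmrglb (UnitSize == 1, ShuffleKind == 0) corresponds
// to the byte mask
//   <8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>
// which interleaves bytes 8..15 of the two inputs.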
1880
1881/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1882/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1883/// The ShuffleKind distinguishes between big-endian merges with two
1884/// different inputs (0), either-endian merges with two identical inputs (1),
1885/// and little-endian merges with two different inputs (2). For the latter,
1886/// the input operands are swapped (see PPCInstrAltivec.td).
1887bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1888 unsigned ShuffleKind, SelectionDAG &DAG) {
1889 if (DAG.getDataLayout().isLittleEndian()) {
1890 if (ShuffleKind == 1) // unary
1891 return isVMerge(N, UnitSize, 8, 8);
1892 else if (ShuffleKind == 2) // swapped
1893 return isVMerge(N, UnitSize, 8, 24);
1894 else
1895 return false;
1896 } else {
1897 if (ShuffleKind == 1) // unary
1898 return isVMerge(N, UnitSize, 0, 0);
1899 else if (ShuffleKind == 0) // normal
1900 return isVMerge(N, UnitSize, 0, 16);
1901 else
1902 return false;
1903 }
1904}
1905
1906/**
1907 * Common function used to match vmrgew and vmrgow shuffles
1908 *
1909 * The indexOffset determines whether to look for even or odd words in
1910 * the shuffle mask. This is based on the endianness of the target
1911 * machine.
1912 * - Little Endian:
1913 * - Use offset of 0 to check for odd elements
1914 * - Use offset of 4 to check for even elements
1915 * - Big Endian:
1916 * - Use offset of 0 to check for even elements
1917 * - Use offset of 4 to check for odd elements
1918 * A detailed description of the vector element ordering for little endian and
1919 * big endian can be found at
1920 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1921 * Targeting your applications - what little endian and big endian IBM XL C/C++
1922 * compiler differences mean to you
1923 *
1924 * The mask to the shuffle vector instruction specifies the indices of the
1925 * elements from the two input vectors to place in the result. The elements are
1926 * numbered in array-access order, starting with the first vector. These vectors
1927 * are always of type v16i8, thus each vector will contain 16 elements of size
1928 * 8. More info on the shuffle vector can be found in the
1929 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1930 * Language Reference.
1931 *
1932 * The RHSStartValue indicates whether the same input vectors are used (unary)
1933 * or two different input vectors are used, based on the following:
1934 * - If the instruction uses the same vector for both inputs, the range of the
1935 * indices will be 0 to 15. In this case, the RHSStart value passed should
1936 * be 0.
1937 * - If the instruction has two different vectors then the range of the
1938 * indices will be 0 to 31. In this case, the RHSStart value passed should
1939 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1940 * to 31 specify elements in the second vector).
1941 *
1942 * \param[in] N The shuffle vector SD Node to analyze
1943 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1944 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1945 * vector to the shuffle_vector instruction
1946 * \return true iff this shuffle vector represents an even or odd word merge
1947 */
1948static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1949 unsigned RHSStartValue) {
1950 if (N->getValueType(0) != MVT::v16i8)
1951 return false;
1952
1953 for (unsigned i = 0; i < 2; ++i)
1954 for (unsigned j = 0; j < 4; ++j)
1955 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1956 i*RHSStartValue+j+IndexOffset) ||
1957 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1958 i*RHSStartValue+j+IndexOffset+8))
1959 return false;
1960 return true;
1961}
1962
1963/**
1964 * Determine if the specified shuffle mask is suitable for the vmrgew or
1965 * vmrgow instructions.
1966 *
1967 * \param[in] N The shuffle vector SD Node to analyze
1968 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1969 * \param[in] ShuffleKind Identify the type of merge:
1970 * - 0 = big-endian merge with two different inputs;
1971 * - 1 = either-endian merge with two identical inputs;
1972 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1973 * little-endian merges).
1974 * \param[in] DAG The current SelectionDAG
1975 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction.
1976 */
1977bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1978 unsigned ShuffleKind, SelectionDAG &DAG) {
1979 if (DAG.getDataLayout().isLittleEndian()) {
1980 unsigned indexOffset = CheckEven ? 4 : 0;
1981 if (ShuffleKind == 1) // Unary
1982 return isVMerge(N, indexOffset, 0);
1983 else if (ShuffleKind == 2) // swapped
1984 return isVMerge(N, indexOffset, 16);
1985 else
1986 return false;
1987 }
1988 else {
1989 unsigned indexOffset = CheckEven ? 0 : 4;
1990 if (ShuffleKind == 1) // Unary
1991 return isVMerge(N, indexOffset, 0);
1992 else if (ShuffleKind == 0) // Normal
1993 return isVMerge(N, indexOffset, 16);
1994 else
1995 return false;
1996 }
1997 return false;
1998}
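// Example: on big endian, an even-word merge of two different inputs
// (CheckEven == true, ShuffleKind == 0) accepts the byte mask
//   <0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27>
// i.e. words 0 and 2 of each input, interleaved.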
1999
2000/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2001/// amount, otherwise return -1.
2002/// The ShuffleKind distinguishes between big-endian operations with two
2003/// different inputs (0), either-endian operations with two identical inputs
2004/// (1), and little-endian operations with two different inputs (2). For the
2005/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2006int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2007 SelectionDAG &DAG) {
2008 if (N->getValueType(0) != MVT::v16i8)
2009 return -1;
2010
2011 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2012
2013 // Find the first non-undef value in the shuffle mask.
2014 unsigned i;
2015 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2016 /*search*/;
2017
2018 if (i == 16) return -1; // all undef.
2019
2020 // Otherwise, check to see if the rest of the elements are consecutively
2021 // numbered from this value.
2022 unsigned ShiftAmt = SVOp->getMaskElt(i);
2023 if (ShiftAmt < i) return -1;
2024
2025 ShiftAmt -= i;
2026 bool isLE = DAG.getDataLayout().isLittleEndian();
2027
2028 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2029 // Check the rest of the elements to see if they are consecutive.
2030 for (++i; i != 16; ++i)
2031 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2032 return -1;
2033 } else if (ShuffleKind == 1) {
2034 // Check the rest of the elements to see if they are consecutive.
2035 for (++i; i != 16; ++i)
2036 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2037 return -1;
2038 } else
2039 return -1;
2040
2041 if (isLE)
2042 ShiftAmt = 16 - ShiftAmt;
2043
2044 return ShiftAmt;
2045}
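// Worked example (illustrative): for a big-endian two-input shuffle
// (ShuffleKind == 0) with mask {5,6,7,...,20}, the first mask element is 5 and
// the remaining elements are consecutive, so this returns a vsldoi shift of 5
// bytes. In the corresponding little-endian case (ShuffleKind == 2) the input
// operands are swapped and the function returns 16 - ShiftAmt instead.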
2046
2047/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2048/// specifies a splat of a single element that is suitable for input to
2049/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2050bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2051 EVT VT = N->getValueType(0);
2052 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2053 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2054
2055 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2056 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2057
2058 // The consecutive indices need to specify an element, not part of two
2059 // different elements. So abandon ship early if this isn't the case.
2060 if (N->getMaskElt(0) % EltSize != 0)
2061 return false;
2062
2063 // This is a splat operation if each element of the permute is the same, and
2064 // if the value doesn't reference the second vector.
2065 unsigned ElementBase = N->getMaskElt(0);
2066
2067 // FIXME: Handle UNDEF elements too!
2068 if (ElementBase >= 16)
2069 return false;
2070
2071 // Check that the indices are consecutive, in the case of a multi-byte element
2072 // splatted with a v16i8 mask.
2073 for (unsigned i = 1; i != EltSize; ++i)
2074 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2075 return false;
2076
2077 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2078 // An UNDEF element is a sequence of UNDEF bytes.
2079 if (N->getMaskElt(i) < 0) {
2080 for (unsigned j = 1; j != EltSize; ++j)
2081 if (N->getMaskElt(i + j) >= 0)
2082 return false;
2083 } else
2084 for (unsigned j = 0; j != EltSize; ++j)
2085 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2086 return false;
2087 }
2088 return true;
2089}
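// Worked example (illustrative): with EltSize == 4, the byte mask
//   {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
// splats word element 1 of the first input and is accepted, whereas a mask
// starting at byte 1 (e.g. {1,2,3,4, ...}) is rejected up front because its
// first index is not a multiple of the element size, so the four bytes would
// straddle two different words.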
2090
2091/// Check that the mask is shuffling N byte elements. Within each N byte
2092/// element of the mask, the indices could be either in increasing or
2093/// decreasing order as long as they are consecutive.
2094/// \param[in] N the shuffle vector SD Node to analyze
2095/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2096/// Word/DoubleWord/QuadWord).
2097/// \param[in] StepLen the index step between consecutive entries within each
2098/// N-byte element: 1 if the mask is in increasing order, -1 if decreasing.
2099/// \return true iff the mask is shuffling N byte elements.
2100static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2101 int StepLen) {
2102 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2103 "Unexpected element width.");
2104 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2105
2106 unsigned NumOfElem = 16 / Width;
2107 unsigned MaskVal[16]; // Width is never greater than 16
2108 for (unsigned i = 0; i < NumOfElem; ++i) {
2109 MaskVal[0] = N->getMaskElt(i * Width);
2110 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2111 return false;
2112 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2113 return false;
2114 }
2115
2116 for (unsigned int j = 1; j < Width; ++j) {
2117 MaskVal[j] = N->getMaskElt(i * Width + j);
2118 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2119 return false;
2120 }
2121 }
2122 }
2123
2124 return true;
2125}
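// Worked example (illustrative): with Width == 4 and StepLen == 1 the mask
// {8,9,10,11, 0,1,2,3, 20,21,22,23, 16,17,18,19} is accepted (each group of
// four bytes is increasing and word-aligned). With Width == 4 and
// StepLen == -1, a byte-reversed-within-word mask such as
// {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12} is accepted; that is the form
// the XXBRW check further below looks for.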
2126
2127bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2128 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2129 if (!isNByteElemShuffleMask(N, 4, 1))
2130 return false;
2131
2132 // Now we look at mask elements 0,4,8,12
2133 unsigned M0 = N->getMaskElt(0) / 4;
2134 unsigned M1 = N->getMaskElt(4) / 4;
2135 unsigned M2 = N->getMaskElt(8) / 4;
2136 unsigned M3 = N->getMaskElt(12) / 4;
2137 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2138 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2139
2140 // Below, let H and L be arbitrary elements of the shuffle mask
2141 // where H is in the range [4,7] and L is in the range [0,3].
2142 // H, 1, 2, 3 or L, 5, 6, 7
2143 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2144 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2145 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2146 InsertAtByte = IsLE ? 12 : 0;
2147 Swap = M0 < 4;
2148 return true;
2149 }
2150 // 0, H, 2, 3 or 4, L, 6, 7
2151 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2152 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2153 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2154 InsertAtByte = IsLE ? 8 : 4;
2155 Swap = M1 < 4;
2156 return true;
2157 }
2158 // 0, 1, H, 3 or 4, 5, L, 7
2159 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2160 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2161 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2162 InsertAtByte = IsLE ? 4 : 8;
2163 Swap = M2 < 4;
2164 return true;
2165 }
2166 // 0, 1, 2, H or 4, 5, 6, L
2167 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2168 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2169 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2170 InsertAtByte = IsLE ? 0 : 12;
2171 Swap = M3 < 4;
2172 return true;
2173 }
2174
2175 // If both vector operands for the shuffle are the same vector, the mask will
2176 // contain only elements from the first one and the second one will be undef.
2177 if (N->getOperand(1).isUndef()) {
2178 ShiftElts = 0;
2179 Swap = true;
2180 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2181 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2182 InsertAtByte = IsLE ? 12 : 0;
2183 return true;
2184 }
2185 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2186 InsertAtByte = IsLE ? 8 : 4;
2187 return true;
2188 }
2189 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2190 InsertAtByte = IsLE ? 4 : 8;
2191 return true;
2192 }
2193 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2194 InsertAtByte = IsLE ? 0 : 12;
2195 return true;
2196 }
2197 }
2198
2199 return false;
2200}
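// Worked example (illustrative): on a little-endian target the word-level
// mask {0, 1, 5, 3} (word 2 of the result taken from word 1 of the second
// input) matches the "0, 1, H, 3" pattern above, giving
// ShiftElts = LittleEndianShifts[5 & 0x3] = 1, InsertAtByte = 4 and
// Swap = false, which is then encoded as an XXINSERTW.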
2201
2202bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2203 bool &Swap, bool IsLE) {
2204 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2205 // Ensure each byte index of the word is consecutive.
2206 if (!isNByteElemShuffleMask(N, 4, 1))
2207 return false;
2208
2209 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2210 unsigned M0 = N->getMaskElt(0) / 4;
2211 unsigned M1 = N->getMaskElt(4) / 4;
2212 unsigned M2 = N->getMaskElt(8) / 4;
2213 unsigned M3 = N->getMaskElt(12) / 4;
2214
2215 // If both vector operands for the shuffle are the same vector, the mask will
2216 // contain only elements from the first one and the second one will be undef.
2217 if (N->getOperand(1).isUndef()) {
2218 assert(M0 < 4 && "Indexing into an undef vector?");
2219 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2220 return false;
2221
2222 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2223 Swap = false;
2224 return true;
2225 }
2226
2227 // Ensure each word index of the ShuffleVector Mask is consecutive.
2228 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2229 return false;
2230
2231 if (IsLE) {
2232 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2233 // Input vectors don't need to be swapped if the leading element
2234 // of the result is one of the 3 left elements of the second vector
2235 // (or if there is no shift to be done at all).
2236 Swap = false;
2237 ShiftElts = (8 - M0) % 8;
2238 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2239 // Input vectors need to be swapped if the leading element
2240 // of the result is one of the 3 left elements of the first vector
2241 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2242 Swap = true;
2243 ShiftElts = (4 - M0) % 4;
2244 }
2245
2246 return true;
2247 } else { // BE
2248 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2249 // Input vectors don't need to be swapped if the leading element
2250 // of the result is one of the 4 elements of the first vector.
2251 Swap = false;
2252 ShiftElts = M0;
2253 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2254 // Input vectors need to be swapped if the leading element
2255 // of the result is one of the 4 elements of the right vector.
2256 Swap = true;
2257 ShiftElts = M0 - 4;
2258 }
2259
2260 return true;
2261 }
2262}
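// Worked example (illustrative): for a big-endian two-input shuffle whose
// word-level mask is {1, 2, 3, 4} (three words of the first input followed by
// word 0 of the second), the leading element M0 == 1 lies in the first
// vector, so Swap = false and ShiftElts = 1 -- an xxsldwi by one word.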
2263
2264static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2265 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2266
2267 if (!isNByteElemShuffleMask(N, Width, -1))
2268 return false;
2269
2270 for (int i = 0; i < 16; i += Width)
2271 if (N->getMaskElt(i) != i + Width - 1)
2272 return false;
2273
2274 return true;
2275}
2276
2277bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2278 return isXXBRShuffleMaskHelper(N, 2);
2279}
2280
2281bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2282 return isXXBRShuffleMaskHelper(N, 4);
2283}
2284
2285bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2286 return isXXBRShuffleMaskHelper(N, 8);
2287}
2288
2289bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2290 return isXXBRShuffleMaskHelper(N, 16);
2291}
2292
2293/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2294/// if the inputs to the instruction should be swapped and set \p DM to the
2295/// value for the immediate.
2296/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2297/// AND element 0 of the result comes from the first input (LE) or second input
2298/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2299/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2300/// mask.
2301bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2302 bool &Swap, bool IsLE) {
2303 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2304
2305 // Ensure each byte index of the double word is consecutive.
2306 if (!isNByteElemShuffleMask(N, 8, 1))
2307 return false;
2308
2309 unsigned M0 = N->getMaskElt(0) / 8;
2310 unsigned M1 = N->getMaskElt(8) / 8;
2311 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2312
2313 // If both vector operands for the shuffle are the same vector, the mask will
2314 // contain only elements from the first one and the second one will be undef.
2315 if (N->getOperand(1).isUndef()) {
2316 if ((M0 | M1) < 2) {
2317 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2318 Swap = false;
2319 return true;
2320 } else
2321 return false;
2322 }
2323
2324 if (IsLE) {
2325 if (M0 > 1 && M1 < 2) {
2326 Swap = false;
2327 } else if (M0 < 2 && M1 > 1) {
2328 M0 = (M0 + 2) % 4;
2329 M1 = (M1 + 2) % 4;
2330 Swap = true;
2331 } else
2332 return false;
2333
2334 // Note: if control flow comes here that means Swap is already set above
2335 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2336 return true;
2337 } else { // BE
2338 if (M0 < 2 && M1 > 1) {
2339 Swap = false;
2340 } else if (M0 > 1 && M1 < 2) {
2341 M0 = (M0 + 2) % 4;
2342 M1 = (M1 + 2) % 4;
2343 Swap = true;
2344 } else
2345 return false;
2346
2347 // Note: if control flow comes here that means Swap is already set above
2348 DM = (M0 << 1) + (M1 & 1);
2349 return true;
2350 }
2351}
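// Worked example (illustrative): for a big-endian two-input shuffle whose
// doubleword-level mask is {0, 3} (doubleword 0 of the first input and
// doubleword 1 of the second), M0 < 2 and M1 > 1, so Swap = false and
// DM = (M0 << 1) + (M1 & 1) = 1, the immediate used by xxpermdi.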
2352
2353
2354/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2355/// appropriate for PPC mnemonics (which have a big endian bias - namely
2356/// elements are counted from the left of the vector register).
2357unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2358 SelectionDAG &DAG) {
2359 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2360 assert(isSplatShuffleMask(SVOp, EltSize));
2361 EVT VT = SVOp->getValueType(0);
2362
2363 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2364 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2365 : SVOp->getMaskElt(0);
2366
2367 if (DAG.getDataLayout().isLittleEndian())
2368 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2369 else
2370 return SVOp->getMaskElt(0) / EltSize;
2371}
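// Worked example (illustrative): a v16i8 splat whose mask selects byte 3
// (EltSize == 1) yields 16/1 - 1 - 3 = 12 on a little-endian target, i.e. the
// same physical byte counted from the left of the register, which is how the
// PPC splat mnemonics number elements; on big-endian the index 3 is returned
// unchanged.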
2372
2373/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2374/// by using a vspltis[bhw] instruction of the specified element size, return
2375/// the constant being splatted. The ByteSize field indicates the number of
2376/// bytes of each element [124] -> [bhw].
2377SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2378 SDValue OpVal;
2379
2380 // If ByteSize of the splat is bigger than the element size of the
2381 // build_vector, then we have a case where we are checking for a splat where
2382 // multiple elements of the buildvector are folded together into a single
2383 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2384 unsigned EltSize = 16/N->getNumOperands();
2385 if (EltSize < ByteSize) {
2386 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2387 SDValue UniquedVals[4];
2388 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2389
2390 // See if all of the elements in the buildvector agree across.
2391 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2392 if (N->getOperand(i).isUndef()) continue;
2393 // If the element isn't a constant, bail fully out.
2394 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2395
2396 if (!UniquedVals[i&(Multiple-1)].getNode())
2397 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2398 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2399 return SDValue(); // no match.
2400 }
2401
2402 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2403 // either constant or undef values that are identical for each chunk. See
2404 // if these chunks can form into a larger vspltis*.
2405
2406 // Check to see if all of the leading entries are either 0 or -1. If
2407 // neither, then this won't fit into the immediate field.
2408 bool LeadingZero = true;
2409 bool LeadingOnes = true;
2410 for (unsigned i = 0; i != Multiple-1; ++i) {
2411 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2412
2413 LeadingZero &= isNullConstant(UniquedVals[i]);
2414 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2415 }
2416 // Finally, check the least significant entry.
2417 if (LeadingZero) {
2418 if (!UniquedVals[Multiple-1].getNode())
2419 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2420 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2421 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2422 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2423 }
2424 if (LeadingOnes) {
2425 if (!UniquedVals[Multiple-1].getNode())
2426 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2427 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2428 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2429 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2430 }
2431
2432 return SDValue();
2433 }
2434
2435 // Check to see if this buildvec has a single non-undef value in its elements.
2436 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2437 if (N->getOperand(i).isUndef()) continue;
2438 if (!OpVal.getNode())
2439 OpVal = N->getOperand(i);
2440 else if (OpVal != N->getOperand(i))
2441 return SDValue();
2442 }
2443
2444 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2445
2446 unsigned ValSizeInBytes = EltSize;
2447 uint64_t Value = 0;
2448 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2449 Value = CN->getZExtValue();
2450 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2451 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2452 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2453 }
2454
2455 // If the splat value is larger than the element value, then we can never do
2456 // this splat. The only case that we could fit the replicated bits into our
2457 // immediate field for would be zero, and we prefer to use vxor for it.
2458 if (ValSizeInBytes < ByteSize) return SDValue();
2459
2460 // If the element value is larger than the splat value, check if it consists
2461 // of a repeated bit pattern of size ByteSize.
2462 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2463 return SDValue();
2464
2465 // Properly sign extend the value.
2466 int MaskVal = SignExtend32(Value, ByteSize * 8);
2467
2468 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2469 if (MaskVal == 0) return SDValue();
2470
2471 // Finally, if this value fits in a 5 bit sext field, return it
2472 if (SignExtend32<5>(MaskVal) == MaskVal)
2473 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2474 return SDValue();
2475}
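// Worked example (illustrative): a v8i16 build_vector of eight copies of the
// constant 3 queried with ByteSize == 2 yields the target constant 3 (i.e. a
// "vspltish 3"), while the same node queried with ByteSize == 1 fails because
// the 16-bit value 0x0003 is not a splat of a single repeated byte.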
2476
2477//===----------------------------------------------------------------------===//
2478// Addressing Mode Selection
2479//===----------------------------------------------------------------------===//
2480
2481/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2482/// or 64-bit immediate, and if the value can be accurately represented as a
2483/// sign extension from a 16-bit value. If so, this returns true and the
2484/// immediate.
2485bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2486 if (!isa<ConstantSDNode>(N))
2487 return false;
2488
2489 Imm = (int16_t)N->getAsZExtVal();
2490 if (N->getValueType(0) == MVT::i32)
2491 return Imm == (int32_t)N->getAsZExtVal();
2492 else
2493 return Imm == (int64_t)N->getAsZExtVal();
2494}
2495bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2496 return isIntS16Immediate(Op.getNode(), Imm);
2497}
2498
2499/// Used when computing address flags for selecting loads and stores.
2500/// If we have an OR, check if the LHS and RHS are provably disjoint.
2501/// An OR of two provably disjoint values is equivalent to an ADD.
2502/// Most PPC load/store instructions compute the effective address as a sum,
2503/// so doing this conversion is useful.
2504static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2505 if (N.getOpcode() != ISD::OR)
2506 return false;
2507 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2508 if (!LHSKnown.Zero.getBoolValue())
2509 return false;
2510 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2511 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2512}
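// Worked example (illustrative): for (shl X, 4) | 3, the low four bits of the
// left operand are known to be zero and every bit of the constant 3 above bit
// 1 is zero, so every bit position is known zero on at least one side and the
// OR can safely be treated as an ADD when forming addresses.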
2513
2514/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2515/// be represented as an indexed [r+r] operation.
2516bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2517 SDValue &Index,
2518 SelectionDAG &DAG) const {
2519 for (SDNode *U : N->users()) {
2520 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2521 if (Memop->getMemoryVT() == MVT::f64) {
2522 Base = N.getOperand(0);
2523 Index = N.getOperand(1);
2524 return true;
2525 }
2526 }
2527 }
2528 return false;
2529}
2530
2531/// isIntS34Immediate - This method tests whether the value of the given node can be
2532/// accurately represented as a sign extension from a 34-bit value. If so,
2533/// this returns true and the immediate.
2534bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2535 if (!isa<ConstantSDNode>(N))
2536 return false;
2537
2538 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2539 return isInt<34>(Imm);
2540}
2541bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2542 return isIntS34Immediate(Op.getNode(), Imm);
2543}
2544
2545/// SelectAddressRegReg - Given the specified address, check to see if it
2546/// can be represented as an indexed [r+r] operation. Returns false if it
2547/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2548/// non-zero and N can be represented by a base register plus a signed 16-bit
2549/// displacement, make a more precise judgement by checking (displacement % \p
2550/// EncodingAlignment).
2551bool PPCTargetLowering::SelectAddressRegReg(
2552 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2553 MaybeAlign EncodingAlignment) const {
2554 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2555 // a [pc+imm].
2556 if (SelectAddressPCRel(N, Base))
2557 return false;
2558
2559 int16_t Imm = 0;
2560 if (N.getOpcode() == ISD::ADD) {
2561 // SPE f64 loads/stores cannot handle a 16-bit offset; they only support
2562 // 8-bit offsets, so try the EVX reg+reg form for them first.
2563 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2564 return true;
2565 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2566 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2567 return false; // r+i
2568 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2569 return false; // r+i
2570
2571 Base = N.getOperand(0);
2572 Index = N.getOperand(1);
2573 return true;
2574 } else if (N.getOpcode() == ISD::OR) {
2575 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2576 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2577 return false; // r+i can fold it if we can.
2578
2579 // If this is an or of disjoint bitfields, we can codegen this as an add
2580 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2581 // disjoint.
2582 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2583
2584 if (LHSKnown.Zero.getBoolValue()) {
2585 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2586 // If all of the bits are known zero on the LHS or RHS, the add won't
2587 // carry.
2588 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2589 Base = N.getOperand(0);
2590 Index = N.getOperand(1);
2591 return true;
2592 }
2593 }
2594 }
2595
2596 return false;
2597}
2598
2599// If we happen to be doing an i64 load or store into a stack slot that has
2600// less than a 4-byte alignment, then the frame-index elimination may need to
2601// use an indexed load or store instruction (because the offset may not be a
2602// multiple of 4). The extra register needed to hold the offset comes from the
2603// register scavenger, and it is possible that the scavenger will need to use
2604// an emergency spill slot. As a result, we need to make sure that a spill slot
2605// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2606// stack slot.
2607static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2608 // FIXME: This does not handle the LWA case.
2609 if (VT != MVT::i64)
2610 return;
2611
2612 // NOTE: We'll exclude negative FIs here, which come from argument
2613 // lowering, because there are no known test cases triggering this problem
2614 // using packed structures (or similar). We can remove this exclusion if
2615 // we find such a test case. The reason why this is so test-case driven is
2616 // because this entire 'fixup' is only to prevent crashes (from the
2617 // register scavenger) on not-really-valid inputs. For example, if we have:
2618 // %a = alloca i1
2619 // %b = bitcast i1* %a to i64*
2620 // store i64 0, i64* %b
2621 // then the store should really be marked as 'align 1', but is not. If it
2622 // were marked as 'align 1' then the indexed form would have been
2623 // instruction-selected initially, and the problem this 'fixup' is preventing
2624 // won't happen regardless.
2625 if (FrameIdx < 0)
2626 return;
2627
2629 MachineFrameInfo &MFI = MF.getFrameInfo();
2630
2631 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2632 return;
2633
2634 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2635 FuncInfo->setHasNonRISpills();
2636}
2637
2638/// Returns true if the address N can be represented by a base register plus
2639/// a signed 16-bit displacement [r+imm], and if it is not better
2640/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2641/// displacements that are multiples of that value.
2642bool PPCTargetLowering::SelectAddressRegImm(
2643 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2644 MaybeAlign EncodingAlignment) const {
2645 // FIXME dl should come from parent load or store, not from address
2646 SDLoc dl(N);
2647
2648 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2649 // a [pc+imm].
2650 if (SelectAddressPCRel(N, Base))
2651 return false;
2652
2653 // If this can be more profitably realized as r+r, fail.
2654 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2655 return false;
2656
2657 if (N.getOpcode() == ISD::ADD) {
2658 int16_t imm = 0;
2659 if (isIntS16Immediate(N.getOperand(1), imm) &&
2660 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2661 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2662 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2663 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2664 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2665 } else {
2666 Base = N.getOperand(0);
2667 }
2668 return true; // [r+i]
2669 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2670 // Match LOAD (ADD (X, Lo(G))).
2671 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2672 "Cannot handle constant offsets yet!");
2673 Disp = N.getOperand(1).getOperand(0); // The global address.
2674 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2675 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2676 Disp.getOpcode() == ISD::TargetConstantPool ||
2677 Disp.getOpcode() == ISD::TargetJumpTable);
2678 Base = N.getOperand(0);
2679 return true; // [&g+r]
2680 }
2681 } else if (N.getOpcode() == ISD::OR) {
2682 int16_t imm = 0;
2683 if (isIntS16Immediate(N.getOperand(1), imm) &&
2684 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2685 // If this is an or of disjoint bitfields, we can codegen this as an add
2686 // (for better address arithmetic) if the LHS and RHS of the OR are
2687 // provably disjoint.
2688 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2689
2690 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2691 // If all of the bits are known zero on the LHS or RHS, the add won't
2692 // carry.
2693 if (FrameIndexSDNode *FI =
2694 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2695 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2696 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2697 } else {
2698 Base = N.getOperand(0);
2699 }
2700 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2701 return true;
2702 }
2703 }
2704 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2705 // Loading from a constant address.
2706
2707 // If this address fits entirely in a 16-bit sext immediate field, codegen
2708 // this as "d, 0"
2709 int16_t Imm;
2710 if (isIntS16Immediate(CN, Imm) &&
2711 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2712 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2713 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2714 CN->getValueType(0));
2715 return true;
2716 }
2717
2718 // Handle 32-bit sext immediates with LIS + addr mode.
2719 if ((CN->getValueType(0) == MVT::i32 ||
2720 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2721 (!EncodingAlignment ||
2722 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2723 int Addr = (int)CN->getZExtValue();
2724
2725 // Otherwise, break this down into an LIS + disp.
2726 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2727
2728 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2729 MVT::i32);
2730 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2731 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2732 return true;
2733 }
2734 }
2735
2736 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2737 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2738 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2739 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2740 } else
2741 Base = N;
2742 return true; // [r+0]
2743}
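// Worked example (illustrative): for the constant address 0x12348000 the code
// above produces Disp = (short)0x8000 = -32768 and a base materialized as
// LIS 0x1235, since 0x1235 << 16 == 0x12350000 and
// 0x12350000 + (-32768) == 0x12348000; the high immediate is pre-adjusted so
// that adding the sign-extended displacement reproduces the original address.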
2744
2745/// Similar to the 16-bit case but for instructions that take a 34-bit
2746/// displacement field (prefixed loads/stores).
2747bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2748 SDValue &Base,
2749 SelectionDAG &DAG) const {
2750 // Only on 64-bit targets.
2751 if (N.getValueType() != MVT::i64)
2752 return false;
2753
2754 SDLoc dl(N);
2755 int64_t Imm = 0;
2756
2757 if (N.getOpcode() == ISD::ADD) {
2758 if (!isIntS34Immediate(N.getOperand(1), Imm))
2759 return false;
2760 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2761 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2762 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2763 else
2764 Base = N.getOperand(0);
2765 return true;
2766 }
2767
2768 if (N.getOpcode() == ISD::OR) {
2769 if (!isIntS34Immediate(N.getOperand(1), Imm))
2770 return false;
2771 // If this is an or of disjoint bitfields, we can codegen this as an add
2772 // (for better address arithmetic) if the LHS and RHS of the OR are
2773 // provably disjoint.
2774 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2775 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2776 return false;
2777 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2778 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2779 else
2780 Base = N.getOperand(0);
2781 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2782 return true;
2783 }
2784
2785 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2786 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2787 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2788 return true;
2789 }
2790
2791 return false;
2792}
2793
2794/// SelectAddressRegRegOnly - Given the specified address, force it to be
2795/// represented as an indexed [r+r] operation.
2796bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2797 SDValue &Index,
2798 SelectionDAG &DAG) const {
2799 // Check to see if we can easily represent this as an [r+r] address. This
2800 // will fail if it thinks that the address is more profitably represented as
2801 // reg+imm, e.g. where imm = 0.
2802 if (SelectAddressRegReg(N, Base, Index, DAG))
2803 return true;
2804
2805 // If the address is the result of an add, we will utilize the fact that the
2806 // address calculation includes an implicit add. However, we can reduce
2807 // register pressure if we do not materialize a constant just for use as the
2808 // index register. We only get rid of the add if it is not an add of a
2809 // value and a 16-bit signed constant and both have a single use.
2810 int16_t imm = 0;
2811 if (N.getOpcode() == ISD::ADD &&
2812 (!isIntS16Immediate(N.getOperand(1), imm) ||
2813 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2814 Base = N.getOperand(0);
2815 Index = N.getOperand(1);
2816 return true;
2817 }
2818
2819 // Otherwise, do it the hard way, using R0 as the base register.
2820 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2821 N.getValueType());
2822 Index = N;
2823 return true;
2824}
2825
2826template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2827 Ty *PCRelCand = dyn_cast<Ty>(N);
2828 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2829}
2830
2831/// Returns true if this address is a PC Relative address.
2832/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2833/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2834bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2835 // This is a materialize PC Relative node. Always select this as PC Relative.
2836 Base = N;
2837 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2838 return true;
2839 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2840 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2841 isValidPCRelNode<JumpTableSDNode>(N) ||
2842 isValidPCRelNode<BlockAddressSDNode>(N))
2843 return true;
2844 return false;
2845}
2846
2847/// Returns true if we should use a direct load into vector instruction
2848/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2849static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2850
2851 // If there are any other uses other than scalar to vector, then we should
2852 // keep it as a scalar load -> direct move pattern to prevent multiple
2853 // loads.
2854 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2855 if (!LD)
2856 return false;
2857
2858 EVT MemVT = LD->getMemoryVT();
2859 if (!MemVT.isSimple())
2860 return false;
2861 switch(MemVT.getSimpleVT().SimpleTy) {
2862 case MVT::i64:
2863 break;
2864 case MVT::i32:
2865 if (!ST.hasP8Vector())
2866 return false;
2867 break;
2868 case MVT::i16:
2869 case MVT::i8:
2870 if (!ST.hasP9Vector())
2871 return false;
2872 break;
2873 default:
2874 return false;
2875 }
2876
2877 SDValue LoadedVal(N, 0);
2878 if (!LoadedVal.hasOneUse())
2879 return false;
2880
2881 for (SDUse &Use : LD->uses())
2882 if (Use.getResNo() == 0 &&
2883 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2884 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2885 return false;
2886
2887 return true;
2888}
2889
2890/// getPreIndexedAddressParts - returns true by value, base pointer and
2891/// offset pointer and addressing mode by reference if the node's address
2892/// can be legally represented as pre-indexed load / store address.
2893bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2894 SDValue &Offset,
2895 ISD::MemIndexedMode &AM,
2896 SelectionDAG &DAG) const {
2897 if (DisablePPCPreinc) return false;
2898
2899 bool isLoad = true;
2900 SDValue Ptr;
2901 EVT VT;
2902 Align Alignment;
2903 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2904 Ptr = LD->getBasePtr();
2905 VT = LD->getMemoryVT();
2906 Alignment = LD->getAlign();
2907 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2908 Ptr = ST->getBasePtr();
2909 VT = ST->getMemoryVT();
2910 Alignment = ST->getAlign();
2911 isLoad = false;
2912 } else
2913 return false;
2914
2915 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2916 // instructions because we can fold these into a more efficient instruction
2917 // instead, (such as LXSD).
2918 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2919 return false;
2920 }
2921
2922 // PowerPC doesn't have preinc load/store instructions for vectors
2923 if (VT.isVector())
2924 return false;
2925
2926 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2927 // Common code will reject creating a pre-inc form if the base pointer
2928 // is a frame index, or if N is a store and the base pointer is either
2929 // the same as or a predecessor of the value being stored. Check for
2930 // those situations here, and try with swapped Base/Offset instead.
2931 bool Swap = false;
2932
2933 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2934 Swap = true;
2935 else if (!isLoad) {
2936 SDValue Val = cast<StoreSDNode>(N)->getValue();
2937 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2938 Swap = true;
2939 }
2940
2941 if (Swap)
2942 std::swap(Base, Offset);
2943
2944 AM = ISD::PRE_INC;
2945 return true;
2946 }
2947
2948 // LDU/STU can only handle immediates that are a multiple of 4.
2949 if (VT != MVT::i64) {
2950 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
2951 return false;
2952 } else {
2953 // LDU/STU need an address with at least 4-byte alignment.
2954 if (Alignment < Align(4))
2955 return false;
2956
2957 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2958 return false;
2959 }
2960
2961 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2962 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2963 // sext i32 to i64 when addr mode is r+i.
2964 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2965 LD->getExtensionType() == ISD::SEXTLOAD &&
2966 isa<ConstantSDNode>(Offset))
2967 return false;
2968 }
2969
2970 AM = ISD::PRE_INC;
2971 return true;
2972}
2973
2974//===----------------------------------------------------------------------===//
2975// LowerOperation implementation
2976//===----------------------------------------------------------------------===//
2977
2978/// Set HiOpFlags and LoOpFlags to the target MO flags used when referencing
2979/// labels, selecting the PIC variants when generating position-independent code.
2980static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2981 unsigned &HiOpFlags, unsigned &LoOpFlags,
2982 const GlobalValue *GV = nullptr) {
2983 HiOpFlags = PPCII::MO_HA;
2984 LoOpFlags = PPCII::MO_LO;
2985
2986 // Don't use the pic base if not in PIC relocation model.
2987 if (IsPIC) {
2988 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
2989 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
2990 }
2991}
2992
2993static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2994 SelectionDAG &DAG) {
2995 SDLoc DL(HiPart);
2996 EVT PtrVT = HiPart.getValueType();
2997 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2998
2999 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3000 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3001
3002 // With PIC, the first instruction is actually "GR+hi(&G)".
3003 if (isPIC)
3004 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3005 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3006
3007 // Generate non-pic code that has direct accesses to the constant pool.
3008 // The address of the global is just (hi(&g)+lo(&g)).
3009 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3010}
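// Illustrative note: PPCISD::Hi/Lo correspond to the @ha/@l relocations, e.g.
//   addis r3, r(base), sym@ha
//   addi  r3, r3, sym@l
// where sym@ha is computed as (sym + 0x8000) >> 16 so that adding the
// sign-extended low 16 bits reproduces the full address.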
3011
3012static void setUsesTOCBasePtr(MachineFunction &MF) {
3013 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3014 FuncInfo->setUsesTOCBasePtr();
3015}
3016
3016
3017static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3018 setUsesTOCBasePtr(DAG.getMachineFunction());
3019}
3020
3021SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3022 SDValue GA) const {
3023 EVT VT = Subtarget.getScalarIntVT();
3024 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3025 : Subtarget.isAIXABI()
3026 ? DAG.getRegister(PPC::R2, VT)
3027 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3028 SDValue Ops[] = { GA, Reg };
3029 return DAG.getMemIntrinsicNode(
3030 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3033}
3034
3035SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3036 SelectionDAG &DAG) const {
3037 EVT PtrVT = Op.getValueType();
3038 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3039 const Constant *C = CP->getConstVal();
3040
3041 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3042 // The actual address of the GlobalValue is stored in the TOC.
3043 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3044 if (Subtarget.isUsingPCRelativeCalls()) {
3045 SDLoc DL(CP);
3046 EVT Ty = getPointerTy(DAG.getDataLayout());
3047 SDValue ConstPool = DAG.getTargetConstantPool(
3048 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3049 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3050 }
3051 setUsesTOCBasePtr(DAG);
3052 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3053 return getTOCEntry(DAG, SDLoc(CP), GA);
3054 }
3055
3056 unsigned MOHiFlag, MOLoFlag;
3057 bool IsPIC = isPositionIndependent();
3058 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3059
3060 if (IsPIC && Subtarget.isSVR4ABI()) {
3061 SDValue GA =
3062 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3063 return getTOCEntry(DAG, SDLoc(CP), GA);
3064 }
3065
3066 SDValue CPIHi =
3067 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3068 SDValue CPILo =
3069 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3070 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3071}
3072
3073// For 64-bit PowerPC, prefer the more compact relative encodings.
3074// This trades 32 bits per jump table entry for one or two instructions
3075// on the jump site.
3082
3083bool PPCTargetLowering::isJumpTableRelative() const {
3084 if (UseAbsoluteJumpTables)
3085 return false;
3086 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3087 return true;
3088 return TargetLowering::isJumpTableRelative();
3089}
3090
3091SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3092 SelectionDAG &DAG) const {
3093 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3094 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3095
3096 switch (getTargetMachine().getCodeModel()) {
3097 case CodeModel::Small:
3098 case CodeModel::Medium:
3099 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3100 default:
3101 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3102 getPointerTy(DAG.getDataLayout()));
3103 }
3104}
3105
3106const MCExpr *
3108 unsigned JTI,
3109 MCContext &Ctx) const {
3110 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3112
3113 switch (getTargetMachine().getCodeModel()) {
3114 case CodeModel::Small:
3115 case CodeModel::Medium:
3117 default:
3118 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3119 }
3120}
3121
3122SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3123 EVT PtrVT = Op.getValueType();
3124 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3125
3126 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3127 if (Subtarget.isUsingPCRelativeCalls()) {
3128 SDLoc DL(JT);
3129 EVT Ty = getPointerTy(DAG.getDataLayout());
3130 SDValue GA =
3131 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3132 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3133 return MatAddr;
3134 }
3135
3136 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3137 // The actual address of the GlobalValue is stored in the TOC.
3138 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3139 setUsesTOCBasePtr(DAG);
3140 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3141 return getTOCEntry(DAG, SDLoc(JT), GA);
3142 }
3143
3144 unsigned MOHiFlag, MOLoFlag;
3145 bool IsPIC = isPositionIndependent();
3146 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3147
3148 if (IsPIC && Subtarget.isSVR4ABI()) {
3149 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3151 return getTOCEntry(DAG, SDLoc(GA), GA);
3152 }
3153
3154 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3155 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3156 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3157}
3158
3159SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3160 SelectionDAG &DAG) const {
3161 EVT PtrVT = Op.getValueType();
3162 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3163 const BlockAddress *BA = BASDN->getBlockAddress();
3164
3165 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3166 if (Subtarget.isUsingPCRelativeCalls()) {
3167 SDLoc DL(BASDN);
3168 EVT Ty = getPointerTy(DAG.getDataLayout());
3169 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3171 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3172 return MatAddr;
3173 }
3174
3175 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3176 // The actual BlockAddress is stored in the TOC.
3177 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3178 setUsesTOCBasePtr(DAG);
3179 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3180 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3181 }
3182
3183 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3184 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3185 return getTOCEntry(
3186 DAG, SDLoc(BASDN),
3187 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3188
3189 unsigned MOHiFlag, MOLoFlag;
3190 bool IsPIC = isPositionIndependent();
3191 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3192 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3193 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3194 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3195}
3196
3197SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3198 SelectionDAG &DAG) const {
3199 if (Subtarget.isAIXABI())
3200 return LowerGlobalTLSAddressAIX(Op, DAG);
3201
3202 return LowerGlobalTLSAddressLinux(Op, DAG);
3203}
3204
3205/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3206/// and then apply the update.
3207static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3208 SelectionDAG &DAG,
3209 const TargetMachine &TM) {
3210 // Initialize TLS model opt setting lazily:
3211 // (1) Use initial-exec for single TLS var references within current function.
3212 // (2) Use local-dynamic for multiple TLS var references within current
3213 // function.
3214 PPCFunctionInfo *FuncInfo =
3215 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3216 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3217 SmallPtrSet<const GlobalValue *, 8> TLSGV;
3218 // Iterate over all instructions within current function, collect all TLS
3219 // global variables (global variables taken as the first parameter to
3220 // Intrinsic::threadlocal_address).
3221 const Function &Func = DAG.getMachineFunction().getFunction();
3222 for (const BasicBlock &BB : Func)
3223 for (const Instruction &I : BB)
3224 if (I.getOpcode() == Instruction::Call)
3225 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3226 if (Function *CF = CI->getCalledFunction())
3227 if (CF->isDeclaration() &&
3228 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3229 if (const GlobalValue *GV =
3230 dyn_cast<GlobalValue>(I.getOperand(0))) {
3231 TLSModel::Model GVModel = TM.getTLSModel(GV);
3232 if (GVModel == TLSModel::LocalDynamic)
3233 TLSGV.insert(GV);
3234 }
3235
3236 unsigned TLSGVCnt = TLSGV.size();
3237 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3238 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3239 FuncInfo->setAIXFuncUseTLSIEForLD();
3240 FuncInfo->setAIXFuncTLSModelOptInitDone();
3241 }
3242
3243 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3244 LLVM_DEBUG(
3245 dbgs() << DAG.getMachineFunction().getName()
3246 << " function is using the TLS-IE model for TLS-LD access.\n");
3247 Model = TLSModel::InitialExec;
3248 }
3249}
3250
3251SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3252 SelectionDAG &DAG) const {
3253 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3254
3255 if (DAG.getTarget().useEmulatedTLS())
3256 report_fatal_error("Emulated TLS is not yet supported on AIX");
3257
3258 SDLoc dl(GA);
3259 const GlobalValue *GV = GA->getGlobal();
3260 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3261 bool Is64Bit = Subtarget.isPPC64();
3262 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3263
3264 // Apply update to the TLS model.
3265 if (Subtarget.hasAIXShLibTLSModelOpt())
3266 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3267
3268 // TLS variables are accessed through TOC entries.
3269 // To support this, set the DAG to use the TOC base pointer.
3270 setUsesTOCBasePtr(DAG);
3271
3272 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3273
3274 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3275 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3276 bool HasAIXSmallTLSGlobalAttr = false;
3277 SDValue VariableOffsetTGA =
3278 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3279 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3280 SDValue TLSReg;
3281
3282 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3283 if (GVar->hasAttribute("aix-small-tls"))
3284 HasAIXSmallTLSGlobalAttr = true;
3285
3286 if (Is64Bit) {
3287 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3288 // involves a load of the variable offset (from the TOC), followed by an
3289 // add of the loaded variable offset to R13 (the thread pointer).
3290 // This code sequence looks like:
3291 // ld reg1,var[TC](2)
3292 // add reg2, reg1, r13 // r13 contains the thread pointer
3293 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3294
3295 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3296 // global variable attribute, produce a faster access sequence for
3297 // local-exec TLS variables where the offset from the TLS base is encoded
3298 // as an immediate operand.
3299 //
3300 // We only utilize the faster local-exec access sequence when the TLS
3301 // variable has a size within the policy limit. We treat types that are
3302 // not sized or are empty as being over the policy size limit.
3303 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3304 IsTLSLocalExecModel) {
3305 Type *GVType = GV->getValueType();
3306 if (GVType->isSized() && !GVType->isEmptyTy() &&
3307 GV->getDataLayout().getTypeAllocSize(GVType) <=
3309 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3310 }
3311 } else {
3312 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3313 // involves loading the variable offset from the TOC, generating a call to
3314 // .__get_tpointer to get the thread pointer (which will be in R3), and
3315 // adding the two together:
3316 // lwz reg1,var[TC](2)
3317 // bla .__get_tpointer
3318 // add reg2, reg1, r3
3319 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3320
3321 // We do not implement the 32-bit version of the faster access sequence
3322 // for local-exec that is controlled by the -maix-small-local-exec-tls
3323 // option, or the "aix-small-tls" global variable attribute.
3324 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3325 report_fatal_error("The small-local-exec TLS access sequence is "
3326 "currently only supported on AIX (64-bit mode).");
3327 }
3328 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3329 }
3330
3331 if (Model == TLSModel::LocalDynamic) {
3332 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3333
3334 // We do not implement the 32-bit version of the faster access sequence
3335 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3336 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3337 report_fatal_error("The small-local-dynamic TLS access sequence is "
3338 "currently only supported on AIX (64-bit mode).");
3339
3340 // For local-dynamic on AIX, we need to generate one TOC entry for each
3341 // variable offset, and a single module-handle TOC entry for the entire
3342 // file.
3343
3344 SDValue VariableOffsetTGA =
3345 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3346 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3347
3349 GlobalVariable *TLSGV =
3350 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3351 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3353 assert(TLSGV && "Not able to create GV for _$TLSML.");
3354 SDValue ModuleHandleTGA =
3355 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3356 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3357 SDValue ModuleHandle =
3358 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3359
3360 // With the -maix-small-local-dynamic-tls option, produce a faster access
3361 // sequence for local-dynamic TLS variables where the offset from the
3362 // module-handle is encoded as an immediate operand.
3363 //
3364 // We only utilize the faster local-dynamic access sequence when the TLS
3365 // variable has a size within the policy limit. We treat types that are
3366 // not sized or are empty as being over the policy size limit.
3367 if (HasAIXSmallLocalDynamicTLS) {
3368 Type *GVType = GV->getValueType();
3369 if (GVType->isSized() && !GVType->isEmptyTy() &&
3370 GV->getDataLayout().getTypeAllocSize(GVType) <=
3372 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3373 ModuleHandle);
3374 }
3375
3376 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3377 }
3378
3379 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3380 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3381 // need to generate two TOC entries, one for the variable offset, one for the
3382 // region handle. The global address for the TOC entry of the region handle is
3383 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3384 // entry of the variable offset is created with MO_TLSGD_FLAG.
3385 SDValue VariableOffsetTGA =
3386 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3387 SDValue RegionHandleTGA =
3388 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3389 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3390 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3391 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3392 RegionHandle);
3393}
3394
3395SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3396 SelectionDAG &DAG) const {
3397 // FIXME: TLS addresses currently use medium model code sequences,
3398 // which is the most useful form. Eventually support for small and
3399 // large models could be added if users need it, at the cost of
3400 // additional complexity.
3401 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3402 if (DAG.getTarget().useEmulatedTLS())
3403 return LowerToTLSEmulatedModel(GA, DAG);
3404
3405 SDLoc dl(GA);
3406 const GlobalValue *GV = GA->getGlobal();
3407 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3408 bool is64bit = Subtarget.isPPC64();
3409 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3410 PICLevel::Level picLevel = M->getPICLevel();
3411
3412 const TargetMachine &TM = getTargetMachine();
3413 TLSModel::Model Model = TM.getTLSModel(GV);
3414
3415 if (Model == TLSModel::LocalExec) {
3416 if (Subtarget.isUsingPCRelativeCalls()) {
3417 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3418 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3420 SDValue MatAddr =
3421 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3422 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3423 }
3424
3425 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3427 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3429 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3430 : DAG.getRegister(PPC::R2, MVT::i32);
3431
3432 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3433 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3434 }
3435
3436 if (Model == TLSModel::InitialExec) {
3437 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3438 SDValue TGA = DAG.getTargetGlobalAddress(
3439 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3440 SDValue TGATLS = DAG.getTargetGlobalAddress(
3441 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3442 SDValue TPOffset;
3443 if (IsPCRel) {
3444 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3445 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3446 MachinePointerInfo());
3447 } else {
3448 SDValue GOTPtr;
3449 if (is64bit) {
3450 setUsesTOCBasePtr(DAG);
3451 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3452 GOTPtr =
3453 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3454 } else {
3455 if (!TM.isPositionIndependent())
3456 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3457 else if (picLevel == PICLevel::SmallPIC)
3458 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3459 else
3460 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3461 }
3462 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3463 }
3464 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3465 }
3466
3467 if (Model == TLSModel::GeneralDynamic) {
3468 if (Subtarget.isUsingPCRelativeCalls()) {
3469 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3471 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3472 }
3473
3474 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3475 SDValue GOTPtr;
3476 if (is64bit) {
3477 setUsesTOCBasePtr(DAG);
3478 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3479 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3480 GOTReg, TGA);
3481 } else {
3482 if (picLevel == PICLevel::SmallPIC)
3483 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3484 else
3485 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3486 }
3487 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3488 GOTPtr, TGA, TGA);
3489 }
3490
3491 if (Model == TLSModel::LocalDynamic) {
3492 if (Subtarget.isUsingPCRelativeCalls()) {
3493 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3495 SDValue MatPCRel =
3496 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3497 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3498 }
3499
3500 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3501 SDValue GOTPtr;
3502 if (is64bit) {
3503 setUsesTOCBasePtr(DAG);
3504 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3505 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3506 GOTReg, TGA);
3507 } else {
3508 if (picLevel == PICLevel::SmallPIC)
3509 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3510 else
3511 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3512 }
3513 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3514 PtrVT, GOTPtr, TGA, TGA);
3515 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3516 PtrVT, TLSAddr, TGA);
3517 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3518 }
3519
3520 llvm_unreachable("Unknown TLS model!");
3521}
3522
3523SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3524 SelectionDAG &DAG) const {
3525 EVT PtrVT = Op.getValueType();
3526 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3527 SDLoc DL(GSDN);
3528 const GlobalValue *GV = GSDN->getGlobal();
3529
3530 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3531 // The actual address of the GlobalValue is stored in the TOC.
3532 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3533 if (Subtarget.isUsingPCRelativeCalls()) {
3534 EVT Ty = getPointerTy(DAG.getDataLayout());
3536 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3538 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3539 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3540 MachinePointerInfo());
3541 return Load;
3542 } else {
3543 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3545 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3546 }
3547 }
3548 setUsesTOCBasePtr(DAG);
3549 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3550 return getTOCEntry(DAG, DL, GA);
3551 }
3552
3553 unsigned MOHiFlag, MOLoFlag;
3554 bool IsPIC = isPositionIndependent();
3555 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3556
3557 if (IsPIC && Subtarget.isSVR4ABI()) {
3558 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3559 GSDN->getOffset(),
3561 return getTOCEntry(DAG, DL, GA);
3562 }
3563
3564 SDValue GAHi =
3565 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3566 SDValue GALo =
3567 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3568
3569 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3570}
3571
3572SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3573 bool IsStrict = Op->isStrictFPOpcode();
3574 ISD::CondCode CC =
3575 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3576 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3577 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3578 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3579 EVT LHSVT = LHS.getValueType();
3580 SDLoc dl(Op);
3581
3582 // Soften the setcc with libcall if it is fp128.
3583 if (LHSVT == MVT::f128) {
3584 assert(!Subtarget.hasP9Vector() &&
3585 "SETCC for f128 is already legal under Power9!");
3586 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3587 Op->getOpcode() == ISD::STRICT_FSETCCS);
3588 if (RHS.getNode())
3589 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3590 DAG.getCondCode(CC));
3591 if (IsStrict)
3592 return DAG.getMergeValues({LHS, Chain}, dl);
3593 return LHS;
3594 }
3595
3596 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3597
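// Annotation (not in the original source): the v2i64 equality handling below
// compares the vectors as v4i32, so each 64-bit lane is "equal" only if both
// of its 32-bit halves compared equal. The {1, 0, 3, 2} shuffle swaps the two
// halves within each doubleword, so ANDing the shuffled and unshuffled
// results combines the per-half answers (OR is used for SETNE instead).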
3598 if (Op.getValueType() == MVT::v2i64) {
3599 // When the operands themselves are v2i64 values, we need to do something
3600 // special because VSX has no underlying comparison operations for these.
3601 if (LHS.getValueType() == MVT::v2i64) {
3602 // Equality can be handled by casting to the legal type for Altivec
3603 // comparisons, everything else needs to be expanded.
3604 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3605 return SDValue();
3606 SDValue SetCC32 = DAG.getSetCC(
3607 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3608 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3609 int ShuffV[] = {1, 0, 3, 2};
3610 SDValue Shuff =
3611 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3612 return DAG.getBitcast(MVT::v2i64,
3613 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3614 dl, MVT::v4i32, Shuff, SetCC32));
3615 }
3616
3617 // We handle most of these in the usual way.
3618 return Op;
3619 }
3620
3621 // If we're comparing for equality to zero, expose the fact that this is
3622 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3623 // fold the new nodes.
3624 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3625 return V;
3626
3627 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3628 // Leave comparisons against 0 and -1 alone for now, since they're usually
3629 // optimized. FIXME: revisit this when we can custom lower all setcc
3630 // optimizations.
3631 if (C->isAllOnes() || C->isZero())
3632 return SDValue();
3633 }
3634
3635 // If we have an integer seteq/setne, turn it into a compare against zero
3636 // by xor'ing the rhs with the lhs, which is faster than setting a
3637 // condition register, reading it back out, and masking the correct bit. The
3638 // normal approach here uses sub to do this instead of xor. Using xor exposes
3639 // the result to other bit-twiddling opportunities.
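// Annotation (not in the original source): e.g. (seteq i32 %a, %b) becomes
// (seteq (xor %a, %b), 0), and the compare-against-zero form can then be
// rewritten as roughly (srl (ctlz (xor %a, %b)), 5), since cntlzw yields 32
// exactly when its input is zero.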
3640 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3641 EVT VT = Op.getValueType();
3642 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3643 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3644 }
3645 return SDValue();
3646}
3647
3648SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3649 SDNode *Node = Op.getNode();
3650 EVT VT = Node->getValueType(0);
3651 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3652 SDValue InChain = Node->getOperand(0);
3653 SDValue VAListPtr = Node->getOperand(1);
3654 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3655 SDLoc dl(Node);
3656
3657 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
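// Annotation (not in the original source): the constants used below follow
// the 32-bit SVR4 va_list layout spelled out in LowerVASTART further down:
// byte 0 holds the gpr index, byte 1 the fpr index, bytes 4-7 the
// overflow_arg_area pointer and bytes 8-11 the reg_save_area pointer. The
// FPRs start 32 bytes into the register save area, after the eight GPRs.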
3658
3659 // gpr_index
3660 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3661 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3662 InChain = GprIndex.getValue(1);
3663
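// Annotation (not in the original source): on 32-bit SVR4, i64 arguments are
// passed in aligned GPR pairs (r3:r4, r5:r6, ...), so an odd gpr index has to
// be rounded up to the next even one before the value can be located.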
3664 if (VT == MVT::i64) {
3665 // Check if GprIndex is even
3666 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3667 DAG.getConstant(1, dl, MVT::i32));
3668 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3669 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3670 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3671 DAG.getConstant(1, dl, MVT::i32));
3672 // Align GprIndex to be even if it isn't
3673 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3674 GprIndex);
3675 }
3676
3677 // fpr index is 1 byte after gpr
3678 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3679 DAG.getConstant(1, dl, MVT::i32));
3680
3681 // fpr
3682 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3683 FprPtr, MachinePointerInfo(SV), MVT::i8);
3684 InChain = FprIndex.getValue(1);
3685
3686 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3687 DAG.getConstant(8, dl, MVT::i32));
3688
3689 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3690 DAG.getConstant(4, dl, MVT::i32));
3691
3692 // areas
3693 SDValue OverflowArea =
3694 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3695 InChain = OverflowArea.getValue(1);
3696
3697 SDValue RegSaveArea =
3698 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3699 InChain = RegSaveArea.getValue(1);
3700
3701 // select overflow_area if index >= 8
3702 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3703 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3704
3705 // adjustment constant gpr_index * 4/8
3706 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3707 VT.isInteger() ? GprIndex : FprIndex,
3708 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3709 MVT::i32));
3710
3711 // OurReg = RegSaveArea + RegConstant
3712 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3713 RegConstant);
3714
3715 // Floating types are 32 bytes into RegSaveArea
3716 if (VT.isFloatingPoint())
3717 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3718 DAG.getConstant(32, dl, MVT::i32));
3719
3720 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3721 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3722 VT.isInteger() ? GprIndex : FprIndex,
3723 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3724 MVT::i32));
3725
3726 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3727 VT.isInteger() ? VAListPtr : FprPtr,
3728 MachinePointerInfo(SV), MVT::i8);
3729
3730 // determine if we should load from reg_save_area or overflow_area
3731 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3732
3733 // increase overflow_area by 4/8 if gpr/fpr index >= 8
3734 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3735 DAG.getConstant(VT.isInteger() ? 4 : 8,
3736 dl, MVT::i32));
3737
3738 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3739 OverflowAreaPlusN);
3740
3741 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3742 MachinePointerInfo(), MVT::i32);
3743
3744 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3745}
3746
3747SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3748 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3749
3750 // We have to copy the entire va_list struct:
3751 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3752 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3753 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3754 false, true, /*CI=*/nullptr, std::nullopt,
3755 MachinePointerInfo(), MachinePointerInfo());
3756}
3757
3758SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3759 SelectionDAG &DAG) const {
3760 return Op.getOperand(0);
3761}
3762
3763SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3764 MachineFunction &MF = DAG.getMachineFunction();
3765 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3766
3767 assert((Op.getOpcode() == ISD::INLINEASM ||
3768 Op.getOpcode() == ISD::INLINEASM_BR) &&
3769 "Expecting Inline ASM node.");
3770
3771 // If an LR store is already known to be required then there is no point in
3772 // checking this ASM as well.
3773 if (MFI.isLRStoreRequired())
3774 return Op;
3775
3776 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3777 // type MVT::Glue. We want to ignore this last operand if that is the case.
3778 unsigned NumOps = Op.getNumOperands();
3779 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3780 --NumOps;
3781
3782 // Check all operands that may contain the LR.
3783 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3784 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3785 unsigned NumVals = Flags.getNumOperandRegisters();
3786 ++i; // Skip the ID value.
3787
3788 switch (Flags.getKind()) {
3789 default:
3790 llvm_unreachable("Bad flags!");
3791 case InlineAsm::Kind::RegUse:
3792 case InlineAsm::Kind::Imm:
3793 case InlineAsm::Kind::Mem:
3794 i += NumVals;
3795 break;
3796 case InlineAsm::Kind::Clobber:
3797 case InlineAsm::Kind::RegDef:
3798 case InlineAsm::Kind::RegDefEarlyClobber: {
3799 for (; NumVals; --NumVals, ++i) {
3800 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3801 if (Reg != PPC::LR && Reg != PPC::LR8)
3802 continue;
3803 MFI.setLRStoreRequired();
3804 return Op;
3805 }
3806 break;
3807 }
3808 }
3809 }
3810
3811 return Op;
3812}
3813
3814SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3815 SelectionDAG &DAG) const {
3816 SDValue Chain = Op.getOperand(0);
3817 SDValue Trmp = Op.getOperand(1); // trampoline
3818 SDValue FPtr = Op.getOperand(2); // nested function
3819 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3820 SDLoc dl(Op);
3821
3822 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3823
3824 if (Subtarget.isAIXABI()) {
3825 // On AIX we create a trampoline descriptor by combining the
3826 // entry point and TOC from the global descriptor (FPtr) with the
3827 // nest argument as the environment pointer.
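// Annotation (not in the original source): an AIX function descriptor is
// laid out as three pointers -- [0] entry point, [1] TOC base,
// [2] environment -- so the three stores built below fill the trampoline's
// descriptor with the callee's entry point and TOC while substituting the
// 'nest' value for the environment slot.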
3828 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3829 MaybeAlign PointerAlign(PointerSize);
3830 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3831 ? (MachineMemOperand::MODereferenceable |
3832 MachineMemOperand::MOInvariant)
3833 : MachineMemOperand::MONone;
3834
3835 uint64_t TOCPointerOffset = 1 * PointerSize;
3836 uint64_t EnvPointerOffset = 2 * PointerSize;
3837 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
3838 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
3839
3840 const Value *TrampolineAddr =
3841 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3842 const Function *Func =
3843 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
3844
3845 SDValue OutChains[3];
3846
3847 // Copy the entry point address from the global descriptor to the
3848 // trampoline buffer.
3849 SDValue LoadEntryPoint =
3850 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
3851 PointerAlign, MMOFlags);
3852 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
3853 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
3854 MachinePointerInfo(TrampolineAddr, 0));
3855
3856 // Copy the TOC pointer from the global descriptor to the trampoline
3857 // buffer.
3858 SDValue TOCFromDescriptorPtr =
3859 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
3860 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
3861 MachinePointerInfo(Func, TOCPointerOffset),
3862 PointerAlign, MMOFlags);
3863 SDValue TrampolineTOCPointer =
3864 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
3865 SDValue TOCLoadChain = TOCReg.getValue(1);
3866 OutChains[1] =
3867 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
3868 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
3869
3870 // Store the nest argument into the environment pointer in the trampoline
3871 // buffer.
3872 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
3873 OutChains[2] =
3874 DAG.getStore(Chain, dl, Nest, EnvPointer,
3875 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
3876
3877 SDValue TokenFactor =
3878 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
3879 return TokenFactor;
3880 }
3881
3882 bool isPPC64 = (PtrVT == MVT::i64);
3883 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3884
3885 TargetLowering::ArgListTy Args;
3886 Args.emplace_back(Trmp, IntPtrTy);
3887 // TrampSize == (isPPC64 ? 48 : 40);
3888 Args.emplace_back(
3889 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
3890 IntPtrTy);
3891 Args.emplace_back(FPtr, IntPtrTy);
3892 Args.emplace_back(Nest, IntPtrTy);
3893
3894 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3895 TargetLowering::CallLoweringInfo CLI(DAG);
3896 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3897 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3898 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3899
3900 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3901 return CallResult.second;
3902}
3903
3904SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3905 MachineFunction &MF = DAG.getMachineFunction();
3906 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3907 EVT PtrVT = getPointerTy(MF.getDataLayout());
3908
3909 SDLoc dl(Op);
3910
3911 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3912 // vastart just stores the address of the VarArgsFrameIndex slot into the
3913 // memory location argument.
3914 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3915 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3916 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3917 MachinePointerInfo(SV));
3918 }
3919
3920 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3921 // We suppose the given va_list is already allocated.
3922 //
3923 // typedef struct {
3924 // char gpr; /* index into the array of 8 GPRs
3925 // * stored in the register save area
3926 // * gpr=0 corresponds to r3,
3927 // * gpr=1 to r4, etc.
3928 // */
3929 // char fpr; /* index into the array of 8 FPRs
3930 // * stored in the register save area
3931 // * fpr=0 corresponds to f1,
3932 // * fpr=1 to f2, etc.
3933 // */
3934 // char *overflow_arg_area;
3935 // /* location on stack that holds
3936 // * the next overflow argument
3937 // */
3938 // char *reg_save_area;
3939 // /* where r3:r10 and f1:f8 (if saved)
3940 // * are stored
3941 // */
3942 // } va_list[1];
3943
3944 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3945 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3946 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3947 PtrVT);
3948 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3949 PtrVT);
3950
3951 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3952 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3953
3954 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3955 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3956
3957 uint64_t FPROffset = 1;
3958 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3959
3960 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3961
3962 // Store first byte : number of int regs
3963 SDValue firstStore =
3964 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3965 MachinePointerInfo(SV), MVT::i8);
3966 uint64_t nextOffset = FPROffset;
3967 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3968 ConstFPROffset);
3969
3970 // Store second byte : number of float regs
3971 SDValue secondStore =
3972 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3973 MachinePointerInfo(SV, nextOffset), MVT::i8);
3974 nextOffset += StackOffset;
3975 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3976
3977 // Store second word : arguments given on stack
3978 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3979 MachinePointerInfo(SV, nextOffset));
3980 nextOffset += FrameOffset;
3981 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3982
3983 // Store third word : arguments given in registers
3984 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3985 MachinePointerInfo(SV, nextOffset));
3986}
3987
3988/// FPR - The set of FP registers that should be allocated for arguments
3989/// on Darwin and AIX.
3990static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3991 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3992 PPC::F11, PPC::F12, PPC::F13};
3993
3994/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3995/// the stack.
3996static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3997 unsigned PtrByteSize) {
3998 unsigned ArgSize = ArgVT.getStoreSize();
3999 if (Flags.isByVal())
4000 ArgSize = Flags.getByValSize();
4001
4002 // Round up to multiples of the pointer size, except for array members,
4003 // which are always packed.
4004 if (!Flags.isInConsecutiveRegs())
4005 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4006
4007 return ArgSize;
4008}
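// Annotation (not in the original source): for example, with an 8-byte
// pointer an i32 argument still reserves a full 8-byte slot, and a 12-byte
// byval aggregate reserves 16 bytes; only members of aggregates split across
// consecutive registers (isInConsecutiveRegs) stay packed at their own size.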
4009
4010/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4011/// on the stack.
4012 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4013 ISD::ArgFlagsTy Flags,
4014 unsigned PtrByteSize) {
4015 Align Alignment(PtrByteSize);
4016
4017 // Altivec parameters are padded to a 16 byte boundary.
4018 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4019 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4020 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4021 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4022 Alignment = Align(16);
4023
4024 // ByVal parameters are aligned as requested.
4025 if (Flags.isByVal()) {
4026 auto BVAlign = Flags.getNonZeroByValAlign();
4027 if (BVAlign > PtrByteSize) {
4028 if (BVAlign.value() % PtrByteSize != 0)
4029 report_fatal_error(
4030 "ByVal alignment is not a multiple of the pointer size");
4031
4032 Alignment = BVAlign;
4033 }
4034 }
4035
4036 // Array members are always packed to their original alignment.
4037 if (Flags.isInConsecutiveRegs()) {
4038 // If the array member was split into multiple registers, the first
4039 // needs to be aligned to the size of the full type. (Except for
4040 // ppcf128, which is only aligned as its f64 components.)
4041 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4042 Alignment = Align(OrigVT.getStoreSize());
4043 else
4044 Alignment = Align(ArgVT.getStoreSize());
4045 }
4046
4047 return Alignment;
4048}
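// Annotation (not in the original source): for example, a v4i32 parameter
// gets a 16-byte-aligned slot, and a byval parameter whose requested
// alignment is 32 (a multiple of the pointer size) keeps that 32-byte
// alignment instead of the default pointer-size alignment.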
4049
4050/// CalculateStackSlotUsed - Return whether this argument will use its
4051/// stack slot (instead of being passed in registers). ArgOffset,
4052/// AvailableFPRs, and AvailableVRs must hold the current argument
4053/// position, and will be updated to account for this argument.
4054static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4055 unsigned PtrByteSize, unsigned LinkageSize,
4056 unsigned ParamAreaSize, unsigned &ArgOffset,
4057 unsigned &AvailableFPRs,
4058 unsigned &AvailableVRs) {
4059 bool UseMemory = false;
4060
4061 // Respect alignment of argument on the stack.
4062 Align Alignment =
4063 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4064 ArgOffset = alignTo(ArgOffset, Alignment);
4065 // If there's no space left in the argument save area, we must
4066 // use memory (this check also catches zero-sized arguments).
4067 if (ArgOffset >= LinkageSize + ParamAreaSize)
4068 UseMemory = true;
4069
4070 // Allocate argument on the stack.
4071 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4072 if (Flags.isInConsecutiveRegsLast())
4073 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4074 // If we overran the argument save area, we must use memory
4075 // (this check catches arguments passed partially in memory)
4076 if (ArgOffset > LinkageSize + ParamAreaSize)
4077 UseMemory = true;
4078
4079 // However, if the argument is actually passed in an FPR or a VR,
4080 // we don't use memory after all.
4081 if (!Flags.isByVal()) {
4082 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4083 if (AvailableFPRs > 0) {
4084 --AvailableFPRs;
4085 return false;
4086 }
4087 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4088 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4089 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4090 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4091 if (AvailableVRs > 0) {
4092 --AvailableVRs;
4093 return false;
4094 }
4095 }
4096
4097 return UseMemory;
4098}
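// Annotation (not in the original source): as a worked example, with the
// ELFv2 linkage size of 32 bytes and a 64-byte parameter save area, the ninth
// i64 argument starts at offset 96 = 32 + 64, so once the eight GPRs are
// exhausted this routine reports that the argument occupies its stack slot.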
4099
4100/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4101/// ensure minimum alignment required for target.
4102 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4103 unsigned NumBytes) {
4104 return alignTo(NumBytes, Lowering->getStackAlign());
4105}
4106
4107SDValue PPCTargetLowering::LowerFormalArguments(
4108 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4109 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4110 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4111 if (Subtarget.isAIXABI())
4112 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4113 InVals);
4114 if (Subtarget.is64BitELFABI())
4115 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4116 InVals);
4117 assert(Subtarget.is32BitELFABI());
4118 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4119 InVals);
4120}
4121
4122SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4123 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4124 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4125 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4126
4127 // 32-bit SVR4 ABI Stack Frame Layout:
4128 // +-----------------------------------+
4129 // +--> | Back chain |
4130 // | +-----------------------------------+
4131 // | | Floating-point register save area |
4132 // | +-----------------------------------+
4133 // | | General register save area |
4134 // | +-----------------------------------+
4135 // | | CR save word |
4136 // | +-----------------------------------+
4137 // | | VRSAVE save word |
4138 // | +-----------------------------------+
4139 // | | Alignment padding |
4140 // | +-----------------------------------+
4141 // | | Vector register save area |
4142 // | +-----------------------------------+
4143 // | | Local variable space |
4144 // | +-----------------------------------+
4145 // | | Parameter list area |
4146 // | +-----------------------------------+
4147 // | | LR save word |
4148 // | +-----------------------------------+
4149 // SP--> +--- | Back chain |
4150 // +-----------------------------------+
4151 //
4152 // Specifications:
4153 // System V Application Binary Interface PowerPC Processor Supplement
4154 // AltiVec Technology Programming Interface Manual
4155
4156 MachineFunction &MF = DAG.getMachineFunction();
4157 MachineFrameInfo &MFI = MF.getFrameInfo();
4158 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4159
4160 EVT PtrVT = getPointerTy(MF.getDataLayout());
4161 // Potential tail calls could cause overwriting of argument stack slots.
4162 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4163 (CallConv == CallingConv::Fast));
4164 const Align PtrAlign(4);
4165
4166 // Assign locations to all of the incoming arguments.
4167 SmallVector<CCValAssign, 16> ArgLocs;
4168 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4169 *DAG.getContext());
4170
4171 // Reserve space for the linkage area on the stack.
4172 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4173 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4174 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4175
4176 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4177 CCValAssign &VA = ArgLocs[i];
4178
4179 // Arguments stored in registers.
4180 if (VA.isRegLoc()) {
4181 const TargetRegisterClass *RC;
4182 EVT ValVT = VA.getValVT();
4183
4184 switch (ValVT.getSimpleVT().SimpleTy) {
4185 default:
4186 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4187 case MVT::i1:
4188 case MVT::i32:
4189 RC = &PPC::GPRCRegClass;
4190 break;
4191 case MVT::f32:
4192 if (Subtarget.hasP8Vector())
4193 RC = &PPC::VSSRCRegClass;
4194 else if (Subtarget.hasSPE())
4195 RC = &PPC::GPRCRegClass;
4196 else
4197 RC = &PPC::F4RCRegClass;
4198 break;
4199 case MVT::f64:
4200 if (Subtarget.hasVSX())
4201 RC = &PPC::VSFRCRegClass;
4202 else if (Subtarget.hasSPE())
4203 // SPE passes doubles in GPR pairs.
4204 RC = &PPC::GPRCRegClass;
4205 else
4206 RC = &PPC::F8RCRegClass;
4207 break;
4208 case MVT::v16i8:
4209 case MVT::v8i16:
4210 case MVT::v4i32:
4211 RC = &PPC::VRRCRegClass;
4212 break;
4213 case MVT::v4f32:
4214 RC = &PPC::VRRCRegClass;
4215 break;
4216 case MVT::v2f64:
4217 case MVT::v2i64:
4218 RC = &PPC::VRRCRegClass;
4219 break;
4220 }
4221
4222 SDValue ArgValue;
4223 // Transform the arguments stored in physical registers into
4224 // virtual ones.
4225 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4226 assert(i + 1 < e && "No second half of double precision argument");
4227 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4228 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4229 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4230 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4231 if (!Subtarget.isLittleEndian())
4232 std::swap (ArgValueLo, ArgValueHi);
4233 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4234 ArgValueHi);
4235 } else {
4236 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4237 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4238 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4239 if (ValVT == MVT::i1)
4240 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4241 }
4242
4243 InVals.push_back(ArgValue);
4244 } else {
4245 // Argument stored in memory.
4246 assert(VA.isMemLoc());
4247
4248 // Get the extended size of the argument type in stack
4249 unsigned ArgSize = VA.getLocVT().getStoreSize();
4250 // Get the actual size of the argument type
4251 unsigned ObjSize = VA.getValVT().getStoreSize();
4252 unsigned ArgOffset = VA.getLocMemOffset();
4253 // Stack objects in PPC32 are right justified.
4254 ArgOffset += ArgSize - ObjSize;
4255 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4256
4257 // Create load nodes to retrieve arguments from the stack.
4258 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4259 InVals.push_back(
4260 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4261 }
4262 }
4263
4264 // Assign locations to all of the incoming aggregate by value arguments.
4265 // Aggregates passed by value are stored in the local variable space of the
4266 // caller's stack frame, right above the parameter list area.
4267 SmallVector<CCValAssign, 16> ByValArgLocs;
4268 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4269 ByValArgLocs, *DAG.getContext());
4270
4271 // Reserve stack space for the allocations in CCInfo.
4272 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4273
4274 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4275
4276 // Area that is at least reserved in the caller of this function.
4277 unsigned MinReservedArea = CCByValInfo.getStackSize();
4278 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4279
4280 // Set the size that is at least reserved in caller of this function. Tail
4281 // call optimized function's reserved stack space needs to be aligned so that
4282 // taking the difference between two stack areas will result in an aligned
4283 // stack.
4284 MinReservedArea =
4285 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4286 FuncInfo->setMinReservedArea(MinReservedArea);
4287
4288 SmallVector<SDValue, 8> MemOps;
4289
4290 // If the function takes variable number of arguments, make a frame index for
4291 // the start of the first vararg value... for expansion of llvm.va_start.
4292 if (isVarArg) {
4293 static const MCPhysReg GPArgRegs[] = {
4294 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4295 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4296 };
4297 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4298
4299 static const MCPhysReg FPArgRegs[] = {
4300 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4301 PPC::F8
4302 };
4303 unsigned NumFPArgRegs = std::size(FPArgRegs);
4304
4305 if (useSoftFloat() || hasSPE())
4306 NumFPArgRegs = 0;
4307
4308 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4309 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4310
4311 // Make room for NumGPArgRegs and NumFPArgRegs.
4312 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4313 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4314
4315 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4316 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4317
4318 FuncInfo->setVarArgsFrameIndex(
4319 MFI.CreateStackObject(Depth, Align(8), false));
4320 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4321
4322 // The fixed integer arguments of a variadic function are stored to the
4323 // VarArgsFrameIndex on the stack so that they may be loaded by
4324 // dereferencing the result of va_next.
4325 for (MCPhysReg GPArgReg : GPArgRegs) {
4326 // Get an existing live-in vreg, or add a new one.
4327 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4328 if (!VReg)
4329 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4330
4331 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4332 SDValue Store =
4333 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4334 MemOps.push_back(Store);
4335 // Increment the address by four for the next argument to store
4336 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4337 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4338 }
4339
4340 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4341 // is set.
4342 // The double arguments are stored to the VarArgsFrameIndex
4343 // on the stack.
4344 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4345 // Get an existing live-in vreg, or add a new one.
4346 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4347 if (!VReg)
4348 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4349
4350 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4351 SDValue Store =
4352 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4353 MemOps.push_back(Store);
4354 // Increment the address by eight for the next argument to store
4355 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4356 PtrVT);
4357 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4358 }
4359 }
4360
4361 if (!MemOps.empty())
4362 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4363
4364 return Chain;
4365}
4366
4367// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4368// value to MVT::i64 and then truncate to the correct register size.
4369SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4370 EVT ObjectVT, SelectionDAG &DAG,
4371 SDValue ArgVal,
4372 const SDLoc &dl) const {
4373 if (Flags.isSExt())
4374 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4375 DAG.getValueType(ObjectVT));
4376 else if (Flags.isZExt())
4377 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4378 DAG.getValueType(ObjectVT));
4379
4380 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4381}
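// Annotation (not in the original source): AssertSext/AssertZext do not emit
// any extension code; they only record that the producer already sign- or
// zero-extended the value, so the TRUNCATE back to the narrow type is
// typically free and later combines can rely on the known high bits.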
4382
4383SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4384 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4385 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4386 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4387 // TODO: add description of PPC stack frame format, or at least some docs.
4388 //
4389 bool isELFv2ABI = Subtarget.isELFv2ABI();
4390 bool isLittleEndian = Subtarget.isLittleEndian();
4391 MachineFunction &MF = DAG.getMachineFunction();
4392 MachineFrameInfo &MFI = MF.getFrameInfo();
4393 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4394
4395 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4396 "fastcc not supported on varargs functions");
4397
4398 EVT PtrVT = getPointerTy(MF.getDataLayout());
4399 // Potential tail calls could cause overwriting of argument stack slots.
4400 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4401 (CallConv == CallingConv::Fast));
4402 unsigned PtrByteSize = 8;
4403 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4404
4405 static const MCPhysReg GPR[] = {
4406 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4407 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4408 };
4409 static const MCPhysReg VR[] = {
4410 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4411 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4412 };
4413
4414 const unsigned Num_GPR_Regs = std::size(GPR);
4415 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4416 const unsigned Num_VR_Regs = std::size(VR);
4417
4418 // Do a first pass over the arguments to determine whether the ABI
4419 // guarantees that our caller has allocated the parameter save area
4420 // on its stack frame. In the ELFv1 ABI, this is always the case;
4421 // in the ELFv2 ABI, it is true if this is a vararg function or if
4422 // any parameter is located in a stack slot.
4423
4424 bool HasParameterArea = !isELFv2ABI || isVarArg;
4425 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4426 unsigned NumBytes = LinkageSize;
4427 unsigned AvailableFPRs = Num_FPR_Regs;
4428 unsigned AvailableVRs = Num_VR_Regs;
4429 for (const ISD::InputArg &In : Ins) {
4430 if (In.Flags.isNest())
4431 continue;
4432
4433 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4434 LinkageSize, ParamAreaSize, NumBytes,
4435 AvailableFPRs, AvailableVRs))
4436 HasParameterArea = true;
4437 }
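// Annotation (not in the original source): under ELFv2 a non-variadic
// function whose arguments all fit in GPRs/FPRs/VRs may be entered without a
// caller-allocated parameter save area, which is why HasParameterArea starts
// out false there and is only set once some argument is found to need a
// stack slot.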
4438
4439 // Add DAG nodes to load the arguments or copy them out of registers. On
4440 // entry to a function on PPC, the arguments start after the linkage area,
4441 // although the first ones are often in registers.
4442
4443 unsigned ArgOffset = LinkageSize;
4444 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4445 SmallVector<SDValue, 8> MemOps;
4446 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4447 unsigned CurArgIdx = 0;
4448 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4449 SDValue ArgVal;
4450 bool needsLoad = false;
4451 EVT ObjectVT = Ins[ArgNo].VT;
4452 EVT OrigVT = Ins[ArgNo].ArgVT;
4453 unsigned ObjSize = ObjectVT.getStoreSize();
4454 unsigned ArgSize = ObjSize;
4455 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4456 if (Ins[ArgNo].isOrigArg()) {
4457 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4458 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4459 }
4460 // We re-align the argument offset for each argument, except under the fast
4461 // calling convention, where we only do so when the argument will actually
4462 // use a stack slot.
4463 unsigned CurArgOffset;
4464 Align Alignment;
4465 auto ComputeArgOffset = [&]() {
4466 /* Respect alignment of argument on the stack. */
4467 Alignment =
4468 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4469 ArgOffset = alignTo(ArgOffset, Alignment);
4470 CurArgOffset = ArgOffset;
4471 };
4472
4473 if (CallConv != CallingConv::Fast) {
4474 ComputeArgOffset();
4475
4476 /* Compute GPR index associated with argument offset. */
4477 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4478 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4479 }
4480
4481 // FIXME the codegen can be much improved in some cases.
4482 // We do not have to keep everything in memory.
4483 if (Flags.isByVal()) {
4484 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4485
4486 if (CallConv == CallingConv::Fast)
4487 ComputeArgOffset();
4488
4489 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4490 ObjSize = Flags.getByValSize();
4491 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4492 // Empty aggregate parameters do not take up registers. Examples:
4493 // struct { } a;
4494 // union { } b;
4495 // int c[0];
4496 // etc. However, we have to provide a place-holder in InVals, so
4497 // pretend we have an 8-byte item at the current address for that
4498 // purpose.
4499 if (!ObjSize) {
4500 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4501 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4502 InVals.push_back(FIN);
4503 continue;
4504 }
4505
4506 // Create a stack object covering all stack doublewords occupied
4507 // by the argument. If the argument is (fully or partially) on
4508 // the stack, or if the argument is fully in registers but the
4509 // caller has allocated the parameter save anyway, we can refer
4510 // directly to the caller's stack frame. Otherwise, create a
4511 // local copy in our own frame.
4512 int FI;
4513 if (HasParameterArea ||
4514 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4515 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4516 else
4517 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4518 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4519
4520 // Handle aggregates smaller than 8 bytes.
4521 if (ObjSize < PtrByteSize) {
4522 // The value of the object is its address, which differs from the
4523 // address of the enclosing doubleword on big-endian systems.
4524 SDValue Arg = FIN;
4525 if (!isLittleEndian) {
4526 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4527 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4528 }
4529 InVals.push_back(Arg);
4530
4531 if (GPR_idx != Num_GPR_Regs) {
4532 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4533 FuncInfo->addLiveInAttr(VReg, Flags);
4534 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4535 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4536 SDValue Store =
4537 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4538 MachinePointerInfo(&*FuncArg), ObjType);
4539 MemOps.push_back(Store);
4540 }
4541 // Whether we copied from a register or not, advance the offset
4542 // into the parameter save area by a full doubleword.
4543 ArgOffset += PtrByteSize;
4544 continue;
4545 }
4546
4547 // The value of the object is its address, which is the address of
4548 // its first stack doubleword.
4549 InVals.push_back(FIN);
4550
4551 // Store whatever pieces of the object are in registers to memory.
4552 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4553 if (GPR_idx == Num_GPR_Regs)
4554 break;
4555
4556 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4557 FuncInfo->addLiveInAttr(VReg, Flags);
4558 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4559 SDValue Addr = FIN;
4560 if (j) {
4561 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4562 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4563 }
4564 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4565 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4566 SDValue Store =
4567 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4568 MachinePointerInfo(&*FuncArg, j), ObjType);
4569 MemOps.push_back(Store);
4570 ++GPR_idx;
4571 }
4572 ArgOffset += ArgSize;
4573 continue;
4574 }
4575
4576 switch (ObjectVT.getSimpleVT().SimpleTy) {
4577 default: llvm_unreachable("Unhandled argument type!");
4578 case MVT::i1:
4579 case MVT::i32:
4580 case MVT::i64:
4581 if (Flags.isNest()) {
4582 // The 'nest' parameter, if any, is passed in R11.
4583 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4584 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4585
4586 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4587 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4588
4589 break;
4590 }
4591
4592 // These can be scalar arguments or elements of an integer array type
4593 // passed directly. Clang may use those instead of "byval" aggregate
4594 // types to avoid forcing arguments to memory unnecessarily.
4595 if (GPR_idx != Num_GPR_Regs) {
4596 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4597 FuncInfo->addLiveInAttr(VReg, Flags);
4598 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4599
4600 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4601 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4602 // value to MVT::i64 and then truncate to the correct register size.
4603 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4604 } else {
4605 if (CallConv == CallingConv::Fast)
4606 ComputeArgOffset();
4607
4608 needsLoad = true;
4609 ArgSize = PtrByteSize;
4610 }
4611 if (CallConv != CallingConv::Fast || needsLoad)
4612 ArgOffset += 8;
4613 break;
4614
4615 case MVT::f32:
4616 case MVT::f64:
4617 // These can be scalar arguments or elements of a float array type
4618 // passed directly. The latter are used to implement ELFv2 homogenous
4619 // float aggregates.
4620 if (FPR_idx != Num_FPR_Regs) {
4621 unsigned VReg;
4622
4623 if (ObjectVT == MVT::f32)
4624 VReg = MF.addLiveIn(FPR[FPR_idx],
4625 Subtarget.hasP8Vector()
4626 ? &PPC::VSSRCRegClass
4627 : &PPC::F4RCRegClass);
4628 else
4629 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4630 ? &PPC::VSFRCRegClass
4631 : &PPC::F8RCRegClass);
4632
4633 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4634 ++FPR_idx;
4635 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4636 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4637 // once we support fp <-> gpr moves.
4638
4639 // This can only ever happen in the presence of f32 array types,
4640 // since otherwise we never run out of FPRs before running out
4641 // of GPRs.
4642 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4643 FuncInfo->addLiveInAttr(VReg, Flags);
4644 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4645
4646 if (ObjectVT == MVT::f32) {
4647 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4648 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4649 DAG.getConstant(32, dl, MVT::i32));
4650 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4651 }
4652
4653 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4654 } else {
4655 if (CallConv == CallingConv::Fast)
4656 ComputeArgOffset();
4657
4658 needsLoad = true;
4659 }
4660
4661 // When passing an array of floats, the array occupies consecutive
4662 // space in the argument area; only round up to the next doubleword
4663 // at the end of the array. Otherwise, each float takes 8 bytes.
4664 if (CallConv != CallingConv::Fast || needsLoad) {
4665 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4666 ArgOffset += ArgSize;
4667 if (Flags.isInConsecutiveRegsLast())
4668 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4669 }
4670 break;
4671 case MVT::v4f32:
4672 case MVT::v4i32:
4673 case MVT::v8i16:
4674 case MVT::v16i8:
4675 case MVT::v2f64:
4676 case MVT::v2i64:
4677 case MVT::v1i128:
4678 case MVT::f128:
4679 // These can be scalar arguments or elements of a vector array type
4680 // passed directly. The latter are used to implement ELFv2 homogenous
4681 // vector aggregates.
4682 if (VR_idx != Num_VR_Regs) {
4683 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4684 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4685 ++VR_idx;
4686 } else {
4687 if (CallConv == CallingConv::Fast)
4688 ComputeArgOffset();
4689 needsLoad = true;
4690 }
4691 if (CallConv != CallingConv::Fast || needsLoad)
4692 ArgOffset += 16;
4693 break;
4694 }
4695
4696 // We need to load the argument to a virtual register if we determined
4697 // above that we ran out of physical registers of the appropriate type.
4698 if (needsLoad) {
4699 if (ObjSize < ArgSize && !isLittleEndian)
4700 CurArgOffset += ArgSize - ObjSize;
4701 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4702 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4703 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4704 }
4705
4706 InVals.push_back(ArgVal);
4707 }
4708
4709 // Area that is at least reserved in the caller of this function.
4710 unsigned MinReservedArea;
4711 if (HasParameterArea)
4712 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4713 else
4714 MinReservedArea = LinkageSize;
4715
4716 // Set the size that is at least reserved in caller of this function. Tail
4717 // call optimized functions' reserved stack space needs to be aligned so that
4718 // taking the difference between two stack areas will result in an aligned
4719 // stack.
4720 MinReservedArea =
4721 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4722 FuncInfo->setMinReservedArea(MinReservedArea);
4723
4724 // If the function takes variable number of arguments, make a frame index for
4725 // the start of the first vararg value... for expansion of llvm.va_start.
4726 // The ELFv2 ABI spec says:
4727 // C programs that are intended to be *portable* across different compilers
4728 // and architectures must use the header file <stdarg.h> to deal with variable
4729 // argument lists.
4730 if (isVarArg && MFI.hasVAStart()) {
4731 int Depth = ArgOffset;
4732
4733 FuncInfo->setVarArgsFrameIndex(
4734 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4735 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4736
4737 // If this function is vararg, store any remaining integer argument regs
4738 // to their spots on the stack so that they may be loaded by dereferencing
4739 // the result of va_next.
4740 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4741 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4742 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4743 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4744 SDValue Store =
4745 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4746 MemOps.push_back(Store);
4747 // Increment the address by PtrByteSize (8) for the next argument to store
4748 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4749 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4750 }
4751 }
4752
4753 if (!MemOps.empty())
4754 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4755
4756 return Chain;
4757}
4758
4759/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4760/// adjusted to accommodate the arguments for the tailcall.
4761static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4762 unsigned ParamSize) {
4763
4764 if (!isTailCall) return 0;
4765
4766 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4767 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4768 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4769 // Remember only if the new adjustment is bigger.
4770 if (SPDiff < FI->getTailCallSPDelta())
4771 FI->setTailCallSPDelta(SPDiff);
4772
4773 return SPDiff;
4774}
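// Annotation (not in the original source): e.g. if the caller reserved 112
// bytes but the tail call needs 144 bytes of argument area, SPDiff is -32,
// so roughly speaking the stack must be grown by 32 bytes before the jump;
// the most negative delta seen so far is remembered in the function info.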
4775
4776static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4777
4778static bool callsShareTOCBase(const Function *Caller,
4779 const GlobalValue *CalleeGV,
4780 const TargetMachine &TM) {
4781 // It does not make sense to call callsShareTOCBase() with a caller that
4782 // is PC Relative since PC Relative callers do not have a TOC.
4783#ifndef NDEBUG
4784 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4785 assert(!STICaller->isUsingPCRelativeCalls() &&
4786 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4787#endif
4788
4789 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4790 // don't have enough information to determine if the caller and callee share
4791 // the same TOC base, so we have to pessimistically assume they don't for
4792 // correctness.
4793 if (!CalleeGV)
4794 return false;
4795
4796 // If the callee is preemptable, then the static linker will use a plt-stub
4797 // which saves the toc to the stack, and needs a nop after the call
4798 // instruction to convert to a toc-restore.
4799 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4800 return false;
4801
4802 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4803 // We may need a TOC restore in the situation where the caller requires a
4804 // valid TOC but the callee is PC Relative and does not.
4805 const Function *F = dyn_cast<Function>(CalleeGV);
4806 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4807
4808 // If we have an Alias we can try to get the function from there.
4809 if (Alias) {
4810 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4811 F = dyn_cast<Function>(GlobalObj);
4812 }
4813
4814 // If we still have no valid function pointer we do not have enough
4815 // information to determine if the callee uses PC Relative calls so we must
4816 // assume that it does.
4817 if (!F)
4818 return false;
4819
4820 // If the callee uses PC Relative we cannot guarantee that the callee won't
4821 // clobber the TOC of the caller and so we must assume that the two
4822 // functions do not share a TOC base.
4823 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4824 if (STICallee->isUsingPCRelativeCalls())
4825 return false;
4826
4827 // If the GV is not a strong definition then we need to assume it can be
4828 // replaced by another function at link time. The function that replaces
4829 // it may not share the same TOC as the caller since the callee may be
4830 // replaced by a PC Relative version of the same function.
4831 if (!CalleeGV->isStrongDefinitionForLinker())
4832 return false;
4833
4834 // The medium and large code models are expected to provide a sufficiently
4835 // large TOC to provide all data addressing needs of a module with a
4836 // single TOC.
4837 if (CodeModel::Medium == TM.getCodeModel() ||
4838 CodeModel::Large == TM.getCodeModel())
4839 return true;
4840
4841 // Any explicitly-specified sections and section prefixes must also match.
4842 // Also, if we're using -ffunction-sections, then each function is always in
4843 // a different section (the same is true for COMDAT functions).
4844 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4845 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4846 return false;
4847 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4848 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4849 return false;
4850 }
4851
4852 return true;
4853}
4854
4855static bool
4856 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4857 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4858 assert(Subtarget.is64BitELFABI());
4859
4860 const unsigned PtrByteSize = 8;
4861 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4862
4863 static const MCPhysReg GPR[] = {
4864 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4865 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4866 };
4867 static const MCPhysReg VR[] = {
4868 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4869 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4870 };
4871
4872 const unsigned NumGPRs = std::size(GPR);
4873 const unsigned NumFPRs = 13;
4874 const unsigned NumVRs = std::size(VR);
4875 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4876
4877 unsigned NumBytes = LinkageSize;
4878 unsigned AvailableFPRs = NumFPRs;
4879 unsigned AvailableVRs = NumVRs;
4880
4881 for (const ISD::OutputArg& Param : Outs) {
4882 if (Param.Flags.isNest()) continue;
4883
4884 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4885 LinkageSize, ParamAreaSize, NumBytes,
4886 AvailableFPRs, AvailableVRs))
4887 return true;
4888 }
4889 return false;
4890}
4891
4892static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4893 if (CB.arg_size() != CallerFn->arg_size())
4894 return false;
4895
4896 auto CalleeArgIter = CB.arg_begin();
4897 auto CalleeArgEnd = CB.arg_end();
4898 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4899
4900 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4901 const Value* CalleeArg = *CalleeArgIter;
4902 const Value* CallerArg = &(*CallerArgIter);
4903 if (CalleeArg == CallerArg)
4904 continue;
4905
4906 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4907 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4908 // }
4909 // 1st argument of callee is undef and has the same type as caller.
4910 if (CalleeArg->getType() == CallerArg->getType() &&
4911 isa<UndefValue>(CalleeArg))
4912 continue;
4913
4914 return false;
4915 }
4916
4917 return true;
4918}
4919
4920// Returns true if TCO is possible between the callers and callees
4921// calling conventions.
4922static bool
4923 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4924 CallingConv::ID CalleeCC) {
4925 // Tail calls are possible with fastcc and ccc.
4926 auto isTailCallableCC = [] (CallingConv::ID CC){
4927 return CC == CallingConv::C || CC == CallingConv::Fast;
4928 };
4929 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4930 return false;
4931
4932 // We can safely tail call both fastcc and ccc callees from a c calling
4933 // convention caller. If the caller is fastcc, we may have less stack space
4934 // than a non-fastcc caller with the same signature so disable tail-calls in
4935 // that case.
4936 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4937}
4938
4939bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4940 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4941 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
4942 const SmallVectorImpl<ISD::OutputArg> &Outs,
4943 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4944 bool isCalleeExternalSymbol) const {
4945 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4946
4947 if (DisableSCO && !TailCallOpt) return false;
4948
4949 // Variadic argument functions are not supported.
4950 if (isVarArg) return false;
4951
4952 // Check that the calling conventions are compatible for tco.
4953 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4954 return false;
4955
4956 // Callers containing byval parameters are not supported.
4957 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4958 return false;
4959
4960 // Callees containing byval parameters are not supported either.
4961 // Note: This is a quick workaround, because in some cases, e.g.
4962 // caller's stack size > callee's stack size, we are still able to apply
4963 // sibling call optimization. For example, gcc is able to do SCO for caller1
4964 // in the following example, but not for caller2.
4965 // struct test {
4966 // long int a;
4967 // char ary[56];
4968 // } gTest;
4969 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4970 // b->a = v.a;
4971 // return 0;
4972 // }
4973 // void caller1(struct test a, struct test c, struct test *b) {
4974 // callee(gTest, b); }
4975 // void caller2(struct test *b) { callee(gTest, b); }
4976 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4977 return false;
4978
4979 // If callee and caller use different calling conventions, we cannot pass
4980 // parameters on stack since offsets for the parameter area may be different.
4981 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
4982 return false;
4983
4984 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4985 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4986 // callee potentially have different TOC bases then we cannot tail call since
4987 // we need to restore the TOC pointer after the call.
4988 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4989 // We cannot guarantee this for indirect calls or calls to external functions.
4990 // When PC-Relative addressing is used, the concept of the TOC is no longer
4991 // applicable so this check is not required.
4992 // Check first for indirect calls.
4993 if (!Subtarget.isUsingPCRelativeCalls() &&
4994 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
4995 return false;
4996
4997 // Check if we share the TOC base.
4998 if (!Subtarget.isUsingPCRelativeCalls() &&
4999 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5000 return false;
5001
5002 // TCO allows altering callee ABI, so we don't have to check further.
5003 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5004 return true;
5005
5006 if (DisableSCO) return false;
5007
5008 // If the callee uses the same argument list as the caller, we can apply
5009 // SCO in this case. If it does not, we need to check whether the callee
5010 // needs stack space for passing arguments.
5011 // PC Relative tail calls may not have a CallBase.
5012 // If there is no CallBase we cannot verify if we have the same argument
5013 // list so assume that we don't have the same argument list.
5014 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5015 needStackSlotPassParameters(Subtarget, Outs))
5016 return false;
5017 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5018 return false;
5019
5020 return true;
5021}
5022
5023/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5024/// for tail call optimization. Targets which want to do tail call
5025/// optimization should implement this function.
5026bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5027 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5028 CallingConv::ID CallerCC, bool isVarArg,
5029 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5030 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5031 return false;
5032
5033 // Variable argument functions are not supported.
5034 if (isVarArg)
5035 return false;
5036
5037 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5038 // Functions containing byval parameters are not supported.
5039 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5040 return false;
5041
5042 // Non-PIC/GOT tail calls are supported.
5043 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5044 return true;
5045
5046 // At the moment we can only do local tail calls (in same module, hidden
5047 // or protected) if we are generating PIC.
5048 if (CalleeGV)
5049 return CalleeGV->hasHiddenVisibility() ||
5050 CalleeGV->hasProtectedVisibility();
5051 }
5052
5053 return false;
5054}
5055
5056/// isBLACompatibleAddress - Return the immediate to use if the specified
5057/// 32-bit value is representable in the immediate field of a BxA instruction.
5058static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5059 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5060 if (!C) return nullptr;
5061
5062 int Addr = C->getZExtValue();
5063 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5064 SignExtend32<26>(Addr) != Addr)
5065 return nullptr; // Top 6 bits have to be sext of immediate.
5066
5067 return DAG
5068 .getConstant(
5069 (int)C->getZExtValue() >> 2, SDLoc(Op),
5070 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5071 .getNode();
5072}
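// Worked example (illustrative only): an absolute target address of
// 0x0100'0000 has its low two bits clear and fits in a signed 26-bit value
// (range [-0x2000000, 0x1FFFFFF]), so the checks above pass and the returned
// immediate is 0x0100'0000 >> 2 == 0x0040'0000. An address of 0x0400'0000 is
// outside that range and 0x0000'0006 fails the low-bit test, so both yield
// nullptr and the call cannot use BLA.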
5073
5074namespace {
5075
5076struct TailCallArgumentInfo {
5077 SDValue Arg;
5078 SDValue FrameIdxOp;
5079 int FrameIdx = 0;
5080
5081 TailCallArgumentInfo() = default;
5082};
5083
5084} // end anonymous namespace
5085
5086/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5087static void StoreTailCallArgumentsToStackSlot(
5088 SelectionDAG &DAG, SDValue Chain,
5089 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5090 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5091 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5092 SDValue Arg = TailCallArgs[i].Arg;
5093 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5094 int FI = TailCallArgs[i].FrameIdx;
5095 // Store relative to the frame pointer.
5096 MemOpChains.push_back(DAG.getStore(
5097 Chain, dl, Arg, FIN,
5098 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5099 }
5100}
5101
5102/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5103/// the appropriate stack slot for the tail call optimized function call.
5104static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5105 SDValue OldRetAddr, SDValue OldFP,
5106 int SPDiff, const SDLoc &dl) {
5107 if (SPDiff) {
5108 // Calculate the new stack slot for the return address.
5109 MachineFunction &MF = DAG.getMachineFunction();
5110 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5111 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5112 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5113 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5114 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5115 NewRetAddrLoc, true);
5116 SDValue NewRetAddrFrIdx =
5117 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5118 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5119 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5120 }
5121 return Chain;
5122}
5123
5124/// CalculateTailCallArgDest - Remember the argument for later processing. Calculate
5125/// the position of the argument.
5126static void CalculateTailCallArgDest(
5127 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5128 int SPDiff, unsigned ArgOffset,
5129 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5130 int Offset = ArgOffset + SPDiff;
5131 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5132 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5133 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5134 SDValue FIN = DAG.getFrameIndex(FI, VT);
5135 TailCallArgumentInfo Info;
5136 Info.Arg = Arg;
5137 Info.FrameIdxOp = FIN;
5138 Info.FrameIdx = FI;
5139 TailCallArguments.push_back(Info);
5140}
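// Hypothetical numbers for illustration: with SPDiff = 32 (the adjustment
// computed by CalculateTailCallSPDiff) and ArgOffset = 48, the code above
// creates a fixed frame object of OpSize bytes at offset 48 + 32 = 80 and
// records it so StoreTailCallArgumentsToStackSlot can later store the
// argument into that slot.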
5141
5142/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5143/// address stack slots. Returns the chain as result and the loaded values in
5144/// LROpOut/FPOpOut. Used when tail calling.
5145SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5146 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5147 SDValue &FPOpOut, const SDLoc &dl) const {
5148 if (SPDiff) {
5149 // Load the LR and FP stack slot for later adjusting.
5150 LROpOut = getReturnAddrFrameIndex(DAG);
5151 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5152 MachinePointerInfo());
5153 Chain = SDValue(LROpOut.getNode(), 1);
5154 }
5155 return Chain;
5156}
5157
5158/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5159/// by "Src" to address "Dst" of size "Size". Alignment information is
5160/// specified by the specific parameter attribute. The copy will be passed as
5161/// a byval function parameter.
5162/// Sometimes what we are copying is the end of a larger object, the part that
5163/// does not fit in registers.
5164static SDValue CreateCopyOfByValArgument(SDValue Arg, SDValue Dst,
5165 SDValue Chain, ISD::ArgFlagsTy Flags,
5166 SelectionDAG &DAG, const SDLoc &dl) {
5167 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5168 return DAG.getMemcpy(
5169 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5170 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5171}
5172
5173/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5174/// tail calls.
5175static void LowerMemOpCallTo(
5176 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5177 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5178 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5179 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5180 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5181 if (!isTailCall) {
5182 if (isVector) {
5183 SDValue StackPtr;
5184 if (isPPC64)
5185 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5186 else
5187 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5188 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5189 DAG.getConstant(ArgOffset, dl, PtrVT));
5190 }
5191 MemOpChains.push_back(
5192 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5193 // Calculate and remember argument location.
5194 } else
5195 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5196 TailCallArguments);
5197}
5198
5199static void
5200PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5201 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5202 SDValue FPOp,
5203 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5204 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5205 // might overwrite each other in case of tail call optimization.
5206 SmallVector<SDValue, 8> MemOpChains2;
5207 // Do not flag preceding copytoreg stuff together with the following stuff.
5208 InGlue = SDValue();
5209 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5210 MemOpChains2, dl);
5211 if (!MemOpChains2.empty())
5212 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5213
5214 // Store the return address to the appropriate stack slot.
5215 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5216
5217 // Emit callseq_end just before tailcall node.
5218 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5219 InGlue = Chain.getValue(1);
5220}
5221
5222// Is this global address that of a function that can be called by name? (as
5223// opposed to something that must hold a descriptor for an indirect call).
5224static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5225 if (GV) {
5226 if (GV->isThreadLocal())
5227 return false;
5228
5229 return GV->getValueType()->isFunctionTy();
5230 }
5231
5232 return false;
5233}
5234
5235SDValue PPCTargetLowering::LowerCallResult(
5236 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5237 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5238 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5239 SmallVector<CCValAssign, 16> RVLocs;
5240 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5241 *DAG.getContext());
5242
5243 CCRetInfo.AnalyzeCallResult(
5244 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5245 ? RetCC_PPC_Cold
5246 : RetCC_PPC);
5247
5248 // Copy all of the result registers out of their specified physreg.
5249 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5250 CCValAssign &VA = RVLocs[i];
5251 assert(VA.isRegLoc() && "Can only return in registers!");
5252
5253 SDValue Val;
5254
5255 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5256 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5257 InGlue);
5258 Chain = Lo.getValue(1);
5259 InGlue = Lo.getValue(2);
5260 VA = RVLocs[++i]; // skip ahead to next loc
5261 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5262 InGlue);
5263 Chain = Hi.getValue(1);
5264 InGlue = Hi.getValue(2);
5265 if (!Subtarget.isLittleEndian())
5266 std::swap (Lo, Hi);
5267 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5268 } else {
5269 Val = DAG.getCopyFromReg(Chain, dl,
5270 VA.getLocReg(), VA.getLocVT(), InGlue);
5271 Chain = Val.getValue(1);
5272 InGlue = Val.getValue(2);
5273 }
5274
5275 switch (VA.getLocInfo()) {
5276 default: llvm_unreachable("Unknown loc info!");
5277 case CCValAssign::Full: break;
5278 case CCValAssign::AExt:
5279 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5280 break;
5281 case CCValAssign::ZExt:
5282 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5283 DAG.getValueType(VA.getValVT()));
5284 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5285 break;
5286 case CCValAssign::SExt:
5287 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5288 DAG.getValueType(VA.getValVT()));
5289 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5290 break;
5291 }
5292
5293 InVals.push_back(Val);
5294 }
5295
5296 return Chain;
5297}
5298
5299static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5300 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5301 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5302 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5303
5304 // PatchPoint calls are not indirect.
5305 if (isPatchPoint)
5306 return false;
5307
5308 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5309 return false;
5310
5311 // Darwin, and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5312 // because the immediate function pointer points to a descriptor instead of
5313 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5314 // pointer immediate points to the global entry point, while the BLA would
5315 // need to jump to the local entry point (see rL211174).
5316 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5317 isBLACompatibleAddress(Callee, DAG))
5318 return false;
5319
5320 return true;
5321}
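// For illustration, paraphrasing the logic above: in
//   void target(void);
//   void f(void (*fp)(void)) { target(); fp(); }
// the call to target() is direct (its callee is a function GlobalAddress),
// while fp() is indirect and goes through the CTR-based sequences prepared
// below. A constant absolute address can additionally stay direct on
// non-descriptor, non-ELFv2 targets via BLA, per isBLACompatibleAddress.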
5322
5323// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5324static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5325 return Subtarget.isAIXABI() ||
5326 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5327}
5328
5329static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5330 const Function &Caller, const SDValue &Callee,
5331 const PPCSubtarget &Subtarget,
5332 const TargetMachine &TM,
5333 bool IsStrictFPCall = false) {
5334 if (CFlags.IsTailCall)
5335 return PPCISD::TC_RETURN;
5336
5337 unsigned RetOpc = 0;
5338 // This is a call through a function pointer.
5339 if (CFlags.IsIndirect) {
5340 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5341 // indirect calls. The save of the caller's TOC pointer to the stack will be
5342 // inserted into the DAG as part of call lowering. The restore of the TOC
5343 // pointer is modeled by using a pseudo instruction for the call opcode that
5344 // represents the 2 instruction sequence of an indirect branch and link,
5345 // immediately followed by a load of the TOC pointer from the stack save
5346 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5347 // as it is not saved or used.
5348 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5349 : PPCISD::BCTRL;
5350 } else if (Subtarget.isUsingPCRelativeCalls()) {
5351 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5352 RetOpc = PPCISD::CALL_NOTOC;
5353 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5354 // The ABIs that maintain a TOC pointer across calls need to have a nop
5355 // immediately following the call instruction if the caller and callee may
5356 // have different TOC bases. At link time if the linker determines the calls
5357 // may not share a TOC base, the call is redirected to a trampoline inserted
5358 // by the linker. The trampoline will (among other things) save the caller's
5359 // TOC pointer at an ABI designated offset in the linkage area and the
5360 // linker will rewrite the nop to be a load of the TOC pointer from the
5361 // linkage area into gpr2.
5362 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5363 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5364 RetOpc =
5365 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5366 } else
5367 RetOpc = PPCISD::CALL;
5368 if (IsStrictFPCall) {
5369 switch (RetOpc) {
5370 default:
5371 llvm_unreachable("Unknown call opcode");
5372 case PPCISD::BCTRL_LOAD_TOC:
5373 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5374 break;
5375 case PPCISD::BCTRL:
5376 RetOpc = PPCISD::BCTRL_RM;
5377 break;
5378 case PPCISD::CALL_NOTOC:
5379 RetOpc = PPCISD::CALL_NOTOC_RM;
5380 break;
5381 case PPCISD::CALL:
5382 RetOpc = PPCISD::CALL_RM;
5383 break;
5384 case PPCISD::CALL_NOP:
5385 RetOpc = PPCISD::CALL_NOP_RM;
5386 break;
5387 }
5388 }
5389 return RetOpc;
5390}
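// Rough summary of the selection above (illustrative, not exhaustive):
//   tail call                                    -> TC_RETURN
//   indirect call needing TOC save/restore       -> BCTRL_LOAD_TOC
//   other indirect call                          -> BCTRL
//   direct call with PC-relative addressing      -> CALL_NOTOC
//   direct call, TOC ABIs, TOC base shared       -> CALL
//   direct call, TOC ABIs, TOC base may differ   -> CALL_NOP
// Non-tail calls in strict-FP functions are then switched to the matching
// *_RM variant.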
5391
5392static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5393 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5394 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5395 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5396 return SDValue(Dest, 0);
5397
5398 // Returns true if the callee is local, and false otherwise.
5399 auto isLocalCallee = [&]() {
5400 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5401 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5402
5403 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5404 !isa_and_nonnull<GlobalIFunc>(GV);
5405 };
5406
5407 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5408 // a static relocation model causes some versions of GNU LD (2.17.50, at
5409 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5410 // built with secure-PLT.
5411 bool UsePlt =
5412 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5413 getTargetMachine().getRelocationModel() == Reloc::PIC_;
5414
5415 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5416 const TargetMachine &TM = Subtarget.getTargetMachine();
5417 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5418 auto *S =
5419 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5420
5421 MVT PtrVT = getPointerTy(DAG.getDataLayout());
5422 return DAG.getMCSymbol(S, PtrVT);
5423 };
5424
5425 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5426 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5427 if (isFunctionGlobalAddress(GV)) {
5428 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5429
5430 if (Subtarget.isAIXABI()) {
5431 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5432 return getAIXFuncEntryPointSymbolSDNode(GV);
5433 }
5434 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5435 UsePlt ? PPCII::MO_PLT : 0);
5436 }
5437
5438 if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5439 const char *SymName = S->getSymbol();
5440 if (Subtarget.isAIXABI()) {
5441 // If there exists a user-declared function whose name is the same as the
5442 // ExternalSymbol's, then we pick up the user-declared version.
5443 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5444 if (const Function *F =
5445 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5446 return getAIXFuncEntryPointSymbolSDNode(F);
5447
5448 // On AIX, direct function calls reference the symbol for the function's
5449 // entry point, which is named by prepending a "." before the function's
5450 // C-linkage name. A Qualname is returned here because an external
5451 // function entry point is a csect with XTY_ER property.
5452 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5453 auto &Context = DAG.getMachineFunction().getContext();
5454 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5455 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5456 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5457 return Sec->getQualNameSymbol();
5458 };
5459
5460 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5461 }
5462 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5463 UsePlt ? PPCII::MO_PLT : 0);
5464 }
5465
5466 // No transformation needed.
5467 assert(Callee.getNode() && "What no callee?");
5468 return Callee;
5469}
5470
5471static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5472 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5473 "Expected a CALLSEQ_STARTSDNode.");
5474
5475 // The last operand is the chain, except when the node has glue. If the node
5476 // has glue, then the last operand is the glue, and the chain is the second
5477 // last operand.
5478 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5479 if (LastValue.getValueType() != MVT::Glue)
5480 return LastValue;
5481
5482 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5483}
5484
5485// Creates the node that moves a function's address into the count register
5486// to prepare for an indirect call instruction.
5487static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5488 SDValue &Glue, SDValue &Chain,
5489 const SDLoc &dl) {
5490 SDValue MTCTROps[] = {Chain, Callee, Glue};
5491 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5492 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5493 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5494 // The glue is the second value produced.
5495 Glue = Chain.getValue(1);
5496}
5497
5498static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5499 SDValue &Glue, SDValue &Chain,
5500 SDValue CallSeqStart,
5501 const CallBase *CB, const SDLoc &dl,
5502 bool hasNest,
5503 const PPCSubtarget &Subtarget) {
5504 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5505 // entry point, but to the function descriptor (the function entry point
5506 // address is part of the function descriptor though).
5507 // The function descriptor is a three doubleword structure with the
5508 // following fields: function entry point, TOC base address and
5509 // environment pointer.
5510 // Thus for a call through a function pointer, the following actions need
5511 // to be performed:
5512 // 1. Save the TOC of the caller in the TOC save area of its stack
5513 // frame (this is done in LowerCall_64SVR4() or LowerCall_AIX()).
5514 // 2. Load the address of the function entry point from the function
5515 // descriptor.
5516 // 3. Load the TOC of the callee from the function descriptor into r2.
5517 // 4. Load the environment pointer from the function descriptor into
5518 // r11.
5519 // 5. Branch to the function entry point address.
5520 // 6. On return of the callee, the TOC of the caller needs to be
5521 // restored (this is done in FinishCall()).
5522 //
5523 // The loads are scheduled at the beginning of the call sequence, and the
5524 // register copies are flagged together to ensure that no other
5525 // operations can be scheduled in between. E.g. without flagging the
5526 // copies together, a TOC access in the caller could be scheduled between
5527 // the assignment of the callee TOC and the branch to the callee, which leads
5528 // to incorrect code.
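// For illustration only, the descriptor referenced above can be pictured as
// (offsets follow the Subtarget queries used below; 8-byte fields on 64-bit):
//   struct FunctionDescriptor {
//     void *EntryPoint;  // offset 0, moved into CTR by prepareIndirectCall
//     void *TOCBase;     // descriptorTOCAnchorOffset, copied into r2
//     void *Environment; // descriptorEnvironmentPointerOffset, copied to r11
//   };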
5529
5530 // Start by loading the function address from the descriptor.
5531 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5532 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5533 ? (MachineMemOperand::MODereferenceable |
5534 MachineMemOperand::MOInvariant)
5535 : MachineMemOperand::MONone;
5536
5537 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5538
5539 // Registers used in building the DAG.
5540 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5541 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5542
5543 // Offsets of descriptor members.
5544 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5545 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5546
5547 const MVT RegVT = Subtarget.getScalarIntVT();
5548 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5549
5550 // One load for the function's entry point address.
5551 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5552 Alignment, MMOFlags);
5553
5554 // One for loading the TOC anchor for the module that contains the called
5555 // function.
5556 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5557 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5558 SDValue TOCPtr =
5559 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5560 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5561
5562 // One for loading the environment pointer.
5563 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5564 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5565 SDValue LoadEnvPtr =
5566 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5567 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5568
5569
5570 // Then copy the newly loaded TOC anchor to the TOC pointer.
5571 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5572 Chain = TOCVal.getValue(0);
5573 Glue = TOCVal.getValue(1);
5574
5575 // If the function call has an explicit 'nest' parameter, it takes the
5576 // place of the environment pointer.
5577 assert((!hasNest || !Subtarget.isAIXABI()) &&
5578 "Nest parameter is not supported on AIX.");
5579 if (!hasNest) {
5580 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5581 Chain = EnvVal.getValue(0);
5582 Glue = EnvVal.getValue(1);
5583 }
5584
5585 // The rest of the indirect call sequence is the same as the non-descriptor
5586 // DAG.
5587 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5588}
5589
5590static void
5591buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5592 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5593 SelectionDAG &DAG,
5594 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5595 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5596 const PPCSubtarget &Subtarget) {
5597 const bool IsPPC64 = Subtarget.isPPC64();
5598 // MVT for a general purpose register.
5599 const MVT RegVT = Subtarget.getScalarIntVT();
5600
5601 // First operand is always the chain.
5602 Ops.push_back(Chain);
5603
5604 // If it's a direct call pass the callee as the second operand.
5605 if (!CFlags.IsIndirect)
5606 Ops.push_back(Callee);
5607 else {
5608 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5609
5610 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5611 // on the stack (this would have been done in `LowerCall_64SVR4` or
5612 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5613 // represents both the indirect branch and a load that restores the TOC
5614 // pointer from the linkage area. The operand for the TOC restore is an add
5615 // of the TOC save offset to the stack pointer. This must be the second
5616 // operand: after the chain input but before any other variadic arguments.
5617 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5618 // saved or used.
5619 if (isTOCSaveRestoreRequired(Subtarget)) {
5620 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5621
5622 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5623 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5624 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5625 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5626 Ops.push_back(AddTOC);
5627 }
5628
5629 // Add the register used for the environment pointer.
5630 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5631 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5632 RegVT));
5633
5634
5635 // Add CTR register as callee so a bctr can be emitted later.
5636 if (CFlags.IsTailCall)
5637 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5638 }
5639
5640 // If this is a tail call add stack pointer delta.
5641 if (CFlags.IsTailCall)
5642 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5643
5644 // Add argument registers to the end of the list so that they are known live
5645 // into the call.
5646 for (const auto &[Reg, N] : RegsToPass)
5647 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5648
5649 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5650 // no way to mark dependencies as implicit here.
5651 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5652 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5653 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5654 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5655
5656 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5657 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5658 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5659
5660 // Add a register mask operand representing the call-preserved registers.
5661 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5662 const uint32_t *Mask =
5663 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5664 assert(Mask && "Missing call preserved mask for calling convention");
5665 Ops.push_back(DAG.getRegisterMask(Mask));
5666
5667 // If the glue is valid, it is the last operand.
5668 if (Glue.getNode())
5669 Ops.push_back(Glue);
5670}
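// As a reading aid (sketch only), a non-tail indirect call on AIX or 64-bit
// ELFv1 ends up with operands roughly shaped like
//   { Chain, SP + TOCSaveOffset, environment-pointer reg, <argument regs...>,
//     TOC reg, register mask, [glue] }
// while a direct call on those ABIs is simply
//   { Chain, Callee, <argument regs...>, TOC reg, register mask, [glue] }.
// Indirect tail calls additionally carry the CTR register, and every tail
// call carries the SPDiff constant, as pushed above.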
5671
5672SDValue PPCTargetLowering::FinishCall(
5673 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5674 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5675 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5676 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5677 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5678
5679 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5680 Subtarget.isAIXABI())
5681 setUsesTOCBasePtr(DAG);
5682
5683 unsigned CallOpc =
5684 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5685 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5686
5687 if (!CFlags.IsIndirect)
5688 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5689 else if (Subtarget.usesFunctionDescriptors())
5690 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5691 dl, CFlags.HasNest, Subtarget);
5692 else
5693 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5694
5695 // Build the operand list for the call instruction.
5697 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5698 SPDiff, Subtarget);
5699
5700 // Emit tail call.
5701 if (CFlags.IsTailCall) {
5702 // Indirect tail calls when using PC Relative calls do not have the same
5703 // constraints.
5704 assert(((Callee.getOpcode() == ISD::Register &&
5705 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5706 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5707 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5708 isa<ConstantSDNode>(Callee) ||
5709 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5710 "Expecting a global address, external symbol, absolute value, "
5711 "register or an indirect tail call when PC Relative calls are "
5712 "used.");
5713 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5714 assert(CallOpc == PPCISD::TC_RETURN &&
5715 "Unexpected call opcode for a tail call.");
5717 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5718 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5719 return Ret;
5720 }
5721
5722 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5723 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5724 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5725 Glue = Chain.getValue(1);
5726
5727 // When performing tail call optimization the callee pops its arguments off
5728 // the stack. Account for this here so these bytes can be pushed back on in
5729 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5730 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5731 getTargetMachine().Options.GuaranteedTailCallOpt)
5732 ? NumBytes
5733 : 0;
5734
5735 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5736 Glue = Chain.getValue(1);
5737
5738 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5739 DAG, InVals);
5740}
5741
5742bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5743 CallingConv::ID CalleeCC = CB->getCallingConv();
5744 const Function *CallerFunc = CB->getCaller();
5745 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5746 const Function *CalleeFunc = CB->getCalledFunction();
5747 if (!CalleeFunc)
5748 return false;
5749 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5750
5751 SmallVector<ISD::OutputArg, 2> Outs;
5752 SmallVector<ISD::InputArg, 2> Ins;
5753
5754 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5755 CalleeFunc->getAttributes(), Outs, *this,
5756 CalleeFunc->getDataLayout());
5757
5758 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5759 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5760 false /*isCalleeExternalSymbol*/);
5761}
5762
5763bool PPCTargetLowering::isEligibleForTCO(
5764 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5765 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5766 const SmallVectorImpl<ISD::OutputArg> &Outs,
5767 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5768 bool isCalleeExternalSymbol) const {
5769 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5770 return false;
5771
5772 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5773 return IsEligibleForTailCallOptimization_64SVR4(
5774 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5775 isCalleeExternalSymbol);
5776 else
5777 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5778 isVarArg, Ins);
5779}
5780
5781SDValue
5782PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5783 SmallVectorImpl<SDValue> &InVals) const {
5784 SelectionDAG &DAG = CLI.DAG;
5785 SDLoc &dl = CLI.DL;
5786 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5787 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5788 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5789 SDValue Chain = CLI.Chain;
5790 SDValue Callee = CLI.Callee;
5791 bool &isTailCall = CLI.IsTailCall;
5792 CallingConv::ID CallConv = CLI.CallConv;
5793 bool isVarArg = CLI.IsVarArg;
5794 bool isPatchPoint = CLI.IsPatchPoint;
5795 const CallBase *CB = CLI.CB;
5796
5797 if (isTailCall) {
5798 MachineFunction &MF = DAG.getMachineFunction();
5799 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5800 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5801 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5802 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5803
5804 isTailCall =
5805 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5806 &(MF.getFunction()), IsCalleeExternalSymbol);
5807 if (isTailCall) {
5808 ++NumTailCalls;
5809 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5810 ++NumSiblingCalls;
5811
5812 // PC Relative calls no longer guarantee that the callee is a Global
5813 // Address Node. The callee could be an indirect tail call in which
5814 // case the SDValue for the callee could be a load (to load the address
5815 // of a function pointer) or it may be a register copy (to move the
5816 // address of the callee from a function parameter into a virtual
5817 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5818 assert((Subtarget.isUsingPCRelativeCalls() ||
5819 isa<GlobalAddressSDNode>(Callee)) &&
5820 "Callee should be an llvm::Function object.");
5821
5822 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5823 << "\nTCO callee: ");
5824 LLVM_DEBUG(Callee.dump());
5825 }
5826 }
5827
5828 if (!isTailCall && CB && CB->isMustTailCall())
5829 report_fatal_error("failed to perform tail call elimination on a call "
5830 "site marked musttail");
5831
5832 // When long calls (i.e. indirect calls) are always used, calls are always
5833 // made via function pointer. If we have a function name, first translate it
5834 // into a pointer.
5835 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5836 !isTailCall)
5837 Callee = LowerGlobalAddress(Callee, DAG);
5838
5839 CallFlags CFlags(
5840 CallConv, isTailCall, isVarArg, isPatchPoint,
5841 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5842 // hasNest
5843 Subtarget.is64BitELFABI() &&
5844 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5845 CLI.NoMerge);
5846
5847 if (Subtarget.isAIXABI())
5848 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5849 InVals, CB);
5850
5851 assert(Subtarget.isSVR4ABI());
5852 if (Subtarget.isPPC64())
5853 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5854 InVals, CB);
5855 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5856 InVals, CB);
5857}
5858
5859SDValue PPCTargetLowering::LowerCall_32SVR4(
5860 SDValue Chain, SDValue Callee, CallFlags CFlags,
5861 const SmallVectorImpl<ISD::OutputArg> &Outs,
5862 const SmallVectorImpl<SDValue> &OutVals,
5863 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5864 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5865 const CallBase *CB) const {
5866 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5867 // of the 32-bit SVR4 ABI stack frame layout.
5868
5869 const CallingConv::ID CallConv = CFlags.CallConv;
5870 const bool IsVarArg = CFlags.IsVarArg;
5871 const bool IsTailCall = CFlags.IsTailCall;
5872
5873 assert((CallConv == CallingConv::C ||
5874 CallConv == CallingConv::Cold ||
5875 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5876
5877 const Align PtrAlign(4);
5878
5879 MachineFunction &MF = DAG.getMachineFunction();
5880
5881 // Mark this function as potentially containing a function that contains a
5882 // tail call. As a consequence the frame pointer will be used for dynamic
5883 // allocation and for restoring the caller's stack pointer in this function's
5884 // epilog. This is done because the tail-called function might overwrite the
5885 // value in this function's (MF) stack pointer stack slot 0(SP).
5886 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5887 CallConv == CallingConv::Fast)
5888 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5889
5890 // Count how many bytes are to be pushed on the stack, including the linkage
5891 // area, parameter list area and the part of the local variable space which
5892 // contains copies of aggregates which are passed by value.
5893
5894 // Assign locations to all of the outgoing arguments.
5895 SmallVector<CCValAssign, 16> ArgLocs;
5896 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5897
5898 // Reserve space for the linkage area on the stack.
5899 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5900 PtrAlign);
5901
5902 if (IsVarArg) {
5903 // Handle fixed and variable vector arguments differently.
5904 // Fixed vector arguments go into registers as long as registers are
5905 // available. Variable vector arguments always go into memory.
5906 unsigned NumArgs = Outs.size();
5907
5908 for (unsigned i = 0; i != NumArgs; ++i) {
5909 MVT ArgVT = Outs[i].VT;
5910 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5911 bool Result;
5912
5913 if (!ArgFlags.isVarArg()) {
5914 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5915 Outs[i].OrigTy, CCInfo);
5916 } else {
5917 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5918 ArgFlags, Outs[i].OrigTy, CCInfo);
5919 }
5920
5921 if (Result) {
5922#ifndef NDEBUG
5923 errs() << "Call operand #" << i << " has unhandled type "
5924 << ArgVT << "\n";
5925#endif
5926 llvm_unreachable(nullptr);
5927 }
5928 }
5929 } else {
5930 // All arguments are treated the same.
5931 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5932 }
5933
5934 // Assign locations to all of the outgoing aggregate by value arguments.
5935 SmallVector<CCValAssign, 16> ByValArgLocs;
5936 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5937
5938 // Reserve stack space for the allocations in CCInfo.
5939 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
5940
5941 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5942
5943 // Size of the linkage area, parameter list area and the part of the local
5944 // variable space where copies of aggregates which are passed by value are
5945 // stored.
5946 unsigned NumBytes = CCByValInfo.getStackSize();
5947
5948 // Calculate by how many bytes the stack has to be adjusted in case of tail
5949 // call optimization.
5950 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5951
5952 // Adjust the stack pointer for the new arguments...
5953 // These operations are automatically eliminated by the prolog/epilog pass
5954 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5955 SDValue CallSeqStart = Chain;
5956
5957 // Load the return address and frame pointer so it can be moved somewhere else
5958 // later.
5959 SDValue LROp, FPOp;
5960 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5961
5962 // Set up a copy of the stack pointer for use loading and storing any
5963 // arguments that may not fit in the registers available for argument
5964 // passing.
5965 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5966
5967 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5968 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5969 SmallVector<SDValue, 8> MemOpChains;
5970
5971 bool seenFloatArg = false;
5972 // Walk the register/memloc assignments, inserting copies/loads.
5973 // i - Tracks the index into the list of registers allocated for the call
5974 // RealArgIdx - Tracks the index into the list of actual function arguments
5975 // j - Tracks the index into the list of byval arguments
5976 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5977 i != e;
5978 ++i, ++RealArgIdx) {
5979 CCValAssign &VA = ArgLocs[i];
5980 SDValue Arg = OutVals[RealArgIdx];
5981 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5982
5983 if (Flags.isByVal()) {
5984 // Argument is an aggregate which is passed by value, thus we need to
5985 // create a copy of it in the local variable space of the current stack
5986 // frame (which is the stack frame of the caller) and pass the address of
5987 // this copy to the callee.
5988 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5989 CCValAssign &ByValVA = ByValArgLocs[j++];
5990 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5991
5993 // Memory reserved in the local variable space of the caller's stack frame.
5993 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5994
5995 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5996 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5997 StackPtr, PtrOff);
5998
5999 // Create a copy of the argument in the local area of the current
6000 // stack frame.
6001 SDValue MemcpyCall =
6002 CreateCopyOfByValArgument(Arg, PtrOff,
6003 CallSeqStart.getNode()->getOperand(0),
6004 Flags, DAG, dl);
6005
6006 // This must go outside the CALLSEQ_START..END.
6007 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6008 SDLoc(MemcpyCall));
6009 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6010 NewCallSeqStart.getNode());
6011 Chain = CallSeqStart = NewCallSeqStart;
6012
6013 // Pass the address of the aggregate copy on the stack either in a
6014 // physical register or in the parameter list area of the current stack
6015 // frame to the callee.
6016 Arg = PtrOff;
6017 }
6018
6019 // When useCRBits() is true, there can be i1 arguments.
6020 // It is because getRegisterType(MVT::i1) => MVT::i1,
6021 // and for other integer types getRegisterType() => MVT::i32.
6022 // Extend i1 and ensure callee will get i32.
6023 if (Arg.getValueType() == MVT::i1)
6024 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6025 dl, MVT::i32, Arg);
6026
6027 if (VA.isRegLoc()) {
6028 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6029 // Put argument in a physical register.
6030 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6031 bool IsLE = Subtarget.isLittleEndian();
6032 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6033 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6034 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6035 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6036 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6037 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6038 SVal.getValue(0)));
6039 } else
6040 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6041 } else {
6042 // Put argument in the parameter list area of the current stack frame.
6043 assert(VA.isMemLoc());
6044 unsigned LocMemOffset = VA.getLocMemOffset();
6045
6046 if (!IsTailCall) {
6047 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6048 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6049 StackPtr, PtrOff);
6050
6051 MemOpChains.push_back(
6052 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6053 } else {
6054 // Calculate and remember argument location.
6055 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6056 TailCallArguments);
6057 }
6058 }
6059 }
6060
6061 if (!MemOpChains.empty())
6062 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6063
6064 // Build a sequence of copy-to-reg nodes chained together with token chain
6065 // and flag operands which copy the outgoing args into the appropriate regs.
6066 SDValue InGlue;
6067 for (const auto &[Reg, N] : RegsToPass) {
6068 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6069 InGlue = Chain.getValue(1);
6070 }
6071
6072 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6073 // registers.
6074 if (IsVarArg) {
6075 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6076 SDValue Ops[] = { Chain, InGlue };
6077
6078 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6079 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6080
6081 InGlue = Chain.getValue(1);
6082 }
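// Illustrative example: with hardware floating point, a 32-bit SVR4 vararg
// call such as
//   printf("%f\n", 1.0);
// passes the double in an FPR, so seenFloatArg is true and CR6SET is emitted;
// a vararg call whose arguments are all integers gets CR6UNSET. The callee
// uses CR bit 6 to decide whether it must spill the FP argument registers for
// va_arg.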
6083
6084 if (IsTailCall)
6085 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6086 TailCallArguments);
6087
6088 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6089 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6090}
6091
6092// Copy an argument into memory, being careful to do this outside the
6093// call sequence for the call to which the argument belongs.
6094SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6095 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6096 SelectionDAG &DAG, const SDLoc &dl) const {
6097 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6098 CallSeqStart.getNode()->getOperand(0),
6099 Flags, DAG, dl);
6100 // The MEMCPY must go outside the CALLSEQ_START..END.
6101 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6102 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6103 SDLoc(MemcpyCall));
6104 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6105 NewCallSeqStart.getNode());
6106 return NewCallSeqStart;
6107}
6108
6109SDValue PPCTargetLowering::LowerCall_64SVR4(
6110 SDValue Chain, SDValue Callee, CallFlags CFlags,
6111 const SmallVectorImpl<ISD::OutputArg> &Outs,
6112 const SmallVectorImpl<SDValue> &OutVals,
6113 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6114 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6115 const CallBase *CB) const {
6116 bool isELFv2ABI = Subtarget.isELFv2ABI();
6117 bool isLittleEndian = Subtarget.isLittleEndian();
6118 unsigned NumOps = Outs.size();
6119 bool IsSibCall = false;
6120 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6121
6122 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6123 unsigned PtrByteSize = 8;
6124
6125 MachineFunction &MF = DAG.getMachineFunction();
6126
6127 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6128 IsSibCall = true;
6129
6130 // Mark this function as potentially containing a function that contains a
6131 // tail call. As a consequence the frame pointer will be used for dynamic
6132 // allocation and for restoring the caller's stack pointer in this function's
6133 // epilog. This is done because the tail-called function might overwrite the
6134 // value in this function's (MF) stack pointer stack slot 0(SP).
6135 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6136 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6137
6138 assert(!(IsFastCall && CFlags.IsVarArg) &&
6139 "fastcc not supported on varargs functions");
6140
6141 // Count how many bytes are to be pushed on the stack, including the linkage
6142 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6143 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6144 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
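// Sketch of the doubleword layout implied above (byte offsets from the stack
// pointer, for illustration only):
//   ELFv1: 0 back chain, 8 CR save, 16 LR save, 24/32 reserved, 40 TOC save
//          -> 48 bytes
//   ELFv2: 0 back chain, 8 CR save, 16 LR save, 24 TOC save -> 32 bytes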
6145 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6146 unsigned NumBytes = LinkageSize;
6147 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6148
6149 static const MCPhysReg GPR[] = {
6150 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6151 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6152 };
6153 static const MCPhysReg VR[] = {
6154 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6155 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6156 };
6157
6158 const unsigned NumGPRs = std::size(GPR);
6159 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6160 const unsigned NumVRs = std::size(VR);
6161
6162 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6163 // can be passed to the callee in registers.
6164 // For the fast calling convention, there is another check below.
6165 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6166 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6167 if (!HasParameterArea) {
6168 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6169 unsigned AvailableFPRs = NumFPRs;
6170 unsigned AvailableVRs = NumVRs;
6171 unsigned NumBytesTmp = NumBytes;
6172 for (unsigned i = 0; i != NumOps; ++i) {
6173 if (Outs[i].Flags.isNest()) continue;
6174 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6175 PtrByteSize, LinkageSize, ParamAreaSize,
6176 NumBytesTmp, AvailableFPRs, AvailableVRs))
6177 HasParameterArea = true;
6178 }
6179 }
6180
6181 // When using the fast calling convention, we don't provide backing for
6182 // arguments that will be in registers.
6183 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6184
6185 // Avoid allocating parameter area for fastcc functions if all the arguments
6186 // can be passed in the registers.
6187 if (IsFastCall)
6188 HasParameterArea = false;
6189
6190 // Add up all the space actually used.
6191 for (unsigned i = 0; i != NumOps; ++i) {
6192 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6193 EVT ArgVT = Outs[i].VT;
6194 EVT OrigVT = Outs[i].ArgVT;
6195
6196 if (Flags.isNest())
6197 continue;
6198
6199 if (IsFastCall) {
6200 if (Flags.isByVal()) {
6201 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6202 if (NumGPRsUsed > NumGPRs)
6203 HasParameterArea = true;
6204 } else {
6205 switch (ArgVT.getSimpleVT().SimpleTy) {
6206 default: llvm_unreachable("Unexpected ValueType for argument!");
6207 case MVT::i1:
6208 case MVT::i32:
6209 case MVT::i64:
6210 if (++NumGPRsUsed <= NumGPRs)
6211 continue;
6212 break;
6213 case MVT::v4i32:
6214 case MVT::v8i16:
6215 case MVT::v16i8:
6216 case MVT::v2f64:
6217 case MVT::v2i64:
6218 case MVT::v1i128:
6219 case MVT::f128:
6220 if (++NumVRsUsed <= NumVRs)
6221 continue;
6222 break;
6223 case MVT::v4f32:
6224 if (++NumVRsUsed <= NumVRs)
6225 continue;
6226 break;
6227 case MVT::f32:
6228 case MVT::f64:
6229 if (++NumFPRsUsed <= NumFPRs)
6230 continue;
6231 break;
6232 }
6233 HasParameterArea = true;
6234 }
6235 }
6236
6237 /* Respect alignment of argument on the stack. */
6238 auto Alignment =
6239 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6240 NumBytes = alignTo(NumBytes, Alignment);
6241
6242 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6243 if (Flags.isInConsecutiveRegsLast())
6244 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6245 }
6246
6247 unsigned NumBytesActuallyUsed = NumBytes;
6248
6249 // In the old ELFv1 ABI,
6250 // the prolog code of the callee may store up to 8 GPR argument registers to
6251 // the stack, allowing va_start to index over them in memory if it is varargs.
6252 // Because we cannot tell if this is needed on the caller side, we have to
6253 // conservatively assume that it is needed. As such, make sure we have at
6254 // least enough stack space for the caller to store the 8 GPRs.
6255 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6256 // really requires memory operands, e.g. a vararg function.
6257 if (HasParameterArea)
6258 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6259 else
6260 NumBytes = LinkageSize;
6261
6262 // Tail call needs the stack to be aligned.
6263 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6264 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6265
6266 int SPDiff = 0;
6267
6268 // Calculate by how many bytes the stack has to be adjusted in case of tail
6269 // call optimization.
6270 if (!IsSibCall)
6271 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6272
6273 // To protect arguments on the stack from being clobbered in a tail call,
6274 // force all the loads to happen before doing any other lowering.
6275 if (CFlags.IsTailCall)
6276 Chain = DAG.getStackArgumentTokenFactor(Chain);
6277
6278 // Adjust the stack pointer for the new arguments...
6279 // These operations are automatically eliminated by the prolog/epilog pass
6280 if (!IsSibCall)
6281 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6282 SDValue CallSeqStart = Chain;
6283
6284 // Load the return address and frame pointer so they can be moved somewhere else
6285 // later.
6286 SDValue LROp, FPOp;
6287 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6288
6289 // Set up a copy of the stack pointer for use loading and storing any
6290 // arguments that may not fit in the registers available for argument
6291 // passing.
6292 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6293
6294 // Figure out which arguments are going to go in registers, and which in
6295 // memory. Also, if this is a vararg function, floating point operations
6296 // must be stored to our stack, and loaded into integer regs as well, if
6297 // any integer regs are available for argument passing.
6298 unsigned ArgOffset = LinkageSize;
6299
6300 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6301 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6302
6303 SmallVector<SDValue, 8> MemOpChains;
6304 for (unsigned i = 0; i != NumOps; ++i) {
6305 SDValue Arg = OutVals[i];
6306 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6307 EVT ArgVT = Outs[i].VT;
6308 EVT OrigVT = Outs[i].ArgVT;
6309
6310 // PtrOff will be used to store the current argument to the stack if a
6311 // register cannot be found for it.
6312 SDValue PtrOff;
6313
6314 // We re-align the argument offset for each argument, except when using the
6315 // fast calling convention, when we need to make sure we do that only when
6316 // we'll actually use a stack slot.
6317 auto ComputePtrOff = [&]() {
6318 /* Respect alignment of argument on the stack. */
6319 auto Alignment =
6320 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6321 ArgOffset = alignTo(ArgOffset, Alignment);
6322
6323 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6324
6325 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6326 };
6327
6328 if (!IsFastCall) {
6329 ComputePtrOff();
6330
6331 /* Compute GPR index associated with argument offset. */
6332 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6333 GPR_idx = std::min(GPR_idx, NumGPRs);
6334 }
6335
6336 // Promote integers to 64-bit values.
6337 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6338 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6339 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6340 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6341 }
6342
6343 // FIXME memcpy is used way more than necessary. Correctness first.
6344 // Note: "by value" is code for passing a structure by value, not
6345 // basic types.
6346 if (Flags.isByVal()) {
6347 // Note: Size includes alignment padding, so
6348 // struct x { short a; char b; }
6349 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6350 // These are the proper values we need for right-justifying the
6351 // aggregate in a parameter register.
6352 unsigned Size = Flags.getByValSize();
6353
6354 // An empty aggregate parameter takes up no storage and no
6355 // registers.
6356 if (Size == 0)
6357 continue;
6358
6359 if (IsFastCall)
6360 ComputePtrOff();
6361
6362 // All aggregates smaller than 8 bytes must be passed right-justified.
6363 if (Size==1 || Size==2 || Size==4) {
6364 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6365 if (GPR_idx != NumGPRs) {
6366 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6367 MachinePointerInfo(), VT);
6368 MemOpChains.push_back(Load.getValue(1));
6369 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6370
6371 ArgOffset += PtrByteSize;
6372 continue;
6373 }
6374 }
6375
6376 if (GPR_idx == NumGPRs && Size < 8) {
6377 SDValue AddPtr = PtrOff;
6378 if (!isLittleEndian) {
6379 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6380 PtrOff.getValueType());
6381 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6382 }
6383 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6384 CallSeqStart,
6385 Flags, DAG, dl);
6386 ArgOffset += PtrByteSize;
6387 continue;
6388 }
6389 // Copy the object to the parameter save area if it cannot be entirely passed
6390 // by registers.
6391 // FIXME: we only need to copy the parts which need to be passed in
6392 // parameter save area. For the parts passed by registers, we don't need
6393 // to copy them to the stack although we need to allocate space for them
6394 // in parameter save area.
6395 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6396 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6397 CallSeqStart,
6398 Flags, DAG, dl);
6399
6400 // When a register is available, pass a small aggregate right-justified.
6401 if (Size < 8 && GPR_idx != NumGPRs) {
6402 // The easiest way to get this right-justified in a register
6403 // is to copy the structure into the rightmost portion of a
6404 // local variable slot, then load the whole slot into the
6405 // register.
6406 // FIXME: The memcpy seems to produce pretty awful code for
6407 // small aggregates, particularly for packed ones.
6408 // FIXME: It would be preferable to use the slot in the
6409 // parameter save area instead of a new local variable.
6410 SDValue AddPtr = PtrOff;
6411 if (!isLittleEndian) {
6412 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6413 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6414 }
6415 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6416 CallSeqStart,
6417 Flags, DAG, dl);
6418
6419 // Load the slot into the register.
6420 SDValue Load =
6421 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6422 MemOpChains.push_back(Load.getValue(1));
6423 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6424
6425 // Done with this argument.
6426 ArgOffset += PtrByteSize;
6427 continue;
6428 }
6429
6430 // For aggregates larger than PtrByteSize, copy the pieces of the
6431 // object that fit into registers from the parameter save area.
6432 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6433 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6434 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6435 if (GPR_idx != NumGPRs) {
6436 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6437 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6438 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6439 MachinePointerInfo(), ObjType);
6440
6441 MemOpChains.push_back(Load.getValue(1));
6442 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6443 ArgOffset += PtrByteSize;
6444 } else {
6445 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6446 break;
6447 }
6448 }
6449 continue;
6450 }
6451
6452 switch (Arg.getSimpleValueType().SimpleTy) {
6453 default: llvm_unreachable("Unexpected ValueType for argument!");
6454 case MVT::i1:
6455 case MVT::i32:
6456 case MVT::i64:
6457 if (Flags.isNest()) {
6458 // The 'nest' parameter, if any, is passed in R11.
6459 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6460 break;
6461 }
6462
6463 // These can be scalar arguments or elements of an integer array type
6464 // passed directly. Clang may use those instead of "byval" aggregate
6465 // types to avoid forcing arguments to memory unnecessarily.
6466 if (GPR_idx != NumGPRs) {
6467 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6468 } else {
6469 if (IsFastCall)
6470 ComputePtrOff();
6471
6472 assert(HasParameterArea &&
6473 "Parameter area must exist to pass an argument in memory.");
6474 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6475 true, CFlags.IsTailCall, false, MemOpChains,
6476 TailCallArguments, dl);
6477 if (IsFastCall)
6478 ArgOffset += PtrByteSize;
6479 }
6480 if (!IsFastCall)
6481 ArgOffset += PtrByteSize;
6482 break;
6483 case MVT::f32:
6484 case MVT::f64: {
6485 // These can be scalar arguments or elements of a float array type
6486 // passed directly. The latter are used to implement ELFv2 homogeneous
6487 // float aggregates.
6488
6489 // Named arguments go into FPRs first, and once they overflow, the
6490 // remaining arguments go into GPRs and then the parameter save area.
6491 // Unnamed arguments for vararg functions always go to GPRs and
6492 // then the parameter save area. For now, put all arguments to vararg
6493 // routines always in both locations (FPR *and* GPR or stack slot).
6494 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6495 bool NeededLoad = false;
6496
6497 // First load the argument into the next available FPR.
6498 if (FPR_idx != NumFPRs)
6499 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6500
6501 // Next, load the argument into GPR or stack slot if needed.
6502 if (!NeedGPROrStack)
6503 ;
6504 else if (GPR_idx != NumGPRs && !IsFastCall) {
6505 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6506 // once we support fp <-> gpr moves.
6507
6508 // In the non-vararg case, this can only ever happen in the
6509 // presence of f32 array types, since otherwise we never run
6510 // out of FPRs before running out of GPRs.
6511 SDValue ArgVal;
6512
6513 // Double values are always passed in a single GPR.
6514 if (Arg.getValueType() != MVT::f32) {
6515 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6516
6517 // Non-array float values are extended and passed in a GPR.
6518 } else if (!Flags.isInConsecutiveRegs()) {
6519 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6520 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6521
6522 // If we have an array of floats, we collect every odd element
6523 // together with its predecessor into one GPR.
6524 } else if (ArgOffset % PtrByteSize != 0) {
6525 SDValue Lo, Hi;
6526 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6527 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6528 if (!isLittleEndian)
6529 std::swap(Lo, Hi);
6530 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6531
6532 // The final element, if even, goes into the first half of a GPR.
6533 } else if (Flags.isInConsecutiveRegsLast()) {
6534 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6535 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6536 if (!isLittleEndian)
6537 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6538 DAG.getConstant(32, dl, MVT::i32));
6539
6540 // Non-final even elements are skipped; they will be handled
6541 // together with the subsequent argument on the next go-around.
6542 } else
6543 ArgVal = SDValue();
6544
6545 if (ArgVal.getNode())
6546 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6547 } else {
6548 if (IsFastCall)
6549 ComputePtrOff();
6550
6551 // Single-precision floating-point values are mapped to the
6552 // second (rightmost) word of the stack doubleword.
6553 if (Arg.getValueType() == MVT::f32 &&
6554 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6555 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6556 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6557 }
6558
6559 assert(HasParameterArea &&
6560 "Parameter area must exist to pass an argument in memory.");
6561 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6562 true, CFlags.IsTailCall, false, MemOpChains,
6563 TailCallArguments, dl);
6564
6565 NeededLoad = true;
6566 }
6567 // When passing an array of floats, the array occupies consecutive
6568 // space in the argument area; only round up to the next doubleword
6569 // at the end of the array. Otherwise, each float takes 8 bytes.
6570 if (!IsFastCall || NeededLoad) {
6571 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6572 Flags.isInConsecutiveRegs()) ? 4 : 8;
6573 if (Flags.isInConsecutiveRegsLast())
6574 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6575 }
6576 break;
6577 }
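// Illustrative sketch of the f32 array packing above, assuming an ELFv2
// homogeneous float aggregate such as struct { float f[2]; } processed once
// the FPRs are exhausted: the element at a doubleword-aligned offset is
// skipped on its own iteration, and when its odd successor is visited both
// are bitcast to i32 and combined into one i64 GPR value with BUILD_PAIR,
// with the operand order adjusted for big-endian targets.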
6578 case MVT::v4f32:
6579 case MVT::v4i32:
6580 case MVT::v8i16:
6581 case MVT::v16i8:
6582 case MVT::v2f64:
6583 case MVT::v2i64:
6584 case MVT::v1i128:
6585 case MVT::f128:
6586 // These can be scalar arguments or elements of a vector array type
6587 // passed directly. The latter are used to implement ELFv2 homogeneous
6588 // vector aggregates.
6589
6590 // For a varargs call, named arguments go into VRs or on the stack as
6591 // usual; unnamed arguments always go to the stack or the corresponding
6592 // GPRs when within range. For now, we always put the value in both
6593 // locations (or even all three).
6594 if (CFlags.IsVarArg) {
6595 assert(HasParameterArea &&
6596 "Parameter area must exist if we have a varargs call.");
6597 // We could elide this store in the case where the object fits
6598 // entirely in R registers. Maybe later.
6599 SDValue Store =
6600 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6601 MemOpChains.push_back(Store);
6602 if (VR_idx != NumVRs) {
6603 SDValue Load =
6604 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6605 MemOpChains.push_back(Load.getValue(1));
6606 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6607 }
6608 ArgOffset += 16;
6609 for (unsigned i=0; i<16; i+=PtrByteSize) {
6610 if (GPR_idx == NumGPRs)
6611 break;
6612 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6613 DAG.getConstant(i, dl, PtrVT));
6614 SDValue Load =
6615 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6616 MemOpChains.push_back(Load.getValue(1));
6617 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6618 }
6619 break;
6620 }
6621
6622 // Non-varargs Altivec params go into VRs or on the stack.
6623 if (VR_idx != NumVRs) {
6624 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6625 } else {
6626 if (IsFastCall)
6627 ComputePtrOff();
6628
6629 assert(HasParameterArea &&
6630 "Parameter area must exist to pass an argument in memory.");
6631 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6632 true, CFlags.IsTailCall, true, MemOpChains,
6633 TailCallArguments, dl);
6634 if (IsFastCall)
6635 ArgOffset += 16;
6636 }
6637
6638 if (!IsFastCall)
6639 ArgOffset += 16;
6640 break;
6641 }
6642 }
6643
6644 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6645 "mismatch in size of parameter area");
6646 (void)NumBytesActuallyUsed;
6647
6648 if (!MemOpChains.empty())
6649 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6650
6651 // Check if this is an indirect call (MTCTR/BCTRL).
6652 // See prepareDescriptorIndirectCall and buildCallOperands for more
6653 // information about calls through function pointers in the 64-bit SVR4 ABI.
6654 if (CFlags.IsIndirect) {
6655 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6656 // caller in the TOC save area.
6657 if (isTOCSaveRestoreRequired(Subtarget)) {
6658 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6659 // Load r2 into a virtual register and store it to the TOC save area.
6660 setUsesTOCBasePtr(DAG);
6661 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6662 // TOC save area offset.
6663 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6664 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6665 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6666 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6667 MachinePointerInfo::getStack(
6668 DAG.getMachineFunction(), TOCSaveOffset));
6669 }
6670 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6671 // This does not mean the MTCTR instruction must use R12; it's easier
6672 // to model this as an extra parameter, so do that.
6673 if (isELFv2ABI && !CFlags.IsPatchPoint)
6674 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6675 }
6676
6677 // Build a sequence of copy-to-reg nodes chained together with token chain
6678 // and flag operands which copy the outgoing args into the appropriate regs.
6679 SDValue InGlue;
6680 for (const auto &[Reg, N] : RegsToPass) {
6681 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6682 InGlue = Chain.getValue(1);
6683 }
6684
6685 if (CFlags.IsTailCall && !IsSibCall)
6686 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6687 TailCallArguments);
6688
6689 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6690 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6691}
6692
6693// Returns true when the shadow of a general purpose argument register
6694// in the parameter save area is aligned to at least 'RequiredAlign'.
6695static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6696 assert(RequiredAlign.value() <= 16 &&
6697 "Required alignment greater than stack alignment.");
6698 switch (Reg) {
6699 default:
6700 report_fatal_error("called on invalid register.");
6701 case PPC::R5:
6702 case PPC::R9:
6703 case PPC::X3:
6704 case PPC::X5:
6705 case PPC::X7:
6706 case PPC::X9:
6707 // These registers are 16-byte aligned, which is the strictest alignment
6708 // we can support.
6709 return true;
6710 case PPC::R3:
6711 case PPC::R7:
6712 case PPC::X4:
6713 case PPC::X6:
6714 case PPC::X8:
6715 case PPC::X10:
6716 // The shadow of these registers in the PSA is 8 byte aligned.
6717 return RequiredAlign <= 8;
6718 case PPC::R4:
6719 case PPC::R6:
6720 case PPC::R8:
6721 case PPC::R10:
6722 return RequiredAlign <= 4;
6723 }
6724}
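// Worked example (illustrative, assuming the usual AIX linkage sizes of 24
// bytes for 32-bit and 48 bytes for 64-bit and a 16-byte aligned stack
// pointer): X3 shadows the first PSA slot at SP + 48, which is 16-byte
// aligned, while X4 shadows SP + 56, which is only 8-byte aligned; likewise
// R5 shadows SP + 32 (16-byte aligned) but R4 shadows SP + 28 (4-byte
// aligned). That is the pattern encoded in the switch above.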
6725
6726static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6727 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6728 Type *OrigTy, CCState &State) {
6729 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6730 State.getMachineFunction().getSubtarget());
6731 const bool IsPPC64 = Subtarget.isPPC64();
6732 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6733 const Align PtrAlign(PtrSize);
6734 const Align StackAlign(16);
6735 const MVT RegVT = Subtarget.getScalarIntVT();
6736
6737 if (ValVT == MVT::f128)
6738 report_fatal_error("f128 is unimplemented on AIX.");
6739
6740 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6741 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6742 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6743 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6744 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6745 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6746
6747 static const MCPhysReg VR[] = {// Vector registers.
6748 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6749 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6750 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6751
6752 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6753
6754 if (ArgFlags.isNest()) {
6755 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6756 if (!EnvReg)
6757 report_fatal_error("More then one nest argument.");
6758 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6759 return false;
6760 }
6761
6762 if (ArgFlags.isByVal()) {
6763 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6764 if (ByValAlign > StackAlign)
6765 report_fatal_error("Pass-by-value arguments with alignment greater than "
6766 "16 are not supported.");
6767
6768 const unsigned ByValSize = ArgFlags.getByValSize();
6769 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6770
6771 // An empty aggregate parameter takes up no storage and no registers,
6772 // but needs a MemLoc for a stack slot for the formal arguments side.
6773 if (ByValSize == 0) {
6774 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6775 State.getStackSize(), RegVT, LocInfo));
6776 return false;
6777 }
6778
6779 // Shadow allocate any registers that are not properly aligned.
6780 unsigned NextReg = State.getFirstUnallocated(GPRs);
6781 while (NextReg != GPRs.size() &&
6782 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6783 // Shadow allocate the next register since its alignment is not strict enough.
6784 MCRegister Reg = State.AllocateReg(GPRs);
6785 // Allocate the stack space shadowed by said register.
6786 State.AllocateStack(PtrSize, PtrAlign);
6787 assert(Reg && "Alocating register unexpectedly failed.");
6788 (void)Reg;
6789 NextReg = State.getFirstUnallocated(GPRs);
6790 }
6791
6792 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6793 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6794 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6795 if (MCRegister Reg = State.AllocateReg(GPRs))
6796 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6797 else {
6798 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6799 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6800 LocInfo));
6801 break;
6802 }
6803 }
6804 return false;
6805 }
6806
6807 // Arguments always reserve space in the parameter save area.
6808 switch (ValVT.SimpleTy) {
6809 default:
6810 report_fatal_error("Unhandled value type for argument.");
6811 case MVT::i64:
6812 // i64 arguments should have been split to i32 for PPC32.
6813 assert(IsPPC64 && "PPC32 should have split i64 values.");
6814 [[fallthrough]];
6815 case MVT::i1:
6816 case MVT::i32: {
6817 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6818 // AIX integer arguments are always passed in register width.
6819 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6820 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6821 : CCValAssign::LocInfo::ZExt;
6822 if (MCRegister Reg = State.AllocateReg(GPRs))
6823 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6824 else
6825 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6826
6827 return false;
6828 }
6829 case MVT::f32:
6830 case MVT::f64: {
6831 // Parameter save area (PSA) is reserved even if the float passes in an FPR.
6832 const unsigned StoreSize = LocVT.getStoreSize();
6833 // Floats are always 4-byte aligned in the PSA on AIX.
6834 // This includes f64 in 64-bit mode for ABI compatibility.
6835 const unsigned Offset =
6836 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6837 MCRegister FReg = State.AllocateReg(FPR);
6838 if (FReg)
6839 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6840
6841 // Reserve and initialize GPRs or initialize the PSA as required.
6842 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6843 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6844 assert(FReg && "An FPR should be available when a GPR is reserved.");
6845 if (State.isVarArg()) {
6846 // Successfully reserved GPRs are only initialized for vararg calls.
6847 // Custom handling is required for:
6848 // f64 in PPC32 needs to be split into 2 GPRs.
6849 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6850 State.addLoc(
6851 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6852 }
6853 } else {
6854 // If there are insufficient GPRs, the PSA needs to be initialized.
6855 // Initialization occurs even if an FPR was initialized for
6856 // compatibility with the AIX XL compiler. The full memory for the
6857 // argument will be initialized even if a prior word is saved in GPR.
6858 // A custom memLoc is used when the argument also passes in FPR so
6859 // that the callee handling can skip over it easily.
6860 State.addLoc(
6861 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6862 LocInfo)
6863 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6864 break;
6865 }
6866 }
6867
6868 return false;
6869 }
6870 case MVT::v4f32:
6871 case MVT::v4i32:
6872 case MVT::v8i16:
6873 case MVT::v16i8:
6874 case MVT::v2i64:
6875 case MVT::v2f64:
6876 case MVT::v1i128: {
6877 const unsigned VecSize = 16;
6878 const Align VecAlign(VecSize);
6879
6880 if (!State.isVarArg()) {
6881 // If there are vector registers remaining we don't consume any stack
6882 // space.
6883 if (MCRegister VReg = State.AllocateReg(VR)) {
6884 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6885 return false;
6886 }
6887 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6888 // might be allocated in the portion of the PSA that is shadowed by the
6889 // GPRs.
6890 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6891 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6892 return false;
6893 }
6894
6895 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6896 // Burn any underaligned registers and their shadowed stack space until
6897 // we reach the required alignment.
6898 while (NextRegIndex != GPRs.size() &&
6899 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6900 // Shadow allocate register and its stack shadow.
6901 MCRegister Reg = State.AllocateReg(GPRs);
6902 State.AllocateStack(PtrSize, PtrAlign);
6903 assert(Reg && "Allocating register unexpectedly failed.");
6904 (void)Reg;
6905 NextRegIndex = State.getFirstUnallocated(GPRs);
6906 }
6907
6908 // Vectors that are passed as fixed arguments are handled differently.
6909 // They are passed in VRs if any are available (unlike arguments passed
6910 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
6911 // functions).
6912 if (!ArgFlags.isVarArg()) {
6913 if (MCRegister VReg = State.AllocateReg(VR)) {
6914 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6915 // Shadow allocate GPRs and stack space even though we pass in a VR.
6916 for (unsigned I = 0; I != VecSize; I += PtrSize)
6917 State.AllocateReg(GPRs);
6918 State.AllocateStack(VecSize, VecAlign);
6919 return false;
6920 }
6921 // No vector registers remain so pass on the stack.
6922 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6923 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6924 return false;
6925 }
6926
6927 // If all GPRs are consumed then we pass the argument fully on the stack.
6928 if (NextRegIndex == GPRs.size()) {
6929 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6930 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6931 return false;
6932 }
6933
6934 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6935 // half of the argument, and then need to pass the remaining half on the
6936 // stack.
6937 if (GPRs[NextRegIndex] == PPC::R9) {
6938 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6939 State.addLoc(
6940 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6941
6942 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
6943 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
6944 assert(FirstReg && SecondReg &&
6945 "Allocating R9 or R10 unexpectedly failed.");
6946 State.addLoc(
6947 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6948 State.addLoc(
6949 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6950 return false;
6951 }
6952
6953 // We have enough GPRs to fully pass the vector argument, and we have
6954 // already consumed any underaligned registers. Start with the custom
6955 // MemLoc and then the custom RegLocs.
6956 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6957 State.addLoc(
6958 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6959 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6960 const MCRegister Reg = State.AllocateReg(GPRs);
6961 assert(Reg && "Failed to allocated register for vararg vector argument");
6962 State.addLoc(
6963 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6964 }
6965 return false;
6966 }
6967 }
6968 return true;
6969}
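// Summary example of the vector rules above (illustrative only): for a
// non-variadic callee, a vector argument simply takes the next VR (V2..V13)
// and consumes no PSA space. For a variadic callee, a named vector argument
// still goes in a VR but shadow-allocates the GPRs and 16 bytes of PSA it
// would otherwise occupy, while a vector passed through the ellipsis gets a
// custom MemLoc plus custom RegLocs so it can also be reloaded from GPRs.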
6970
6971// So far, this function is only used by LowerFormalArguments_AIX()
6972 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6973 bool IsPPC64,
6974 bool HasP8Vector,
6975 bool HasVSX) {
6976 assert((IsPPC64 || SVT != MVT::i64) &&
6977 "i64 should have been split for 32-bit codegen.");
6978
6979 switch (SVT) {
6980 default:
6981 report_fatal_error("Unexpected value type for formal argument");
6982 case MVT::i1:
6983 case MVT::i32:
6984 case MVT::i64:
6985 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6986 case MVT::f32:
6987 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6988 case MVT::f64:
6989 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6990 case MVT::v4f32:
6991 case MVT::v4i32:
6992 case MVT::v8i16:
6993 case MVT::v16i8:
6994 case MVT::v2i64:
6995 case MVT::v2f64:
6996 case MVT::v1i128:
6997 return &PPC::VRRCRegClass;
6998 }
6999}
7000
7002 SelectionDAG &DAG, SDValue ArgValue,
7003 MVT LocVT, const SDLoc &dl) {
7004 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7005 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7006
7007 if (Flags.isSExt())
7008 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7009 DAG.getValueType(ValVT));
7010 else if (Flags.isZExt())
7011 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7012 DAG.getValueType(ValVT));
7013
7014 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7015}
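// For illustration: an i8 argument that the caller zero-extended arrives here
// as a full register-width value; the helper wraps it in AssertZext with an
// i8 value type and then truncates to i8, so later combines know the upper
// bits are already zero.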
7016
7017static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7018 const unsigned LASize = FL->getLinkageSize();
7019
7020 if (PPC::GPRCRegClass.contains(Reg)) {
7021 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7022 "Reg must be a valid argument register!");
7023 return LASize + 4 * (Reg - PPC::R3);
7024 }
7025
7026 if (PPC::G8RCRegClass.contains(Reg)) {
7027 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7028 "Reg must be a valid argument register!");
7029 return LASize + 8 * (Reg - PPC::X3);
7030 }
7031
7032 llvm_unreachable("Only general purpose registers expected.");
7033}
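// Worked example (illustrative, using the 48-byte 64-bit AIX linkage area):
// X3 maps to offset 48, X5 to 48 + 8 * 2 = 64, and X10 to 48 + 8 * 7 = 104.
// The 32-bit registers follow the same formula with a 24-byte linkage area
// and 4-byte slots, so R4 maps to 24 + 4 = 28.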
7034
7035// AIX ABI Stack Frame Layout:
7036//
7037// Low Memory +--------------------------------------------+
7038// SP +---> | Back chain | ---+
7039// | +--------------------------------------------+ |
7040// | | Saved Condition Register | |
7041// | +--------------------------------------------+ |
7042// | | Saved Linkage Register | |
7043// | +--------------------------------------------+ | Linkage Area
7044// | | Reserved for compilers | |
7045// | +--------------------------------------------+ |
7046// | | Reserved for binders | |
7047// | +--------------------------------------------+ |
7048// | | Saved TOC pointer | ---+
7049// | +--------------------------------------------+
7050// | | Parameter save area |
7051// | +--------------------------------------------+
7052// | | Alloca space |
7053// | +--------------------------------------------+
7054// | | Local variable space |
7055// | +--------------------------------------------+
7056// | | Float/int conversion temporary |
7057// | +--------------------------------------------+
7058// | | Save area for AltiVec registers |
7059// | +--------------------------------------------+
7060// | | AltiVec alignment padding |
7061// | +--------------------------------------------+
7062// | | Save area for VRSAVE register |
7063// | +--------------------------------------------+
7064// | | Save area for General Purpose registers |
7065// | +--------------------------------------------+
7066// | | Save area for Floating Point registers |
7067// | +--------------------------------------------+
7068// +---- | Back chain |
7069// High Memory +--------------------------------------------+
7070//
7071// Specifications:
7072// AIX 7.2 Assembler Language Reference
7073// Subroutine linkage convention
7074
7075SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7076 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7077 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7078 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7079
7080 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7081 CallConv == CallingConv::Fast) &&
7082 "Unexpected calling convention!");
7083
7084 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7085 report_fatal_error("Tail call support is unimplemented on AIX.");
7086
7087 if (useSoftFloat())
7088 report_fatal_error("Soft float support is unimplemented on AIX.");
7089
7090 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7091
7092 const bool IsPPC64 = Subtarget.isPPC64();
7093 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7094
7095 // Assign locations to all of the incoming arguments.
7097 MachineFunction &MF = DAG.getMachineFunction();
7098 MachineFrameInfo &MFI = MF.getFrameInfo();
7099 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7100 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7101
7102 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7103 // Reserve space for the linkage area on the stack.
7104 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7105 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7106 uint64_t SaveStackPos = CCInfo.getStackSize();
7107 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7108 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7109
7110 SmallVector<SDValue, 16> MemOps;
7111
7112 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7113 CCValAssign &VA = ArgLocs[I++];
7114 MVT LocVT = VA.getLocVT();
7115 MVT ValVT = VA.getValVT();
7116 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7117
7118 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7119 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7120 // For compatibility with the AIX XL compiler, the float args in the
7121 // parameter save area are initialized even if the argument is available
7122 // in register. The caller is required to initialize both the register
7123 // and memory, however, the callee can choose to expect it in either.
7124 // The memloc is dismissed here because the argument is retrieved from
7125 // the register.
7126 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7127 continue;
7128
7129 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7130 const TargetRegisterClass *RegClass = getRegClassForSVT(
7131 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7132 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7133 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7134 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7135 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7136 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7137 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7138 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7139 MachinePointerInfo(), Align(PtrByteSize));
7140 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7141 MemOps.push_back(StoreReg);
7142 }
7143
7144 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7145 unsigned StoreSize =
7146 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7147 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7148 }
7149
7150 auto HandleMemLoc = [&]() {
7151 const unsigned LocSize = LocVT.getStoreSize();
7152 const unsigned ValSize = ValVT.getStoreSize();
7153 assert((ValSize <= LocSize) &&
7154 "Object size is larger than size of MemLoc");
7155 int CurArgOffset = VA.getLocMemOffset();
7156 // Objects are right-justified because AIX is big-endian.
7157 if (LocSize > ValSize)
7158 CurArgOffset += LocSize - ValSize;
7159 // Potential tail calls could cause overwriting of argument stack slots.
7160 const bool IsImmutable =
7161 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7162 (CallConv == CallingConv::Fast));
7163 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7164 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7165 SDValue ArgValue =
7166 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7167
7168 // While the ABI specifies the argument type is (sign or zero) extended
7169 // out to register width, not all code is compliant. We truncate and
7170 // re-extend to be more forgiving of these callers when the argument type
7171 // is smaller than register width.
7172 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7173 ValVT.isInteger() &&
7174 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7175 // It is possible to have either real integer values
7176 // or integers that were not originally integers.
7177 // In the latter case, these could have come from structs,
7178 // and these integers would not have an extend on the parameter.
7179 // Since these types of integers do not have an extend specified
7180 // in the first place, the type of extend that we do should not matter.
7181 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7182 ? MVT::i8
7183 : ArgVT;
7184 SDValue ArgValueTrunc =
7185 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7186 SDValue ArgValueExt =
7187 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7188 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7189 InVals.push_back(ArgValueExt);
7190 } else {
7191 InVals.push_back(ArgValue);
7192 }
7193 };
7194
7195 // Vector arguments to VaArg functions are passed both on the stack, and
7196 // in any available GPRs. Load the value from the stack and add the GPRs
7197 // as live ins.
7198 if (VA.isMemLoc() && VA.needsCustom()) {
7199 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7200 assert(isVarArg && "Only use custom memloc for vararg.");
7201 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7202 // matching custom RegLocs.
7203 const unsigned OriginalValNo = VA.getValNo();
7204 (void)OriginalValNo;
7205
7206 auto HandleCustomVecRegLoc = [&]() {
7207 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7208 "Missing custom RegLoc.");
7209 VA = ArgLocs[I++];
7210 assert(VA.getValVT().isVector() &&
7211 "Unexpected Val type for custom RegLoc.");
7212 assert(VA.getValNo() == OriginalValNo &&
7213 "ValNo mismatch between custom MemLoc and RegLoc.");
7214 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7215 MF.addLiveIn(VA.getLocReg(),
7216 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7217 Subtarget.hasVSX()));
7218 };
7219
7220 HandleMemLoc();
7221 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7222 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7223 // R10.
7224 HandleCustomVecRegLoc();
7225 HandleCustomVecRegLoc();
7226
7227 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7228 // we passed the vector in R5, R6, R7 and R8.
7229 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7230 assert(!IsPPC64 &&
7231 "Only 2 custom RegLocs expected for 64-bit codegen.");
7232 HandleCustomVecRegLoc();
7233 HandleCustomVecRegLoc();
7234 }
7235
7236 continue;
7237 }
7238
7239 if (VA.isRegLoc()) {
7240 if (VA.getValVT().isScalarInteger())
7241 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7242 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7243 switch (VA.getValVT().SimpleTy) {
7244 default:
7245 report_fatal_error("Unhandled value type for argument.");
7246 case MVT::f32:
7247 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7248 break;
7249 case MVT::f64:
7250 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7251 break;
7252 }
7253 } else if (VA.getValVT().isVector()) {
7254 switch (VA.getValVT().SimpleTy) {
7255 default:
7256 report_fatal_error("Unhandled value type for argument.");
7257 case MVT::v16i8:
7258 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7259 break;
7260 case MVT::v8i16:
7261 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7262 break;
7263 case MVT::v4i32:
7264 case MVT::v2i64:
7265 case MVT::v1i128:
7266 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7267 break;
7268 case MVT::v4f32:
7269 case MVT::v2f64:
7270 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7271 break;
7272 }
7273 }
7274 }
7275
7276 if (Flags.isByVal() && VA.isMemLoc()) {
7277 const unsigned Size =
7278 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7279 PtrByteSize);
7280 const int FI = MF.getFrameInfo().CreateFixedObject(
7281 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7282 /* IsAliased */ true);
7283 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7284 InVals.push_back(FIN);
7285
7286 continue;
7287 }
7288
7289 if (Flags.isByVal()) {
7290 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7291
7292 const MCPhysReg ArgReg = VA.getLocReg();
7293 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7294
7295 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7296 const int FI = MF.getFrameInfo().CreateFixedObject(
7297 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7298 /* IsAliased */ true);
7299 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7300 InVals.push_back(FIN);
7301
7302 // Add live ins for all the RegLocs for the same ByVal.
7303 const TargetRegisterClass *RegClass =
7304 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7305
7306 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7307 unsigned Offset) {
7308 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7309 // Since the caller's side has left-justified the aggregate in the
7310 // register, we can simply store the entire register into the stack
7311 // slot.
7312 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7313 // The store to the fixed stack object is needed because accessing a
7314 // field of the ByVal will use a gep and load. Ideally we will optimize
7315 // to extracting the value from the register directly, and elide the
7316 // stores when the argument's address is not taken, but that will need to
7317 // be future work.
7318 SDValue Store = DAG.getStore(
7319 CopyFrom.getValue(1), dl, CopyFrom,
7320 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7321 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7322
7323 MemOps.push_back(Store);
7324 };
7325
7326 unsigned Offset = 0;
7327 HandleRegLoc(VA.getLocReg(), Offset);
7328 Offset += PtrByteSize;
7329 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7330 Offset += PtrByteSize) {
7331 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7332 "RegLocs should be for ByVal argument.");
7333
7334 const CCValAssign RL = ArgLocs[I++];
7335 HandleRegLoc(RL.getLocReg(), Offset);
7337 }
7338
7339 if (Offset != StackSize) {
7340 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7341 "Expected MemLoc for remaining bytes.");
7342 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7343 // Consume the MemLoc. The InVal has already been emitted, so nothing
7344 // more needs to be done.
7345 ++I;
7346 }
7347
7348 continue;
7349 }
7350
7351 if (VA.isRegLoc() && !VA.needsCustom()) {
7352 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7353 Register VReg =
7354 MF.addLiveIn(VA.getLocReg(),
7355 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7356 Subtarget.hasVSX()));
7357 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7358 if (ValVT.isScalarInteger() &&
7359 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7360 ArgValue =
7361 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7362 }
7363 InVals.push_back(ArgValue);
7364 continue;
7365 }
7366 if (VA.isMemLoc()) {
7367 HandleMemLoc();
7368 continue;
7369 }
7370 }
7371
7372 // On AIX a minimum of 8 words is saved to the parameter save area.
7373 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7374 // Area that is at least reserved in the caller of this function.
7375 unsigned CallerReservedArea = std::max<unsigned>(
7376 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7377
7378 // Set the size that is at least reserved in caller of this function. Tail
7379 // call optimized function's reserved stack space needs to be aligned so
7380 // that taking the difference between two stack areas will result in an
7381 // aligned stack.
7382 CallerReservedArea =
7383 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7384 FuncInfo->setMinReservedArea(CallerReservedArea);
7385
7386 if (isVarArg) {
7387 FuncInfo->setVarArgsFrameIndex(
7388 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7389 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7390
7391 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7392 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7393
7394 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7395 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7396 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7397
7398 // The fixed integer arguments of a variadic function are stored to the
7399 // VarArgsFrameIndex on the stack so that they may be loaded by
7400 // dereferencing the result of va_next.
7401 for (unsigned GPRIndex =
7402 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7403 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7404
7405 const Register VReg =
7406 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7407 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7408
7409 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7410 SDValue Store =
7411 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7412 MemOps.push_back(Store);
7413 // Increment the address for the next argument to store.
7414 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7415 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7416 }
7417 }
7418
7419 if (!MemOps.empty())
7420 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7421
7422 return Chain;
7423}
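// Worked example for the reserved-area computation above (illustrative,
// assuming the 48-byte 64-bit AIX linkage area): even a callee taking a
// single i32 reports a minimum reserved area of 48 + 8 * 8 = 112 bytes,
// since a caller must always have room to spill the eight GPR argument
// registers into the parameter save area.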
7424
7425SDValue PPCTargetLowering::LowerCall_AIX(
7426 SDValue Chain, SDValue Callee, CallFlags CFlags,
7428 const SmallVectorImpl<SDValue> &OutVals,
7429 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7430 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7431 const CallBase *CB) const {
7432 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7433 // AIX ABI stack frame layout.
7434
7435 assert((CFlags.CallConv == CallingConv::C ||
7436 CFlags.CallConv == CallingConv::Cold ||
7437 CFlags.CallConv == CallingConv::Fast) &&
7438 "Unexpected calling convention!");
7439
7440 if (CFlags.IsPatchPoint)
7441 report_fatal_error("This call type is unimplemented on AIX.");
7442
7443 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7444
7445 MachineFunction &MF = DAG.getMachineFunction();
7446 SmallVector<CCValAssign, 16> ArgLocs;
7447 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7448 *DAG.getContext());
7449
7450 // Reserve space for the linkage save area (LSA) on the stack.
7451 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7452 // [SP][CR][LR][2 x reserved][TOC].
7453 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7454 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7455 const bool IsPPC64 = Subtarget.isPPC64();
7456 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7457 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7458 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7459 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7460
7461 // The prolog code of the callee may store up to 8 GPR argument registers to
7462 // the stack, allowing va_start to index over them in memory if the callee
7463 // is variadic.
7464 // Because we cannot tell if this is needed on the caller side, we have to
7465 // conservatively assume that it is needed. As such, make sure we have at
7466 // least enough stack space for the caller to store the 8 GPRs.
7467 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7468 const unsigned NumBytes = std::max<unsigned>(
7469 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7470
7471 // Adjust the stack pointer for the new arguments...
7472 // These operations are automatically eliminated by the prolog/epilog pass.
7473 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7474 SDValue CallSeqStart = Chain;
7475
7477 SmallVector<SDValue, 8> MemOpChains;
7478
7479 // Set up a copy of the stack pointer for loading and storing any
7480 // arguments that may not fit in the registers available for argument
7481 // passing.
7482 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7483 : DAG.getRegister(PPC::R1, MVT::i32);
7484
7485 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7486 const unsigned ValNo = ArgLocs[I].getValNo();
7487 SDValue Arg = OutVals[ValNo];
7488 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7489
7490 if (Flags.isByVal()) {
7491 const unsigned ByValSize = Flags.getByValSize();
7492
7493 // Nothing to do for zero-sized ByVals on the caller side.
7494 if (!ByValSize) {
7495 ++I;
7496 continue;
7497 }
7498
7499 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7500 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7501 (LoadOffset != 0)
7502 ? DAG.getObjectPtrOffset(
7503 dl, Arg, TypeSize::getFixed(LoadOffset))
7504 : Arg,
7505 MachinePointerInfo(), VT);
7506 };
7507
7508 unsigned LoadOffset = 0;
7509
7510 // Initialize registers, which are fully occupied by the by-val argument.
7511 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7512 SDValue Load = GetLoad(PtrVT, LoadOffset);
7513 MemOpChains.push_back(Load.getValue(1));
7514 LoadOffset += PtrByteSize;
7515 const CCValAssign &ByValVA = ArgLocs[I++];
7516 assert(ByValVA.getValNo() == ValNo &&
7517 "Unexpected location for pass-by-value argument.");
7518 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7519 }
7520
7521 if (LoadOffset == ByValSize)
7522 continue;
7523
7524 // There must be one more loc to handle the remainder.
7525 assert(ArgLocs[I].getValNo() == ValNo &&
7526 "Expected additional location for by-value argument.");
7527
7528 if (ArgLocs[I].isMemLoc()) {
7529 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7530 const CCValAssign &ByValVA = ArgLocs[I++];
7531 ISD::ArgFlagsTy MemcpyFlags = Flags;
7532 // Only memcpy the bytes that don't pass in register.
7533 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7534 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7535 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7536 dl, Arg, TypeSize::getFixed(LoadOffset))
7537 : Arg,
7538 DAG.getObjectPtrOffset(
7539 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7540 CallSeqStart, MemcpyFlags, DAG, dl);
7541 continue;
7542 }
7543
7544 // Initialize the final register residue.
7545 // Any residue that occupies the final by-val arg register must be
7546 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7547 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7548 // 2 and 1 byte loads.
7549 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7550 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7551 "Unexpected register residue for by-value argument.");
7552 SDValue ResidueVal;
7553 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7554 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7555 const MVT VT =
7556 N == 1 ? MVT::i8
7557 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7558 SDValue Load = GetLoad(VT, LoadOffset);
7559 MemOpChains.push_back(Load.getValue(1));
7560 LoadOffset += N;
7561 Bytes += N;
7562
7563 // By-val arguments are passed left-justified in register.
7564 // Every load here needs to be shifted, otherwise a full register load
7565 // should have been used.
7566 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7567 "Unexpected load emitted during handling of pass-by-value "
7568 "argument.");
7569 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7570 EVT ShiftAmountTy =
7571 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7572 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7573 SDValue ShiftedLoad =
7574 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7575 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7576 ShiftedLoad)
7577 : ShiftedLoad;
7578 }
7579
7580 const CCValAssign &ByValVA = ArgLocs[I++];
7581 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7582 continue;
7583 }
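// Worked example of the by-value residue handling above (illustrative only):
// a 7-byte by-value argument in 64-bit mode is loaded with a 4-byte, a
// 2-byte and a 1-byte zero-extending load; the pieces are shifted left by
// 32, 16 and 8 bits respectively and OR'ed together, leaving the 7 bytes
// left-justified in the final GPR as the AIX ABI requires.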
7584
7585 CCValAssign &VA = ArgLocs[I++];
7586 const MVT LocVT = VA.getLocVT();
7587 const MVT ValVT = VA.getValVT();
7588
7589 switch (VA.getLocInfo()) {
7590 default:
7591 report_fatal_error("Unexpected argument extension type.");
7592 case CCValAssign::Full:
7593 break;
7594 case CCValAssign::ZExt:
7595 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7596 break;
7597 case CCValAssign::SExt:
7598 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7599 break;
7600 }
7601
7602 if (VA.isRegLoc() && !VA.needsCustom()) {
7603 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7604 continue;
7605 }
7606
7607 // Vector arguments passed to VarArg functions need custom handling when
7608 // they are passed (at least partially) in GPRs.
7609 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7610 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7611 // Store value to its stack slot.
7612 SDValue PtrOff =
7613 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7614 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7615 SDValue Store =
7616 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7617 MemOpChains.push_back(Store);
7618 const unsigned OriginalValNo = VA.getValNo();
7619 // Then load the GPRs from the stack
7620 unsigned LoadOffset = 0;
7621 auto HandleCustomVecRegLoc = [&]() {
7622 assert(I != E && "Unexpected end of CCvalAssigns.");
7623 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7624 "Expected custom RegLoc.");
7625 CCValAssign RegVA = ArgLocs[I++];
7626 assert(RegVA.getValNo() == OriginalValNo &&
7627 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7628 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7629 DAG.getConstant(LoadOffset, dl, PtrVT));
7630 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7631 MemOpChains.push_back(Load.getValue(1));
7632 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7633 LoadOffset += PtrByteSize;
7634 };
7635
7636 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7637 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7638 // R10.
7639 HandleCustomVecRegLoc();
7640 HandleCustomVecRegLoc();
7641
7642 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7643 ArgLocs[I].getValNo() == OriginalValNo) {
7644 assert(!IsPPC64 &&
7645 "Only 2 custom RegLocs expected for 64-bit codegen.");
7646 HandleCustomVecRegLoc();
7647 HandleCustomVecRegLoc();
7648 }
7649
7650 continue;
7651 }
7652
7653 if (VA.isMemLoc()) {
7654 SDValue PtrOff =
7655 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7656 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7657 MemOpChains.push_back(
7658 DAG.getStore(Chain, dl, Arg, PtrOff,
7660 Subtarget.getFrameLowering()->getStackAlign()));
7661
7662 continue;
7663 }
7664
7665 if (!ValVT.isFloatingPoint())
7667 "Unexpected register handling for calling convention.");
7668
7669 // Custom handling is used for GPR initializations for vararg float
7670 // arguments.
7671 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7672 LocVT.isInteger() &&
7673 "Custom register handling only expected for VarArg.");
7674
7675 SDValue ArgAsInt =
7676 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7677
7678 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7679 // f32 in 32-bit GPR
7680 // f64 in 64-bit GPR
7681 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7682 else if (Arg.getValueType().getFixedSizeInBits() <
7683 LocVT.getFixedSizeInBits())
7684 // f32 in 64-bit GPR.
7685 RegsToPass.push_back(std::make_pair(
7686 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7687 else {
7688 // f64 in two 32-bit GPRs
7689 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7690 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7691 "Unexpected custom register for argument!");
7692 CCValAssign &GPR1 = VA;
7693 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7694 DAG.getConstant(32, dl, MVT::i8));
7695 RegsToPass.push_back(std::make_pair(
7696 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7697
7698 if (I != E) {
7699 // If only 1 GPR was available, there will only be one custom GPR and
7700 // the argument will also pass in memory.
7701 CCValAssign &PeekArg = ArgLocs[I];
7702 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7703 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7704 CCValAssign &GPR2 = ArgLocs[I++];
7705 RegsToPass.push_back(std::make_pair(
7706 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7707 }
7708 }
7709 }
7710 }
7711
7712 if (!MemOpChains.empty())
7713 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7714
7715 // For indirect calls, we need to save the TOC base to the stack for
7716 // restoration after the call.
7717 if (CFlags.IsIndirect) {
7718 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7719 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7720 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7721 const MVT PtrVT = Subtarget.getScalarIntVT();
7722 const unsigned TOCSaveOffset =
7723 Subtarget.getFrameLowering()->getTOCSaveOffset();
7724
7725 setUsesTOCBasePtr(DAG);
7726 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7727 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7728 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7729 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7730 Chain = DAG.getStore(
7731 Val.getValue(1), dl, Val, AddPtr,
7732 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7733 }
7734
7735 // Build a sequence of copy-to-reg nodes chained together with token chain
7736 // and flag operands which copy the outgoing args into the appropriate regs.
7737 SDValue InGlue;
7738 for (auto Reg : RegsToPass) {
7739 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7740 InGlue = Chain.getValue(1);
7741 }
7742
7743 const int SPDiff = 0;
7744 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7745 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7746}
7747
7748bool
7749PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7750 MachineFunction &MF, bool isVarArg,
7751 const SmallVectorImpl<ISD::OutputArg> &Outs,
7752 LLVMContext &Context,
7753 const Type *RetTy) const {
7754 SmallVector<CCValAssign, 16> RVLocs;
7755 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7756 return CCInfo.CheckReturn(
7757 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7758 ? RetCC_PPC_Cold
7759 : RetCC_PPC);
7760}
7761
7762SDValue
7763PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7764 bool isVarArg,
7765 const SmallVectorImpl<ISD::OutputArg> &Outs,
7766 const SmallVectorImpl<SDValue> &OutVals,
7767 const SDLoc &dl, SelectionDAG &DAG) const {
7768 SmallVector<CCValAssign, 16> RVLocs;
7769 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7770 *DAG.getContext());
7771 CCInfo.AnalyzeReturn(Outs,
7772 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7773 ? RetCC_PPC_Cold
7774 : RetCC_PPC);
7775
7776 SDValue Glue;
7777 SmallVector<SDValue, 4> RetOps(1, Chain);
7778
7779 // Copy the result values into the output registers.
7780 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7781 CCValAssign &VA = RVLocs[i];
7782 assert(VA.isRegLoc() && "Can only return in registers!");
7783
7784 SDValue Arg = OutVals[RealResIdx];
7785
7786 switch (VA.getLocInfo()) {
7787 default: llvm_unreachable("Unknown loc info!");
7788 case CCValAssign::Full: break;
7789 case CCValAssign::AExt:
7790 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7791 break;
7792 case CCValAssign::ZExt:
7793 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7794 break;
7795 case CCValAssign::SExt:
7796 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7797 break;
7798 }
7799 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7800 bool isLittleEndian = Subtarget.isLittleEndian();
7801 // Legalize ret f64 -> ret 2 x i32.
7802 SDValue SVal =
7803 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7804 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7805 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7806 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7807 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7808 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7809 Glue = Chain.getValue(1);
7810 VA = RVLocs[++i]; // skip ahead to next loc
7811 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7812 } else
7813 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7814 Glue = Chain.getValue(1);
7815 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7816 }
7817
7818 RetOps[0] = Chain; // Update chain.
7819
7820 // Add the glue if we have it.
7821 if (Glue.getNode())
7822 RetOps.push_back(Glue);
7823
7824 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7825}
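// For illustration: with SPE, an f64 return value cannot be returned in an
// FPR, so the code above splits it with PPCISD::EXTRACT_SPE into two i32
// halves and copies them into two consecutive return registers, choosing
// which half goes first based on the target's endianness.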
7826
7827SDValue
7828PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7829 SelectionDAG &DAG) const {
7830 SDLoc dl(Op);
7831
7832 // Get the correct type for integers.
7833 EVT IntVT = Op.getValueType();
7834
7835 // Get the inputs.
7836 SDValue Chain = Op.getOperand(0);
7837 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7838 // Build a DYNAREAOFFSET node.
7839 SDValue Ops[2] = {Chain, FPSIdx};
7840 SDVTList VTs = DAG.getVTList(IntVT);
7841 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7842}
7843
7844SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7845 SelectionDAG &DAG) const {
7846 // When we pop the dynamic allocation we need to restore the SP link.
7847 SDLoc dl(Op);
7848
7849 // Get the correct type for pointers.
7850 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7851
7852 // Construct the stack pointer operand.
7853 bool isPPC64 = Subtarget.isPPC64();
7854 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7855 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7856
7857 // Get the operands for the STACKRESTORE.
7858 SDValue Chain = Op.getOperand(0);
7859 SDValue SaveSP = Op.getOperand(1);
7860
7861 // Load the old link SP.
7862 SDValue LoadLinkSP =
7863 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7864
7865 // Restore the stack pointer.
7866 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7867
7868 // Store the old link SP.
7869 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7870}
7871
7872SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7873 MachineFunction &MF = DAG.getMachineFunction();
7874 bool isPPC64 = Subtarget.isPPC64();
7875 EVT PtrVT = getPointerTy(MF.getDataLayout());
7876
7877 // Get the current return address save index. The users of this index
7878 // will primarily be the RETURNADDR lowering.
7879 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7880 int RASI = FI->getReturnAddrSaveIndex();
7881
7882 // If the return address save index hasn't been defined yet.
7883 if (!RASI) {
7884 // Find out the fixed offset of the return address save area.
7885 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7886 // Allocate the frame index for the return address save area.
7887 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7888 // Save the result.
7889 FI->setReturnAddrSaveIndex(RASI);
7890 }
7891 return DAG.getFrameIndex(RASI, PtrVT);
7892}
7893
7894SDValue
7895PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7896 MachineFunction &MF = DAG.getMachineFunction();
7897 bool isPPC64 = Subtarget.isPPC64();
7898 EVT PtrVT = getPointerTy(MF.getDataLayout());
7899
7900 // Get current frame pointer save index. The users of this index will be
7901 // primarily DYNALLOC instructions.
7902 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7903 int FPSI = FI->getFramePointerSaveIndex();
7904
7905 // If the frame pointer save index hasn't been defined yet.
7906 if (!FPSI) {
7907 // Find out the fixed offset of the frame pointer save area.
7908 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7909 // Allocate the frame index for frame pointer save area.
7910 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7911 // Save the result.
7912 FI->setFramePointerSaveIndex(FPSI);
7913 }
7914 return DAG.getFrameIndex(FPSI, PtrVT);
7915}
7916
7917SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7918 SelectionDAG &DAG) const {
7919 MachineFunction &MF = DAG.getMachineFunction();
7920 // Get the inputs.
7921 SDValue Chain = Op.getOperand(0);
7922 SDValue Size = Op.getOperand(1);
7923 SDLoc dl(Op);
7924
7925 // Get the correct type for pointers.
7926 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7927 // Negate the size.
7928 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7929 DAG.getConstant(0, dl, PtrVT), Size);
7930 // Construct a node for the frame pointer save index.
7931 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7932 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7933 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7934 if (hasInlineStackProbe(MF))
7935 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7936 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7937}
7938
7939SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7940 SelectionDAG &DAG) const {
7941 MachineFunction &MF = DAG.getMachineFunction();
7942
7943 bool isPPC64 = Subtarget.isPPC64();
7944 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7945
7946 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7947 return DAG.getFrameIndex(FI, PtrVT);
7948}
7949
7950SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7951 SelectionDAG &DAG) const {
7952 SDLoc DL(Op);
7953 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7954 DAG.getVTList(MVT::i32, MVT::Other),
7955 Op.getOperand(0), Op.getOperand(1));
7956}
7957
7958SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7959 SelectionDAG &DAG) const {
7960 SDLoc DL(Op);
7961 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7962 Op.getOperand(0), Op.getOperand(1));
7963}
7964
7965SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7966 if (Op.getValueType().isVector())
7967 return LowerVectorLoad(Op, DAG);
7968
7969 assert(Op.getValueType() == MVT::i1 &&
7970 "Custom lowering only for i1 loads");
7971
7972 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7973
7974 SDLoc dl(Op);
7975 LoadSDNode *LD = cast<LoadSDNode>(Op);
7976
7977 SDValue Chain = LD->getChain();
7978 SDValue BasePtr = LD->getBasePtr();
7979 MachineMemOperand *MMO = LD->getMemOperand();
7980
7981 SDValue NewLD =
7982 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7983 BasePtr, MVT::i8, MMO);
7984 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7985
7986 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7987 return DAG.getMergeValues(Ops, dl);
7988}
7989
7990SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7991 if (Op.getOperand(1).getValueType().isVector())
7992 return LowerVectorStore(Op, DAG);
7993
7994 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7995 "Custom lowering only for i1 stores");
7996
7997 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7998
7999 SDLoc dl(Op);
8000 StoreSDNode *ST = cast<StoreSDNode>(Op);
8001
8002 SDValue Chain = ST->getChain();
8003 SDValue BasePtr = ST->getBasePtr();
8004 SDValue Value = ST->getValue();
8005 MachineMemOperand *MMO = ST->getMemOperand();
8006
8007 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8008 Value);
8009 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8010}
8011
8012// FIXME: Remove this once the ANDI glue bug is fixed:
8013SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8014 assert(Op.getValueType() == MVT::i1 &&
8015 "Custom lowering only for i1 results");
8016
8017 SDLoc DL(Op);
8018 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8019}
8020
8021SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8022 SelectionDAG &DAG) const {
8023
8024 // Implements a vector truncate that fits in a vector register as a shuffle.
8025 // We want to legalize vector truncates down to where the source fits in
8026 // a vector register (and target is therefore smaller than vector register
8027 // size). At that point legalization will try to custom lower the sub-legal
8028 // result and get here - where we can contain the truncate as a single target
8029 // operation.
8030
8031 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8032 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8033 //
8034 // We will implement it for big-endian ordering as this (where x denotes
8035 // undefined):
8036 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8037 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8038 //
8039 // The same operation in little-endian ordering will be:
8040 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8041 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
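// The shuffle mask built below picks out exactly those lanes: little-endian
// keeps lane i * SizeMult for each result element i, while big-endian keeps
// lane (i + 1) * SizeMult - 1, i.e. the last lane of each source element.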
8042
8043 EVT TrgVT = Op.getValueType();
8044 assert(TrgVT.isVector() && "Vector type expected.");
8045 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8046 EVT EltVT = TrgVT.getVectorElementType();
8047 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8048 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8049 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8050 return SDValue();
8051
8052 SDValue N1 = Op.getOperand(0);
8053 EVT SrcVT = N1.getValueType();
8054 unsigned SrcSize = SrcVT.getSizeInBits();
8055 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8056 !llvm::has_single_bit<uint32_t>(
8057 SrcVT.getVectorElementType().getSizeInBits()))
8058 return SDValue();
8059 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8060 return SDValue();
8061
8062 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8063 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8064
8065 SDLoc DL(Op);
8066 SDValue Op1, Op2;
8067 if (SrcSize == 256) {
8068 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8069 EVT SplitVT =
8070 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8071 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8072 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8073 DAG.getConstant(0, DL, VecIdxTy));
8074 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8075 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8076 }
8077 else {
8078 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8079 Op2 = DAG.getUNDEF(WideVT);
8080 }
8081
8082 // First list the elements we want to keep.
8083 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8084 SmallVector<int, 16> ShuffV;
8085 if (Subtarget.isLittleEndian())
8086 for (unsigned i = 0; i < TrgNumElts; ++i)
8087 ShuffV.push_back(i * SizeMult);
8088 else
8089 for (unsigned i = 1; i <= TrgNumElts; ++i)
8090 ShuffV.push_back(i * SizeMult - 1);
8091
8092 // Populate the remaining elements with undefs.
8093 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8094 // ShuffV.push_back(i + WideNumElts);
8095 ShuffV.push_back(WideNumElts + 1);
8096
8097 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8098 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8099 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8100}
8101
8102/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8103/// possible.
8104SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8105 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8106 EVT ResVT = Op.getValueType();
8107 EVT CmpVT = Op.getOperand(0).getValueType();
8108 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8109 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8110 SDLoc dl(Op);
8111
8112 // Without power9-vector, we don't have a native instruction for f128 comparison.
8113 // The following transformation to a libcall is needed for setcc:
8114 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8115 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8116 SDValue Z = DAG.getSetCC(
8117 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8118 LHS, RHS, CC);
8119 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8120 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8121 }
8122
8123 // Not FP, or using SPE? Not a fsel.
8124 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8125 Subtarget.hasSPE())
8126 return Op;
8127
8128 SDNodeFlags Flags = Op.getNode()->getFlags();
8129
8130 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8131 // presence of infinities.
8132 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8133 switch (CC) {
8134 default:
8135 break;
8136 case ISD::SETOGT:
8137 case ISD::SETGT:
8138 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8139 case ISD::SETOLT:
8140 case ISD::SETLT:
8141 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8142 }
8143 }
8144
8145 // We might be able to do better than this under some circumstances, but in
8146 // general, fsel-based lowering of select is a finite-math-only optimization.
8147 // For more information, see section F.3 of the 2.06 ISA specification.
8148 // With ISA 3.0
8149 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8150 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8151 ResVT == MVT::f128)
8152 return Op;
8153
8154 // If the RHS of the comparison is a 0.0, we don't need to do the
8155 // subtraction at all.
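// PPCISD::FSEL has the semantics of the fsel instruction: FSEL(C, TV, FV)
// yields TV when C >= 0.0 and FV otherwise (NaN selects FV). With a 0.0 RHS
// the comparison operand can therefore be used directly, e.g.
// (select_cc LHS, 0.0, TV, FV, SETGE) becomes FSEL(LHS, TV, FV), and SETLT
// is handled by swapping TV and FV.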
8156 SDValue Sel1;
8157 if (isFloatingPointZero(RHS))
8158 switch (CC) {
8159 default: break; // SETUO etc aren't handled by fsel.
8160 case ISD::SETNE:
8161 std::swap(TV, FV);
8162 [[fallthrough]];
8163 case ISD::SETEQ:
8164 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8165 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8166 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8167 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8168 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8169 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8170 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8171 case ISD::SETULT:
8172 case ISD::SETLT:
8173 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8174 [[fallthrough]];
8175 case ISD::SETOGE:
8176 case ISD::SETGE:
8177 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8178 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8179 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8180 case ISD::SETUGT:
8181 case ISD::SETGT:
8182 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8183 [[fallthrough]];
8184 case ISD::SETOLE:
8185 case ISD::SETLE:
8186 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8187 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8188 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8189 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8190 }
8191
8192 SDValue Cmp;
8193 switch (CC) {
8194 default: break; // SETUO etc aren't handled by fsel.
8195 case ISD::SETNE:
8196 std::swap(TV, FV);
8197 [[fallthrough]];
8198 case ISD::SETEQ:
8199 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8200 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8201 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8202 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8203 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8204 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8205 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8206 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8207 case ISD::SETULT:
8208 case ISD::SETLT:
8209 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8210 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8211 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8212 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8213 case ISD::SETOGE:
8214 case ISD::SETGE:
8215 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8216 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8217 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8218 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8219 case ISD::SETUGT:
8220 case ISD::SETGT:
8221 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8222 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8223 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8224 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8225 case ISD::SETOLE:
8226 case ISD::SETLE:
8227 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8228 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8229 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8230 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8231 }
8232 return Op;
8233}
8234
8235static unsigned getPPCStrictOpcode(unsigned Opc) {
8236 switch (Opc) {
8237 default:
8238 llvm_unreachable("No strict version of this opcode!");
8239 case PPCISD::FCTIDZ:
8240 return PPCISD::STRICT_FCTIDZ;
8241 case PPCISD::FCTIWZ:
8242 return PPCISD::STRICT_FCTIWZ;
8243 case PPCISD::FCTIDUZ:
8244 return PPCISD::STRICT_FCTIDUZ;
8245 case PPCISD::FCTIWUZ:
8246 return PPCISD::STRICT_FCTIWUZ;
8247 case PPCISD::FCFID:
8248 return PPCISD::STRICT_FCFID;
8249 case PPCISD::FCFIDU:
8250 return PPCISD::STRICT_FCFIDU;
8251 case PPCISD::FCFIDS:
8252 return PPCISD::STRICT_FCFIDS;
8253 case PPCISD::FCFIDUS:
8254 return PPCISD::STRICT_FCFIDUS;
8255 }
8256}
8257
8258 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8259 const PPCSubtarget &Subtarget) {
8260 SDLoc dl(Op);
8261 bool IsStrict = Op->isStrictFPOpcode();
8262 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8263 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8264
8265 // TODO: Any other flags to propagate?
8266 SDNodeFlags Flags;
8267 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8268
8269 // For strict nodes, source is the second operand.
8270 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8271 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8272 MVT DestTy = Op.getSimpleValueType();
8273 assert(Src.getValueType().isFloatingPoint() &&
8274 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8275 DestTy == MVT::i64) &&
8276 "Invalid FP_TO_INT types");
8277 if (Src.getValueType() == MVT::f32) {
8278 if (IsStrict) {
8279 Src =
8280 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8281 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8282 Chain = Src.getValue(1);
8283 } else
8284 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8285 }
8286 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8287 DestTy = Subtarget.getScalarIntVT();
8288 unsigned Opc = ISD::DELETED_NODE;
8289 switch (DestTy.SimpleTy) {
8290 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8291 case MVT::i32:
8292 Opc = IsSigned ? PPCISD::FCTIWZ
8293 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8294 break;
8295 case MVT::i64:
8296 assert((IsSigned || Subtarget.hasFPCVT()) &&
8297 "i64 FP_TO_UINT is supported only with FPCVT");
8298 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8299 }
8300 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8301 SDValue Conv;
8302 if (IsStrict) {
8303 Opc = getPPCStrictOpcode(Opc);
8304 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8305 Flags);
8306 } else {
8307 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8308 }
8309 return Conv;
8310}
8311
8312void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8313 SelectionDAG &DAG,
8314 const SDLoc &dl) const {
8315 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8316 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8317 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8318 bool IsStrict = Op->isStrictFPOpcode();
8319
8320 // Convert the FP value to an int value through memory.
8321 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8322 (IsSigned || Subtarget.hasFPCVT());
8323 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8324 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8325 MachinePointerInfo MPI =
8326 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8327
8328 // Emit a store to the stack slot.
8329 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8330 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8331 if (i32Stack) {
8332 MachineFunction &MF = DAG.getMachineFunction();
8333 Alignment = Align(4);
8334 MachineMemOperand *MMO =
8335 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8336 SDValue Ops[] = { Chain, Tmp, FIPtr };
8337 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8338 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8339 } else
8340 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8341
8342 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8343 // add in a bias on big endian.
8344 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8345 !Subtarget.isLittleEndian()) {
8346 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8347 DAG.getConstant(4, dl, FIPtr.getValueType()));
8348 MPI = MPI.getWithOffset(4);
8349 }
8350
8351 RLI.Chain = Chain;
8352 RLI.Ptr = FIPtr;
8353 RLI.MPI = MPI;
8354 RLI.Alignment = Alignment;
8355}
8356
8357/// Custom lowers floating point to integer conversions to use
8358/// the direct move instructions available in ISA 2.07 to avoid the
8359/// need for load/store combinations.
8360SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8361 SelectionDAG &DAG,
8362 const SDLoc &dl) const {
8363 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8364 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8365 if (Op->isStrictFPOpcode())
8366 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8367 else
8368 return Mov;
8369}
8370
8371SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8372 const SDLoc &dl) const {
8373 bool IsStrict = Op->isStrictFPOpcode();
8374 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8375 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8376 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8377 EVT SrcVT = Src.getValueType();
8378 EVT DstVT = Op.getValueType();
8379
8380 // FP to INT conversions are legal for f128 only when P9 vector support is available.
8381 if (SrcVT == MVT::f128)
8382 return Subtarget.hasP9Vector() ? Op : SDValue();
8383
8384 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8385 // PPC (the libcall is not available).
8386 if (SrcVT == MVT::ppcf128) {
8387 if (DstVT == MVT::i32) {
8388 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8389 // set other fast-math flags to FP operations in both strict and
8390 // non-strict cases. (FP_TO_SINT, FSUB)
8391 SDNodeFlags Flags;
8392 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8393
8394 if (IsSigned) {
8395 SDValue Lo, Hi;
8396 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8397
8398 // Add the two halves of the long double in round-to-zero mode, and use
8399 // a smaller FP_TO_SINT.
8400 if (IsStrict) {
8401 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8402 DAG.getVTList(MVT::f64, MVT::Other),
8403 {Op.getOperand(0), Lo, Hi}, Flags);
8404 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8405 DAG.getVTList(MVT::i32, MVT::Other),
8406 {Res.getValue(1), Res}, Flags);
8407 } else {
8408 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8409 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8410 }
8411 } else {
8412 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8413 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8414 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8415 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8416 if (IsStrict) {
8417 // Sel = Src < 0x80000000
8418 // FltOfs = select Sel, 0.0, 0x80000000
8419 // IntOfs = select Sel, 0, 0x80000000
8420 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8421 SDValue Chain = Op.getOperand(0);
8422 EVT SetCCVT =
8423 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8424 EVT DstSetCCVT =
8425 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8426 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8427 Chain, true);
8428 Chain = Sel.getValue(1);
8429
8430 SDValue FltOfs = DAG.getSelect(
8431 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8432 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8433
8434 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8435 DAG.getVTList(SrcVT, MVT::Other),
8436 {Chain, Src, FltOfs}, Flags);
8437 Chain = Val.getValue(1);
8438 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8439 DAG.getVTList(DstVT, MVT::Other),
8440 {Chain, Val}, Flags);
8441 Chain = SInt.getValue(1);
8442 SDValue IntOfs = DAG.getSelect(
8443 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8444 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8445 return DAG.getMergeValues({Result, Chain}, dl);
8446 } else {
8447 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8448 // FIXME: generated code sucks.
8449 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8450 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8451 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8452 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8453 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8454 }
8455 }
8456 }
8457
8458 return SDValue();
8459 }
8460
8461 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8462 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8463
8464 ReuseLoadInfo RLI;
8465 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8466
8467 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8468 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8469}
8470
8471// We're trying to insert a regular store, S, and then a load, L. If the
8472// incoming value, O, is a load, we might just be able to have our load use the
8473// address used by O. However, we don't know if anything else will store to
8474// that address before we can load from it. To prevent this situation, we need
8475// to insert our load, L, into the chain as a peer of O. To do this, we give L
8476// the same chain operand as O, we create a token factor from the chain results
8477// of O and L, and we replace all uses of O's chain result with that token
8478// factor (this last part is handled by makeEquivalentMemoryOrdering).
8479bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8480 ReuseLoadInfo &RLI,
8481 SelectionDAG &DAG,
8482 ISD::LoadExtType ET) const {
8483 // Conservatively skip reusing for constrained FP nodes.
8484 if (Op->isStrictFPOpcode())
8485 return false;
8486
8487 SDLoc dl(Op);
8488 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8489 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8490 if (ET == ISD::NON_EXTLOAD &&
8491 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8492 isOperationLegalOrCustom(Op.getOpcode(),
8493 Op.getOperand(0).getValueType())) {
8494
8495 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8496 return true;
8497 }
8498
8499 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8500 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8501 LD->isNonTemporal())
8502 return false;
8503 if (LD->getMemoryVT() != MemVT)
8504 return false;
8505
8506 // If the result of the load is an illegal type, then we can't build a
8507 // valid chain for reuse since the legalised loads and token factor node that
8508 // ties the legalised loads together uses a different output chain than the
8509 // illegal load.
8510 if (!isTypeLegal(LD->getValueType(0)))
8511 return false;
8512
8513 RLI.Ptr = LD->getBasePtr();
8514 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8515 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8516 "Non-pre-inc AM on PPC?");
8517 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8518 LD->getOffset());
8519 }
8520
8521 RLI.Chain = LD->getChain();
8522 RLI.MPI = LD->getPointerInfo();
8523 RLI.IsDereferenceable = LD->isDereferenceable();
8524 RLI.IsInvariant = LD->isInvariant();
8525 RLI.Alignment = LD->getAlign();
8526 RLI.AAInfo = LD->getAAInfo();
8527 RLI.Ranges = LD->getRanges();
8528
8529 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8530 return true;
8531}
8532
8533 /// Analyze the profitability of a direct move:
8534 /// prefer a float load over an int load plus a direct move
8535 /// when there is no integer use of the int load.
8536bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8537 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8538 if (Origin->getOpcode() != ISD::LOAD)
8539 return true;
8540
8541 // If there is no LXSIBZX/LXSIHZX, like Power8,
8542 // prefer direct move if the memory size is 1 or 2 bytes.
8543 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8544 if (!Subtarget.hasP9Vector() &&
8545 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8546 return true;
8547
8548 for (SDUse &Use : Origin->uses()) {
8549
8550 // Only look at the users of the loaded value.
8551 if (Use.getResNo() != 0)
8552 continue;
8553
8554 SDNode *User = Use.getUser();
8555 if (User->getOpcode() != ISD::SINT_TO_FP &&
8556 User->getOpcode() != ISD::UINT_TO_FP &&
8557 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8558 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8559 return true;
8560 }
8561
8562 return false;
8563}
8564
8565 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8566 const PPCSubtarget &Subtarget,
8567 SDValue Chain = SDValue()) {
8568 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8569 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8570 SDLoc dl(Op);
8571
8572 // TODO: Any other flags to propagate?
8573 SDNodeFlags Flags;
8574 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8575
8576 // If we have FCFIDS, then use it when converting to single-precision.
8577 // Otherwise, convert to double-precision and then round.
8578 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8579 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8580 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8581 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8582 if (Op->isStrictFPOpcode()) {
8583 if (!Chain)
8584 Chain = Op.getOperand(0);
8585 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8586 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8587 } else
8588 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8589}
8590
8591/// Custom lowers integer to floating point conversions to use
8592/// the direct move instructions available in ISA 2.07 to avoid the
8593/// need for load/store combinations.
8594SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8595 SelectionDAG &DAG,
8596 const SDLoc &dl) const {
8597 assert((Op.getValueType() == MVT::f32 ||
8598 Op.getValueType() == MVT::f64) &&
8599 "Invalid floating point type as target of conversion");
8600 assert(Subtarget.hasFPCVT() &&
8601 "Int to FP conversions with direct moves require FPCVT");
8602 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8603 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8604 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8605 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8606 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8607 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8608 return convertIntToFP(Op, Mov, DAG, Subtarget);
8609}
8610
8611static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8612
8613 EVT VecVT = Vec.getValueType();
8614 assert(VecVT.isVector() && "Expected a vector type.");
8615 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8616
8617 EVT EltVT = VecVT.getVectorElementType();
8618 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8619 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8620
8621 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8622 SmallVector<SDValue, 16> Ops(NumConcat);
8623 Ops[0] = Vec;
8624 SDValue UndefVec = DAG.getUNDEF(VecVT);
8625 for (unsigned i = 1; i < NumConcat; ++i)
8626 Ops[i] = UndefVec;
8627
8628 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8629}
8630
8631SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8632 const SDLoc &dl) const {
8633 bool IsStrict = Op->isStrictFPOpcode();
8634 unsigned Opc = Op.getOpcode();
8635 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8638 "Unexpected conversion type");
8639 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8640 "Supports conversions to v2f64/v4f32 only.");
8641
8642 // TODO: Any other flags to propagate?
8643 SDNodeFlags Flags;
8644 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8645
8646 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8647 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8648
8649 SDValue Wide = widenVec(DAG, Src, dl);
8650 EVT WideVT = Wide.getValueType();
8651 unsigned WideNumElts = WideVT.getVectorNumElements();
8652 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8653
8654 SmallVector<int, 16> ShuffV;
8655 for (unsigned i = 0; i < WideNumElts; ++i)
8656 ShuffV.push_back(i + WideNumElts);
8657
8658 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8659 int SaveElts = FourEltRes ? 4 : 2;
8660 if (Subtarget.isLittleEndian())
8661 for (int i = 0; i < SaveElts; i++)
8662 ShuffV[i * Stride] = i;
8663 else
8664 for (int i = 1; i <= SaveElts; i++)
8665 ShuffV[i * Stride - 1] = i - 1;
8666
8667 SDValue ShuffleSrc2 =
8668 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8669 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8670
8671 SDValue Extend;
8672 if (SignedConv) {
8673 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8674 EVT ExtVT = Src.getValueType();
8675 if (Subtarget.hasP9Altivec())
8676 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8677 IntermediateVT.getVectorNumElements());
8678
8679 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8680 DAG.getValueType(ExtVT));
8681 } else
8682 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8683
8684 if (IsStrict)
8685 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8686 {Op.getOperand(0), Extend}, Flags);
8687
8688 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8689}
8690
8691SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8692 SelectionDAG &DAG) const {
8693 SDLoc dl(Op);
8694 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8695 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8696 bool IsStrict = Op->isStrictFPOpcode();
8697 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8698 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8699
8700 // TODO: Any other flags to propagate?
8701 SDNodeFlags Flags;
8702 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8703
8704 EVT InVT = Src.getValueType();
8705 EVT OutVT = Op.getValueType();
8706 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8707 isOperationCustom(Op.getOpcode(), InVT))
8708 return LowerINT_TO_FPVector(Op, DAG, dl);
8709
8710 // Conversions to f128 are legal only when P9 vector support is available.
8711 if (Op.getValueType() == MVT::f128)
8712 return Subtarget.hasP9Vector() ? Op : SDValue();
8713
8714 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8715 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8716 return SDValue();
8717
8718 if (Src.getValueType() == MVT::i1) {
8719 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8720 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8721 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8722 if (IsStrict)
8723 return DAG.getMergeValues({Sel, Chain}, dl);
8724 else
8725 return Sel;
8726 }
8727
8728 // If we have direct moves, we can do all the conversion, skip the store/load
8729 // however, without FPCVT we can't do most conversions.
8730 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8731 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8732 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8733
8734 assert((IsSigned || Subtarget.hasFPCVT()) &&
8735 "UINT_TO_FP is supported only with FPCVT");
8736
8737 if (Src.getValueType() == MVT::i64) {
8738 SDValue SINT = Src;
8739 // When converting to single-precision, we actually need to convert
8740 // to double-precision first and then round to single-precision.
8741 // To avoid double-rounding effects during that operation, we have
8742 // to prepare the input operand. Bits that might be truncated when
8743 // converting to double-precision are replaced by a bit that won't
8744 // be lost at this stage, but is below the single-precision rounding
8745 // position.
8746 //
8747 // However, if afn is in effect, accept double
8748 // rounding to avoid the extra overhead.
8749 // FIXME: Currently INT_TO_FP can't support fast math flags because
8750 // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8751 // false.
8752 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8753 !Op->getFlags().hasApproximateFuncs()) {
8754
8755 // Twiddle input to make sure the low 11 bits are zero. (If this
8756 // is the case, we are guaranteed the value will fit into the 53 bit
8757 // mantissa of an IEEE double-precision value without rounding.)
8758 // If any of those low 11 bits were not zero originally, make sure
8759 // bit 12 (value 2048) is set instead, so that the final rounding
8760 // to single-precision gets the correct result.
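// For example, if the low 11 bits of SINT are 0x001, then (0x001 + 2047)
// carries into bit 11, so after the OR and the AND with ~2047 below the low
// 11 bits are cleared and bit 11 (the "sticky" bit) is set; if the low 11
// bits are already zero, the value is left unchanged.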
8761 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8762 SINT, DAG.getConstant(2047, dl, MVT::i64));
8763 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8764 Round, DAG.getConstant(2047, dl, MVT::i64));
8765 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8766 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8767 DAG.getSignedConstant(-2048, dl, MVT::i64));
8768
8769 // However, we cannot use that value unconditionally: if the magnitude
8770 // of the input value is small, the bit-twiddling we did above might
8771 // end up visibly changing the output. Fortunately, in that case, we
8772 // don't need to twiddle bits since the original input will convert
8773 // exactly to double-precision floating-point already. Therefore,
8774 // construct a conditional to use the original value if the top 11
8775 // bits are all sign-bit copies, and use the rounded value computed
8776 // above otherwise.
8777 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8778 SINT, DAG.getConstant(53, dl, MVT::i32));
8779 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8780 Cond, DAG.getConstant(1, dl, MVT::i64));
8781 Cond = DAG.getSetCC(
8782 dl,
8783 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8784 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8785
8786 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8787 }
8788
8789 ReuseLoadInfo RLI;
8790 SDValue Bits;
8791
8792 MachineFunction &MF = DAG.getMachineFunction();
8793 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8794 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8795 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8796 if (RLI.ResChain)
8797 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8798 } else if (Subtarget.hasLFIWAX() &&
8799 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8800 MachineMemOperand *MMO =
8801 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8802 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8803 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8804 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8805 DAG.getVTList(MVT::f64, MVT::Other),
8806 Ops, MVT::i32, MMO);
8807 if (RLI.ResChain)
8808 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8809 } else if (Subtarget.hasFPCVT() &&
8810 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8811 MachineMemOperand *MMO =
8812 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8813 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8814 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8815 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8816 DAG.getVTList(MVT::f64, MVT::Other),
8817 Ops, MVT::i32, MMO);
8818 if (RLI.ResChain)
8819 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8820 } else if (((Subtarget.hasLFIWAX() &&
8821 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8822 (Subtarget.hasFPCVT() &&
8823 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8824 SINT.getOperand(0).getValueType() == MVT::i32) {
8825 MachineFrameInfo &MFI = MF.getFrameInfo();
8826 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8827
8828 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8829 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8830
8831 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8832 MachinePointerInfo::getFixedStack(
8833 DAG.getMachineFunction(), FrameIdx));
8834 Chain = Store;
8835
8836 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8837 "Expected an i32 store");
8838
8839 RLI.Ptr = FIdx;
8840 RLI.Chain = Chain;
8841 RLI.MPI =
8842 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8843 RLI.Alignment = Align(4);
8844
8845 MachineMemOperand *MMO =
8846 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8847 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8848 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8849 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8850 PPCISD::LFIWZX : PPCISD::LFIWAX,
8851 dl, DAG.getVTList(MVT::f64, MVT::Other),
8852 Ops, MVT::i32, MMO);
8853 Chain = Bits.getValue(1);
8854 } else
8855 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8856
8857 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8858 if (IsStrict)
8859 Chain = FP.getValue(1);
8860
8861 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8862 if (IsStrict)
8863 FP = DAG.getNode(
8864 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8865 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
8866 Flags);
8867 else
8868 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8869 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8870 }
8871 return FP;
8872 }
8873
8874 assert(Src.getValueType() == MVT::i32 &&
8875 "Unhandled INT_TO_FP type in custom expander!");
8876 // Since we only generate this in 64-bit mode, we can take advantage of
8877 // 64-bit registers. In particular, sign extend the input value into the
8878 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8879 // then lfd it and fcfid it.
8880 MachineFunction &MF = DAG.getMachineFunction();
8881 MachineFrameInfo &MFI = MF.getFrameInfo();
8882 EVT PtrVT = getPointerTy(MF.getDataLayout());
8883
8884 SDValue Ld;
8885 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8886 ReuseLoadInfo RLI;
8887 bool ReusingLoad;
8888 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8889 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8890 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8891
8892 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8893 MachinePointerInfo::getFixedStack(
8894 DAG.getMachineFunction(), FrameIdx));
8895 Chain = Store;
8896
8897 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8898 "Expected an i32 store");
8899
8900 RLI.Ptr = FIdx;
8901 RLI.Chain = Chain;
8902 RLI.MPI =
8903 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8904 RLI.Alignment = Align(4);
8905 }
8906
8907 MachineMemOperand *MMO =
8908 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8909 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8910 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8911 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8912 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8913 MVT::i32, MMO);
8914 Chain = Ld.getValue(1);
8915 if (ReusingLoad && RLI.ResChain) {
8916 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
8917 }
8918 } else {
8919 assert(Subtarget.isPPC64() &&
8920 "i32->FP without LFIWAX supported only on PPC64");
8921
8922 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8923 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8924
8925 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8926
8927 // STD the extended value into the stack slot.
8928 SDValue Store = DAG.getStore(
8929 Chain, dl, Ext64, FIdx,
8930 MachinePointerInfo::getFixedStack(MF, FrameIdx));
8931 Chain = Store;
8932
8933 // Load the value as a double.
8934 Ld = DAG.getLoad(
8935 MVT::f64, dl, Chain, FIdx,
8936 MachinePointerInfo::getFixedStack(MF, FrameIdx));
8937 Chain = Ld.getValue(1);
8938 }
8939
8940 // FCFID it and return it.
8941 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8942 if (IsStrict)
8943 Chain = FP.getValue(1);
8944 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8945 if (IsStrict)
8946 FP = DAG.getNode(
8947 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8948 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
8949 else
8950 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8951 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8952 }
8953 return FP;
8954}
8955
8956SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
8957 SelectionDAG &DAG) const {
8958 SDLoc Dl(Op);
8959 MachineFunction &MF = DAG.getMachineFunction();
8960 EVT PtrVT = getPointerTy(MF.getDataLayout());
8961 SDValue Chain = Op.getOperand(0);
8962
8963 // If requested mode is constant, just use simpler mtfsb/mffscrni
8964 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
8965 uint64_t Mode = CVal->getZExtValue();
8966 assert(Mode < 4 && "Unsupported rounding mode!");
8967 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
8968 if (Subtarget.isISA3_0())
8969 return SDValue(
8970 DAG.getMachineNode(
8971 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
8972 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
8973 1);
8974 SDNode *SetHi = DAG.getMachineNode(
8975 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
8976 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
8977 SDNode *SetLo = DAG.getMachineNode(
8978 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
8979 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
8980 return SDValue(SetLo, 0);
8981 }
8982
8983 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
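// On the four valid inputs this gives (LLVM mode -> FPSCR RN bits):
// 0 (toward zero) -> 01, 1 (nearest) -> 00, 2 (+inf) -> 10, 3 (-inf) -> 11.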
8984 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
8985 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
8986 DAG.getConstant(3, Dl, MVT::i32));
8987 SDValue DstFlag = DAG.getNode(
8988 ISD::XOR, Dl, MVT::i32, SrcFlag,
8989 DAG.getNode(ISD::AND, Dl, MVT::i32,
8990 DAG.getNOT(Dl,
8991 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
8992 MVT::i32),
8993 One));
8994 // For Power9, there's faster mffscrn, and we don't need to read FPSCR
8995 SDValue MFFS;
8996 if (!Subtarget.isISA3_0()) {
8997 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
8998 Chain = MFFS.getValue(1);
8999 }
9000 SDValue NewFPSCR;
9001 if (Subtarget.isPPC64()) {
9002 if (Subtarget.isISA3_0()) {
9003 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9004 } else {
9005 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9006 SDNode *InsertRN = DAG.getMachineNode(
9007 PPC::RLDIMI, Dl, MVT::i64,
9008 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9009 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9010 DAG.getTargetConstant(0, Dl, MVT::i32),
9011 DAG.getTargetConstant(62, Dl, MVT::i32)});
9012 NewFPSCR = SDValue(InsertRN, 0);
9013 }
9014 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9015 } else {
9016 // In 32-bit mode, store f64, load and update the lower half.
9017 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9018 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9019 SDValue Addr = Subtarget.isLittleEndian()
9020 ? StackSlot
9021 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9022 DAG.getConstant(4, Dl, PtrVT));
9023 if (Subtarget.isISA3_0()) {
9024 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9025 } else {
9026 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9027 SDValue Tmp =
9028 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9029 Chain = Tmp.getValue(1);
9030 Tmp = SDValue(DAG.getMachineNode(
9031 PPC::RLWIMI, Dl, MVT::i32,
9032 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9033 DAG.getTargetConstant(30, Dl, MVT::i32),
9034 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9035 0);
9036 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9037 }
9038 NewFPSCR =
9039 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9040 Chain = NewFPSCR.getValue(1);
9041 }
9042 if (Subtarget.isISA3_0())
9043 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9044 {NewFPSCR, Chain}),
9045 1);
9046 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9047 SDNode *MTFSF = DAG.getMachineNode(
9048 PPC::MTFSF, Dl, MVT::Other,
9049 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9050 return SDValue(MTFSF, 0);
9051}
9052
9053SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9054 SelectionDAG &DAG) const {
9055 SDLoc dl(Op);
9056 /*
9057 The rounding mode is in bits 30:31 of the FPSCR, and has the following
9058 settings:
9059 00 Round to nearest
9060 01 Round to 0
9061 10 Round to +inf
9062 11 Round to -inf
9063
9064 GET_ROUNDING, on the other hand, expects the following:
9065 -1 Undefined
9066 0 Round to 0
9067 1 Round to nearest
9068 2 Round to +inf
9069 3 Round to -inf
9070
9071 To perform the conversion, we do:
9072 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9073 */
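// Checking the formula on the four RN encodings: 00 -> 0 ^ (3 >> 1) = 1
// (nearest), 01 -> 1 ^ (2 >> 1) = 0 (to zero), 10 -> 2 ^ (1 >> 1) = 2
// (+inf), 11 -> 3 ^ (0 >> 1) = 3 (-inf), matching the GET_ROUNDING values.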
9074
9075 MachineFunction &MF = DAG.getMachineFunction();
9076 EVT VT = Op.getValueType();
9077 EVT PtrVT = getPointerTy(MF.getDataLayout());
9078
9079 // Save FP Control Word to register
9080 SDValue Chain = Op.getOperand(0);
9081 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9082 Chain = MFFS.getValue(1);
9083
9084 SDValue CWD;
9085 if (isTypeLegal(MVT::i64)) {
9086 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9087 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9088 } else {
9089 // Save FP register to stack slot
9090 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9091 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9092 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9093
9094 // Load FP Control Word from low 32 bits of stack slot.
9096 "Stack slot adjustment is valid only on big endian subtargets!");
9097 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9098 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9099 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9100 Chain = CWD.getValue(1);
9101 }
9102
9103 // Transform as necessary
9104 SDValue CWD1 =
9105 DAG.getNode(ISD::AND, dl, MVT::i32,
9106 CWD, DAG.getConstant(3, dl, MVT::i32));
9107 SDValue CWD2 =
9108 DAG.getNode(ISD::SRL, dl, MVT::i32,
9109 DAG.getNode(ISD::AND, dl, MVT::i32,
9110 DAG.getNode(ISD::XOR, dl, MVT::i32,
9111 CWD, DAG.getConstant(3, dl, MVT::i32)),
9112 DAG.getConstant(3, dl, MVT::i32)),
9113 DAG.getConstant(1, dl, MVT::i32));
9114
9115 SDValue RetVal =
9116 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9117
9118 RetVal =
9119 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9120 dl, VT, RetVal);
9121
9122 return DAG.getMergeValues({RetVal, Chain}, dl);
9123}
9124
9125SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9126 EVT VT = Op.getValueType();
9127 uint64_t BitWidth = VT.getSizeInBits();
9128 SDLoc dl(Op);
9129 assert(Op.getNumOperands() == 3 &&
9130 VT == Op.getOperand(1).getValueType() &&
9131 "Unexpected SHL!");
9132
9133 // Expand into a bunch of logical ops. Note that these ops
9134 // depend on the PPC behavior for oversized shift amounts.
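// For example, with 32-bit parts and Amt = 40: Hi << 40 and Lo >> (32 - 40)
// both produce 0 (PPC 32-bit shifts by amounts 32..63 yield zero), so OutHi
// reduces to Lo << (40 - 32) and OutLo to Lo << 40 == 0, which is the
// correct 64-bit result for an oversized shift.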
9135 SDValue Lo = Op.getOperand(0);
9136 SDValue Hi = Op.getOperand(1);
9137 SDValue Amt = Op.getOperand(2);
9138 EVT AmtVT = Amt.getValueType();
9139
9140 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9141 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9142 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9143 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9144 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9145 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9146 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9147 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9148 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9149 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9150 SDValue OutOps[] = { OutLo, OutHi };
9151 return DAG.getMergeValues(OutOps, dl);
9152}
9153
9154SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9155 EVT VT = Op.getValueType();
9156 SDLoc dl(Op);
9157 uint64_t BitWidth = VT.getSizeInBits();
9158 assert(Op.getNumOperands() == 3 &&
9159 VT == Op.getOperand(1).getValueType() &&
9160 "Unexpected SRL!");
9161
9162 // Expand into a bunch of logical ops. Note that these ops
9163 // depend on the PPC behavior for oversized shift amounts.
9164 SDValue Lo = Op.getOperand(0);
9165 SDValue Hi = Op.getOperand(1);
9166 SDValue Amt = Op.getOperand(2);
9167 EVT AmtVT = Amt.getValueType();
9168
9169 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9170 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9171 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9172 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9173 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9174 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9175 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9176 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9177 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9178 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9179 SDValue OutOps[] = { OutLo, OutHi };
9180 return DAG.getMergeValues(OutOps, dl);
9181}
9182
9183SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9184 SDLoc dl(Op);
9185 EVT VT = Op.getValueType();
9186 uint64_t BitWidth = VT.getSizeInBits();
9187 assert(Op.getNumOperands() == 3 &&
9188 VT == Op.getOperand(1).getValueType() &&
9189 "Unexpected SRA!");
9190
9191 // Expand into a bunch of logical ops, followed by a select_cc.
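// Unlike SRL_PARTS, the low word cannot be formed with ORs alone: when
// Amt > BitWidth the low result is Hi shifted arithmetically by
// (Amt - BitWidth), whose vacated bits are sign copies rather than zeros,
// so a select_cc on (Amt - BitWidth) chooses between the two cases.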
9192 SDValue Lo = Op.getOperand(0);
9193 SDValue Hi = Op.getOperand(1);
9194 SDValue Amt = Op.getOperand(2);
9195 EVT AmtVT = Amt.getValueType();
9196
9197 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9198 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9199 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9200 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9201 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9202 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9203 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9204 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9205 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9206 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9207 Tmp4, Tmp6, ISD::SETLE);
9208 SDValue OutOps[] = { OutLo, OutHi };
9209 return DAG.getMergeValues(OutOps, dl);
9210}
9211
9212SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9213 SelectionDAG &DAG) const {
9214 SDLoc dl(Op);
9215 EVT VT = Op.getValueType();
9216 unsigned BitWidth = VT.getSizeInBits();
9217
9218 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9219 SDValue X = Op.getOperand(0);
9220 SDValue Y = Op.getOperand(1);
9221 SDValue Z = Op.getOperand(2);
9222 EVT AmtVT = Z.getValueType();
9223
9224 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9225 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9226 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9227 // on PowerPC shift by BW being well defined.
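// For example, fshl with BitWidth = 32 and Z = 0: SubZ = 32, the PPC shift
// right by 32 yields 0, and the result is simply X << 0 = X, so no explicit
// check for a zero shift amount is needed.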
9228 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9229 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9230 SDValue SubZ =
9231 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9232 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9233 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9234 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9235}
9236
9237//===----------------------------------------------------------------------===//
9238// Vector related lowering.
9239//
9240
9241/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9242/// element size of SplatSize. Cast the result to VT.
9243static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9244 SelectionDAG &DAG, const SDLoc &dl) {
9245 static const MVT VTys[] = { // canonical VT to use for each size.
9246 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9247 };
9248
9249 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9250
9251 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9252 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9253 SplatSize = 1;
9254 Val = 0xFF;
9255 }
9256
9257 EVT CanonicalVT = VTys[SplatSize-1];
9258
9259 // Build a canonical splat for this value.
9260 // Explicitly truncate APInt here, as this API is used with a mix of
9261 // signed and unsigned values.
9262 return DAG.getBitcast(
9263 ReqVT,
9264 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9265}
9266
9267/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9268/// specified intrinsic ID.
9269 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9270 const SDLoc &dl, EVT DestVT = MVT::Other) {
9271 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9272 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9273 DAG.getConstant(IID, dl, MVT::i32), Op);
9274}
9275
9276/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9277/// specified intrinsic ID.
9278 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9279 SelectionDAG &DAG, const SDLoc &dl,
9280 EVT DestVT = MVT::Other) {
9281 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9282 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9283 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9284}
9285
9286/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9287/// specified intrinsic ID.
9288static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9289 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9290 EVT DestVT = MVT::Other) {
9291 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9292 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9293 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9294}
9295
9296/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9297/// amount. The result has the specified value type.
9298static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9299 SelectionDAG &DAG, const SDLoc &dl) {
9300 // Force LHS/RHS to be the right type.
9301 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9302 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9303
9304 int Ops[16];
9305 for (unsigned i = 0; i != 16; ++i)
9306 Ops[i] = i + Amt;
9307 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9308 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9309}
9310
9311/// Do we have an efficient pattern in a .td file for this node?
9312///
9313/// \param V - pointer to the BuildVectorSDNode being matched
9314/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9315///
9316/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9317/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9318/// the opposite is true (expansion is beneficial) are:
9319/// - The node builds a vector out of integers that are not 32 or 64-bits
9320/// - The node builds a vector out of constants
9321/// - The node is a "load-and-splat"
9322/// In all other cases, we will choose to keep the BUILD_VECTOR.
9323 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9324 bool HasDirectMove,
9325 bool HasP8Vector) {
9326 EVT VecVT = V->getValueType(0);
9327 bool RightType = VecVT == MVT::v2f64 ||
9328 (HasP8Vector && VecVT == MVT::v4f32) ||
9329 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9330 if (!RightType)
9331 return false;
9332
9333 bool IsSplat = true;
9334 bool IsLoad = false;
9335 SDValue Op0 = V->getOperand(0);
9336
9337 // This function is called in a block that confirms the node is not a constant
9338 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9339 // different constants.
9340 if (V->isConstant())
9341 return false;
9342 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9343 if (V->getOperand(i).isUndef())
9344 return false;
9345 // We want to expand nodes that represent load-and-splat even if the
9346 // loaded value is a floating point truncation or conversion to int.
9347 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9348 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9349 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9350 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9351 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9352 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9353 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9354 IsLoad = true;
9355 // If the operands are different or the input is not a load and has more
9356 // uses than just this BV node, then it isn't a splat.
9357 if (V->getOperand(i) != Op0 ||
9358 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9359 IsSplat = false;
9360 }
9361 return !(IsSplat && IsLoad);
9362}
9363
9364// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9365SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9366
9367 SDLoc dl(Op);
9368 SDValue Op0 = Op->getOperand(0);
9369
9370 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9371 (Op.getValueType() != MVT::f128))
9372 return SDValue();
9373
9374 SDValue Lo = Op0.getOperand(0);
9375 SDValue Hi = Op0.getOperand(1);
9376 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9377 return SDValue();
9378
9379 if (!Subtarget.isLittleEndian())
9380 std::swap(Lo, Hi);
9381
9382 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9383}
9384
9385static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9386 const SDValue *InputLoad = &Op;
9387 while (InputLoad->getOpcode() == ISD::BITCAST)
9388 InputLoad = &InputLoad->getOperand(0);
9389 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9390 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9391 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9392 InputLoad = &InputLoad->getOperand(0);
9393 }
9394 if (InputLoad->getOpcode() != ISD::LOAD)
9395 return nullptr;
9396 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9397 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9398}
9399
9400// Convert the argument APFloat to a single precision APFloat if there is no
9401// loss in information during the conversion to single precision APFloat and the
9402// resulting number is not a denormal number. Return true if successful.
9403bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9404 APFloat APFloatToConvert = ArgAPFloat;
9405 bool LosesInfo = true;
9406  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9407 &LosesInfo);
9408 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9409 if (Success)
9410 ArgAPFloat = APFloatToConvert;
9411 return Success;
9412}
9413
9414// Bitcast the argument APInt to a double and convert it to a single precision
9415// APFloat, bitcast the APFloat to an APInt and assign it to the original
9416// argument if there is no loss in information during the conversion from
9417// double to single precision APFloat and the resulting number is not a denormal
9418// number. Return true if successful.
9419bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9420 double DpValue = ArgAPInt.bitsToDouble();
9421 APFloat APFloatDp(DpValue);
9422 bool Success = convertToNonDenormSingle(APFloatDp);
9423 if (Success)
9424 ArgAPInt = APFloatDp.bitcastToAPInt();
9425 return Success;
9426}
9427
9428// Nondestructive check for convertToNonDenormSingle.
9429bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9430 // Only convert if it loses info, since XXSPLTIDP should
9431 // handle the other case.
9432 APFloat APFloatToConvert = ArgAPFloat;
9433 bool LosesInfo = true;
9434  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9435 &LosesInfo);
9436
9437 return (!LosesInfo && !APFloatToConvert.isDenormal());
9438}
9439
9440static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9441 unsigned &Opcode) {
9442 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9443 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9444 return false;
9445
9446 EVT Ty = Op->getValueType(0);
9447 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9448 // as we cannot handle extending loads for these types.
9449 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9450 ISD::isNON_EXTLoad(InputNode))
9451 return true;
9452
9453 EVT MemVT = InputNode->getMemoryVT();
9454 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9455 // memory VT is the same vector element VT type.
9456 // The loads feeding into the v8i16 and v16i8 types will be extending because
9457 // scalar i8/i16 are not legal types.
9458 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9459 (MemVT == Ty.getVectorElementType()))
9460 return true;
9461
9462 if (Ty == MVT::v2i64) {
9463 // Check the extend type, when the input type is i32, and the output vector
9464 // type is v2i64.
9465 if (MemVT == MVT::i32) {
9466 if (ISD::isZEXTLoad(InputNode))
9467 Opcode = PPCISD::ZEXT_LD_SPLAT;
9468 if (ISD::isSEXTLoad(InputNode))
9469 Opcode = PPCISD::SEXT_LD_SPLAT;
9470 }
9471 return true;
9472 }
9473 return false;
9474}
9475
9476static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9477 bool IsLittleEndian) {
9478 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9479
9480 BitMask.clearAllBits();
9481 EVT VT = BVN.getValueType(0);
9482 unsigned VTSize = VT.getSizeInBits();
9483 APInt ConstValue(VTSize, 0);
9484
9485 unsigned EltWidth = VT.getScalarSizeInBits();
9486
9487 unsigned BitPos = 0;
9488 for (auto OpVal : BVN.op_values()) {
9489 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9490
9491 if (!CN)
9492 return false;
9493 // The elements in a vector register are ordered in reverse byte order
9494 // between little-endian and big-endian modes.
9495 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9496 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9497 BitPos += EltWidth;
9498 }
9499
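  // Worked example (little-endian): a v16i8 constant <0xFF,0,0xFF,0,...,0> packs its
  // 0xFF bytes into byte positions 0 and 2 of ConstValue, so the loop below sets bits
  // 0 and 2 of the MTVSRBMI mask; any byte that is neither 0x00 nor 0xFF rejects the
  // pattern.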
9500 for (unsigned J = 0; J < 16; ++J) {
9501 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9502 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9503 return false;
9504 if (ExtractValue == 0xFF)
9505 BitMask.setBit(J);
9506 }
9507 return true;
9508}
9509
9510// If this is a case we can't handle, return null and let the default
9511// expansion code take care of it. If we CAN select this case, and if it
9512// selects to a single instruction, return Op. Otherwise, if we can codegen
9513// this case more efficiently than a constant pool load, lower it to the
9514// sequence of ops that should be used.
9515SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9516 SelectionDAG &DAG) const {
9517 SDLoc dl(Op);
9518 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9519 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9520
9521 if (Subtarget.hasP10Vector()) {
9522 APInt BitMask(32, 0);
9523 // If the value of the vector is all zeros or all ones,
9524 // we do not convert it to MTVSRBMI.
9525 // The xxleqv instruction sets a vector with all ones.
9526 // The xxlxor instruction sets a vector with all zeros.
9527 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9528 BitMask != 0 && BitMask != 0xffff) {
9529 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9530 MachineSDNode *MSDNode =
9531 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9532 SDValue SDV = SDValue(MSDNode, 0);
9533 EVT DVT = BVN->getValueType(0);
9534 EVT SVT = SDV.getValueType();
9535 if (SVT != DVT) {
9536 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9537 }
9538 return SDV;
9539 }
9540 // Recognize build vector patterns to emit VSX vector instructions
9541 // instead of loading value from memory.
9542 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9543 return VecPat;
9544 }
9545 // Check if this is a splat of a constant value.
9546 APInt APSplatBits, APSplatUndef;
9547 unsigned SplatBitSize;
9548 bool HasAnyUndefs;
9549 bool BVNIsConstantSplat =
9550 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9551 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9552
9553 // If it is a splat of a double, check if we can shrink it to a 32 bit
9554 // non-denormal float which when converted back to double gives us the same
9555 // double. This is to exploit the XXSPLTIDP instruction.
9556 // If we lose precision, we use XXSPLTI32DX.
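  // For example, a v2f64 splat of 1.0 round-trips through single precision
  // (0x3F800000) and is emitted as a single XXSPLTIDP, while a splat of 0.1 does not
  // round-trip exactly, so its two 32-bit halves are materialized with XXSPLTI32DX.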
9557 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9558 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9559 // Check the type first to short-circuit so we don't modify APSplatBits if
9560 // this block isn't executed.
9561 if ((Op->getValueType(0) == MVT::v2f64) &&
9562 convertToNonDenormSingle(APSplatBits)) {
9563 SDValue SplatNode = DAG.getNode(
9564 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9565 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9566 return DAG.getBitcast(Op.getValueType(), SplatNode);
9567 } else {
9568 // We may lose precision, so we have to use XXSPLTI32DX.
9569
9570 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9571 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9572 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9573
9574 if (!Hi || !Lo)
9575 // If either load is 0, then we should generate XXLXOR to set to 0.
9576 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9577
9578 if (Hi)
9579 SplatNode = DAG.getNode(
9580 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9581 DAG.getTargetConstant(0, dl, MVT::i32),
9582 DAG.getTargetConstant(Hi, dl, MVT::i32));
9583
9584 if (Lo)
9585 SplatNode =
9586 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9587 DAG.getTargetConstant(1, dl, MVT::i32),
9588 DAG.getTargetConstant(Lo, dl, MVT::i32));
9589
9590 return DAG.getBitcast(Op.getValueType(), SplatNode);
9591 }
9592 }
9593
9594 bool IsSplat64 = false;
9595 uint64_t SplatBits = 0;
9596 int32_t SextVal = 0;
9597 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9598 SplatBits = APSplatBits.getZExtValue();
9599 if (SplatBitSize <= 32) {
9600 SextVal = SignExtend32(SplatBits, SplatBitSize);
9601 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9602 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9603 bool P9Vector = Subtarget.hasP9Vector();
9604 int32_t Hi = P9Vector ? 127 : 15;
9605 int32_t Lo = P9Vector ? -128 : -16;
9606 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9607 SextVal = static_cast<int32_t>(SplatBits);
9608 }
9609 }
9610
9611 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9612 unsigned NewOpcode = PPCISD::LD_SPLAT;
9613
9614 // Handle load-and-splat patterns as we have instructions that will do this
9615 // in one go.
9616 if (DAG.isSplatValue(Op, true) &&
9617 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9618 const SDValue *InputLoad = &Op.getOperand(0);
9619 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9620
9621 // If the input load is an extending load, it will be an i32 -> i64
9622 // extending load and isValidSplatLoad() will update NewOpcode.
9623 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9624 unsigned ElementSize =
9625 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9626
9627 assert(((ElementSize == 2 * MemorySize)
9628 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9629 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9630 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9631 "Unmatched element size and opcode!\n");
9632
9633 // Checking for a single use of this load, we have to check for vector
9634 // width (128 bits) / ElementSize uses (since each operand of the
9635      // BUILD_VECTOR is a separate use of the value).
9636 unsigned NumUsesOfInputLD = 128 / ElementSize;
9637 for (SDValue BVInOp : Op->ops())
9638 if (BVInOp.isUndef())
9639 NumUsesOfInputLD--;
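      // e.g. splatting a 32-bit load into v4i32 gives ElementSize == 32, so the load
      // must feed all four (non-undef) BUILD_VECTOR operands and nothing else for the
      // single-use check below to succeed.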
9640
9641      // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9642 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9643 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9644      // 15", but isValidSplatLoad() will only return true when
9645 // the data at index 0 is not nullptr. So we will not get into trouble for
9646 // these cases.
9647 //
9648 // case 1 - lfiwzx/lfiwax
9649 // 1.1: load result is i32 and is sign/zero extend to i64;
9650 // 1.2: build a v2i64 vector type with above loaded value;
9651 // 1.3: the vector has only one value at index 0, others are all undef;
9652 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9653 if (NumUsesOfInputLD == 1 &&
9654 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9655 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9656 Subtarget.hasLFIWAX()))
9657 return SDValue();
9658
9659 // case 2 - lxvr[hb]x
9660 // 2.1: load result is at most i16;
9661 // 2.2: build a vector with above loaded value;
9662 // 2.3: the vector has only one value at index 0, others are all undef;
9663 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9664 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9665 Subtarget.isISA3_1() && ElementSize <= 16)
9666 return SDValue();
9667
9668 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9669 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9670 Subtarget.hasVSX()) {
9671 SDValue Ops[] = {
9672 LD->getChain(), // Chain
9673 LD->getBasePtr(), // Ptr
9674 DAG.getValueType(Op.getValueType()) // VT
9675 };
9676 SDValue LdSplt = DAG.getMemIntrinsicNode(
9677 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9678 LD->getMemoryVT(), LD->getMemOperand());
9679 // Replace all uses of the output chain of the original load with the
9680 // output chain of the new load.
9681 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9682 LdSplt.getValue(1));
9683 return LdSplt;
9684 }
9685 }
9686
9687 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9688 // 32-bits can be lowered to VSX instructions under certain conditions.
9689 // Without VSX, there is no pattern more efficient than expanding the node.
9690 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9691 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9692 Subtarget.hasP8Vector()))
9693 return Op;
9694 return SDValue();
9695 }
9696
9697 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9698 unsigned SplatSize = SplatBitSize / 8;
9699
9700 // First, handle single instruction cases.
9701
9702 // All zeros?
9703 if (SplatBits == 0) {
9704 // Canonicalize all zero vectors to be v4i32.
9705 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9706 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9707 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9708 }
9709 return Op;
9710 }
9711
9712 // We have XXSPLTIW for constant splats four bytes wide.
9713  // Since the vector length is a multiple of 4, 2-byte splats can be replaced
9714 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9715 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9716 // turned into a 4-byte splat of 0xABABABAB.
9717 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9718 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9719 Op.getValueType(), DAG, dl);
9720
9721 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9722 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9723 dl);
9724
9725 // We have XXSPLTIB for constant splats one byte wide.
9726 if (Subtarget.hasP9Vector() && SplatSize == 1)
9727 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9728 dl);
9729
9730 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9731 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9732 if (SextVal >= -16 && SextVal <= 15) {
9733 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9734 // generate a splat word with extend for size 8.
9735 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9736 SDValue Res =
9737 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9738 if (SplatSize != 8)
9739 return Res;
9740 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9741 }
9742
9743 // Two instruction sequences.
9744
9745 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9746 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9747    SmallVector<SDValue, 16> Ops(16, C);
9748 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9749 unsigned IID;
9750 EVT VT;
9751 switch (SplatSize) {
9752 default:
9753 llvm_unreachable("Unexpected type for vector constant.");
9754 case 2:
9755 IID = Intrinsic::ppc_altivec_vupklsb;
9756 VT = MVT::v8i16;
9757 break;
9758 case 4:
9759 IID = Intrinsic::ppc_altivec_vextsb2w;
9760 VT = MVT::v4i32;
9761 break;
9762 case 8:
9763 IID = Intrinsic::ppc_altivec_vextsb2d;
9764 VT = MVT::v2i64;
9765 break;
9766 }
9767 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9768 return DAG.getBitcast(Op->getValueType(0), Extend);
9769 }
9770 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9771
9772 // If this value is in the range [-32,30] and is even, use:
9773 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9774 // If this value is in the range [17,31] and is odd, use:
9775 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9776 // If this value is in the range [-31,-17] and is odd, use:
9777 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9778 // Note the last two are three-instruction sequences.
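  // For example, 30 == 15 + 15 takes one VSPLTI plus an add of the result to itself
  // (two instructions), while 23 == 7 - (-16) and -23 == -7 + (-16) each take two
  // VSPLTIs plus a subtract or add (three instructions).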
9779 if (SextVal >= -32 && SextVal <= 31) {
9780 // To avoid having these optimizations undone by constant folding,
9781 // we convert to a pseudo that will be expanded later into one of
9782 // the above forms.
9783 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9784 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9785 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9786 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9787 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9788 if (VT == Op.getValueType())
9789 return RetVal;
9790 else
9791 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9792 }
9793
9794 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9795 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9796 // for fneg/fabs.
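  // Concretely: vspltisw -1 yields 0xFFFFFFFF per word, vslw shifts each word left
  // by 31 (the low 5 bits of -1) to produce 0x8000_0000, and the final xor with the
  // all-ones vector flips that into 0x7FFF_FFFF.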
9797 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9798 // Make -1 and vspltisw -1:
9799 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9800
9801 // Make the VSLW intrinsic, computing 0x8000_0000.
9802 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9803 OnesV, DAG, dl);
9804
9805 // xor by OnesV to invert it.
9806 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9807 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9808 }
9809
9810 // Check to see if this is a wide variety of vsplti*, binop self cases.
9811 static const signed char SplatCsts[] = {
9812 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9813 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9814 };
9815
9816 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9817 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9818 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9819 int i = SplatCsts[idx];
9820
9821 // Figure out what shift amount will be used by altivec if shifted by i in
9822 // this splat size.
9823 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9824
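    // Worked example for the cases below: a v16i8 splat of 0x40 (SextVal == 64) hits
    // the shl case with i == 4, since 4 << 4 == 64, and is emitted as vspltisb 4
    // followed by vslb of the result by itself.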
9825 // vsplti + shl self.
9826 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9827 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9828 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9829 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9830 Intrinsic::ppc_altivec_vslw
9831 };
9832 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9833 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9834 }
9835
9836 // vsplti + srl self.
9837 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9838 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9839 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9840 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9841 Intrinsic::ppc_altivec_vsrw
9842 };
9843 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9844 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9845 }
9846
9847 // vsplti + rol self.
9848 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9849 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9850 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9851 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9852 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9853 Intrinsic::ppc_altivec_vrlw
9854 };
9855 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9856 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9857 }
9858
9859 // t = vsplti c, result = vsldoi t, t, 1
9860 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9861 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9862 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9863 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9864 }
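    // For instance (big-endian byte order), a v8i16 splat of 0x0300 == (3 << 8) is
    // built as vspltish 3 (bytes 00 03 repeated) followed by a one-byte vsldoi,
    // which rotates the bytes to 03 00, i.e. halfwords of 0x0300.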
9865 // t = vsplti c, result = vsldoi t, t, 2
9866 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9867 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9868 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9869 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9870 }
9871 // t = vsplti c, result = vsldoi t, t, 3
9872 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9873 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9874 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9875 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9876 }
9877 }
9878
9879 return SDValue();
9880}
9881
9882/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9883/// the specified operations to build the shuffle.
9884static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9885 SDValue RHS, SelectionDAG &DAG,
9886 const SDLoc &dl) {
9887 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9888 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9889 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
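  // PFEntry layout: bits 31-30 carry the cost, bits 29-26 the operation, and bits
  // 25-13 / 12-0 the LHS/RHS table IDs. Each ID packs four element indices (0-8,
  // where 8 means undef) as base-9 digits, so <0,1,2,3> encodes as
  // ((0*9+1)*9+2)*9+3 == 102 and <4,5,6,7> as ((4*9+5)*9+6)*9+7 == 3382.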
9890
9891 enum {
9892 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9893 OP_VMRGHW,
9894 OP_VMRGLW,
9895 OP_VSPLTISW0,
9896 OP_VSPLTISW1,
9897 OP_VSPLTISW2,
9898 OP_VSPLTISW3,
9899 OP_VSLDOI4,
9900 OP_VSLDOI8,
9901 OP_VSLDOI12
9902 };
9903
9904 if (OpNum == OP_COPY) {
9905 if (LHSID == (1*9+2)*9+3) return LHS;
9906 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9907 return RHS;
9908 }
9909
9910 SDValue OpLHS, OpRHS;
9911 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9912 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9913
9914 int ShufIdxs[16];
9915 switch (OpNum) {
9916 default: llvm_unreachable("Unknown i32 permute!");
9917 case OP_VMRGHW:
9918 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9919 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9920 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9921 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9922 break;
9923 case OP_VMRGLW:
9924 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9925 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9926 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9927 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9928 break;
9929 case OP_VSPLTISW0:
9930 for (unsigned i = 0; i != 16; ++i)
9931 ShufIdxs[i] = (i&3)+0;
9932 break;
9933 case OP_VSPLTISW1:
9934 for (unsigned i = 0; i != 16; ++i)
9935 ShufIdxs[i] = (i&3)+4;
9936 break;
9937 case OP_VSPLTISW2:
9938 for (unsigned i = 0; i != 16; ++i)
9939 ShufIdxs[i] = (i&3)+8;
9940 break;
9941 case OP_VSPLTISW3:
9942 for (unsigned i = 0; i != 16; ++i)
9943 ShufIdxs[i] = (i&3)+12;
9944 break;
9945 case OP_VSLDOI4:
9946 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9947 case OP_VSLDOI8:
9948 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9949 case OP_VSLDOI12:
9950 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9951 }
9952 EVT VT = OpLHS.getValueType();
9953 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9954 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9955 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9956 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9957}
9958
9959/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9960/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9961/// SDValue.
9962SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9963 SelectionDAG &DAG) const {
9964 const unsigned BytesInVector = 16;
9965 bool IsLE = Subtarget.isLittleEndian();
9966 SDLoc dl(N);
9967 SDValue V1 = N->getOperand(0);
9968 SDValue V2 = N->getOperand(1);
9969 unsigned ShiftElts = 0, InsertAtByte = 0;
9970 bool Swap = false;
9971
9972 // Shifts required to get the byte we want at element 7.
9973 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9974 0, 15, 14, 13, 12, 11, 10, 9};
9975 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9976 1, 2, 3, 4, 5, 6, 7, 8};
9977
9978 ArrayRef<int> Mask = N->getMask();
9979 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9980
9981 // For each mask element, find out if we're just inserting something
9982 // from V2 into V1 or vice versa.
9983 // Possible permutations inserting an element from V2 into V1:
9984 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9985 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9986 // ...
9987 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9988 // Inserting from V1 into V2 will be similar, except mask range will be
9989 // [16,31].
9990
9991 bool FoundCandidate = false;
9992 // If both vector operands for the shuffle are the same vector, the mask
9993 // will contain only elements from the first one and the second one will be
9994 // undef.
9995 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9996 // Go through the mask of half-words to find an element that's being moved
9997 // from one vector to the other.
9998 for (unsigned i = 0; i < BytesInVector; ++i) {
9999 unsigned CurrentElement = Mask[i];
10000 // If 2nd operand is undefined, we should only look for element 7 in the
10001 // Mask.
10002 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10003 continue;
10004
10005 bool OtherElementsInOrder = true;
10006 // Examine the other elements in the Mask to see if they're in original
10007 // order.
10008 for (unsigned j = 0; j < BytesInVector; ++j) {
10009 if (j == i)
10010 continue;
10011      // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
10012      // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
10013      // in which case we assume we're always picking from the 1st operand.
10014 int MaskOffset =
10015 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10016 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10017 OtherElementsInOrder = false;
10018 break;
10019 }
10020 }
10021 // If other elements are in original order, we record the number of shifts
10022 // we need to get the element we want into element 7. Also record which byte
10023 // in the vector we should insert into.
10024 if (OtherElementsInOrder) {
10025 // If 2nd operand is undefined, we assume no shifts and no swapping.
10026 if (V2.isUndef()) {
10027 ShiftElts = 0;
10028 Swap = false;
10029 } else {
10030 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10031 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10032 : BigEndianShifts[CurrentElement & 0xF];
10033 Swap = CurrentElement < BytesInVector;
10034 }
10035 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10036 FoundCandidate = true;
10037 break;
10038 }
10039 }
10040
10041 if (!FoundCandidate)
10042 return SDValue();
10043
10044 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10045 // optionally with VECSHL if shift is required.
10046 if (Swap)
10047 std::swap(V1, V2);
10048 if (V2.isUndef())
10049 V2 = V1;
10050 if (ShiftElts) {
10051 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10052 DAG.getConstant(ShiftElts, dl, MVT::i32));
10053 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10054 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10055 }
10056 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10057 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10058}
10059
10060/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10061/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10062/// SDValue.
10063SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10064 SelectionDAG &DAG) const {
10065 const unsigned NumHalfWords = 8;
10066 const unsigned BytesInVector = NumHalfWords * 2;
10067 // Check that the shuffle is on half-words.
10068 if (!isNByteElemShuffleMask(N, 2, 1))
10069 return SDValue();
10070
10071 bool IsLE = Subtarget.isLittleEndian();
10072 SDLoc dl(N);
10073 SDValue V1 = N->getOperand(0);
10074 SDValue V2 = N->getOperand(1);
10075 unsigned ShiftElts = 0, InsertAtByte = 0;
10076 bool Swap = false;
10077
10078 // Shifts required to get the half-word we want at element 3.
10079 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10080 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10081
10082 uint32_t Mask = 0;
10083 uint32_t OriginalOrderLow = 0x1234567;
10084 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10085 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10086 // 32-bit space, only need 4-bit nibbles per element.
10087 for (unsigned i = 0; i < NumHalfWords; ++i) {
10088 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10089 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10090 }
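  // e.g. a half-word shuffle mask of <0,1,2,11,4,5,6,7> packs to 0x012B4567, which
  // differs from OriginalOrderLow (0x01234567) only in nibble 3, identifying element
  // 3 as the one being inserted from the other vector.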
10091
10092 // For each mask element, find out if we're just inserting something
10093 // from V2 into V1 or vice versa. Possible permutations inserting an element
10094 // from V2 into V1:
10095 // X, 1, 2, 3, 4, 5, 6, 7
10096 // 0, X, 2, 3, 4, 5, 6, 7
10097 // 0, 1, X, 3, 4, 5, 6, 7
10098 // 0, 1, 2, X, 4, 5, 6, 7
10099 // 0, 1, 2, 3, X, 5, 6, 7
10100 // 0, 1, 2, 3, 4, X, 6, 7
10101 // 0, 1, 2, 3, 4, 5, X, 7
10102 // 0, 1, 2, 3, 4, 5, 6, X
10103 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10104
10105 bool FoundCandidate = false;
10106 // Go through the mask of half-words to find an element that's being moved
10107 // from one vector to the other.
10108 for (unsigned i = 0; i < NumHalfWords; ++i) {
10109 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10110 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10111 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10112 uint32_t TargetOrder = 0x0;
10113
10114 // If both vector operands for the shuffle are the same vector, the mask
10115 // will contain only elements from the first one and the second one will be
10116 // undef.
10117 if (V2.isUndef()) {
10118 ShiftElts = 0;
10119 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10120 TargetOrder = OriginalOrderLow;
10121 Swap = false;
10122 // Skip if not the correct element or mask of other elements don't equal
10123 // to our expected order.
10124 if (MaskOneElt == VINSERTHSrcElem &&
10125 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10126 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10127 FoundCandidate = true;
10128 break;
10129 }
10130 } else { // If both operands are defined.
10131 // Target order is [8,15] if the current mask is between [0,7].
10132 TargetOrder =
10133 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10134 // Skip if mask of other elements don't equal our expected order.
10135 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10136 // We only need the last 3 bits for the number of shifts.
10137 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10138 : BigEndianShifts[MaskOneElt & 0x7];
10139 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10140 Swap = MaskOneElt < NumHalfWords;
10141 FoundCandidate = true;
10142 break;
10143 }
10144 }
10145 }
10146
10147 if (!FoundCandidate)
10148 return SDValue();
10149
10150 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10151 // optionally with VECSHL if shift is required.
10152 if (Swap)
10153 std::swap(V1, V2);
10154 if (V2.isUndef())
10155 V2 = V1;
10156 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10157 if (ShiftElts) {
10158 // Double ShiftElts because we're left shifting on v16i8 type.
10159 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10160 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10161 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10162 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10163 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10164 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10165 }
10166 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10167 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10168 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10169 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10170}
10171
10172/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10173/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10174/// return the default SDValue.
10175SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10176 SelectionDAG &DAG) const {
10177 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10178 // to v16i8. Peek through the bitcasts to get the actual operands.
10179  SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10180  SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10181
10182 auto ShuffleMask = SVN->getMask();
10183 SDValue VecShuffle(SVN, 0);
10184 SDLoc DL(SVN);
10185
10186 // Check that we have a four byte shuffle.
10187 if (!isNByteElemShuffleMask(SVN, 4, 1))
10188 return SDValue();
10189
10190 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10191 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10192 std::swap(LHS, RHS);
10193    VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10194 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10195 if (!CommutedSV)
10196 return SDValue();
10197 ShuffleMask = CommutedSV->getMask();
10198 }
10199
10200 // Ensure that the RHS is a vector of constants.
10201 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10202 if (!BVN)
10203 return SDValue();
10204
10205 // Check if RHS is a splat of 4-bytes (or smaller).
10206 APInt APSplatValue, APSplatUndef;
10207 unsigned SplatBitSize;
10208 bool HasAnyUndefs;
10209 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10210 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10211 SplatBitSize > 32)
10212 return SDValue();
10213
10214 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10215 // The instruction splats a constant C into two words of the source vector
10216 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10217 // Thus we check that the shuffle mask is the equivalent of
10218 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10219 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10220 // within each word are consecutive, so we only need to check the first byte.
10221 SDValue Index;
10222 bool IsLE = Subtarget.isLittleEndian();
10223 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10224 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10225 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10226 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10227 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10228 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10229 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10230 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10231 else
10232 return SDValue();
10233
10234 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10235 // for XXSPLTI32DX.
10236 unsigned SplatVal = APSplatValue.getZExtValue();
10237 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10238 SplatVal |= (SplatVal << SplatBitSize);
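  // e.g. an 8-bit splat value of 0xAB widens to 0xABAB and then to 0xABABABAB.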
10239
10240 SDValue SplatNode = DAG.getNode(
10241 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10242 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10243 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10244}
10245
10246/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10247/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10248/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10249/// i.e (or (shl x, C1), (srl x, 128-C1)).
10250SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10251 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10252 assert(Op.getValueType() == MVT::v1i128 &&
10253 "Only set v1i128 as custom, other type shouldn't reach here!");
10254 SDLoc dl(Op);
10255 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10256 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10257 unsigned SHLAmt = N1.getConstantOperandVal(0);
10258 if (SHLAmt % 8 == 0) {
10259 std::array<int, 16> Mask;
10260 std::iota(Mask.begin(), Mask.end(), 0);
10261 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
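    // e.g. SHLAmt == 16 rotates the identity mask by two bytes, giving
    // <2,3,...,15,0,1>.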
10262 if (SDValue Shuffle =
10263 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10264 DAG.getUNDEF(MVT::v16i8), Mask))
10265 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10266 }
10267 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10268 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10269 DAG.getConstant(SHLAmt, dl, MVT::i32));
10270 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10271 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10272 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10273 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10274}
10275
10276/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10277/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10278/// return the code it can be lowered into. Worst case, it can always be
10279/// lowered into a vperm.
10280SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10281 SelectionDAG &DAG) const {
10282 SDLoc dl(Op);
10283 SDValue V1 = Op.getOperand(0);
10284 SDValue V2 = Op.getOperand(1);
10285 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10286
10287 // Any nodes that were combined in the target-independent combiner prior
10288 // to vector legalization will not be sent to the target combine. Try to
10289 // combine it here.
10290 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10291 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10292 return NewShuffle;
10293 Op = NewShuffle;
10294    SVOp = cast<ShuffleVectorSDNode>(Op);
10295 V1 = Op.getOperand(0);
10296 V2 = Op.getOperand(1);
10297 }
10298 EVT VT = Op.getValueType();
10299 bool isLittleEndian = Subtarget.isLittleEndian();
10300
10301 unsigned ShiftElts, InsertAtByte;
10302 bool Swap = false;
10303
10304 // If this is a load-and-splat, we can do that with a single instruction
10305 // in some cases. However if the load has multiple uses, we don't want to
10306 // combine it because that will just produce multiple loads.
10307 bool IsPermutedLoad = false;
10308 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10309 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10310 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10311 InputLoad->hasOneUse()) {
10312 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10313 int SplatIdx =
10314 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10315
10316 // The splat index for permuted loads will be in the left half of the vector
10317 // which is strictly wider than the loaded value by 8 bytes. So we need to
10318 // adjust the splat index to point to the correct address in memory.
10319 if (IsPermutedLoad) {
10320 assert((isLittleEndian || IsFourByte) &&
10321 "Unexpected size for permuted load on big endian target");
10322 SplatIdx += IsFourByte ? 2 : 1;
10323 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10324 "Splat of a value outside of the loaded memory");
10325 }
10326
10327 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10328 // For 4-byte load-and-splat, we need Power9.
10329 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10330 uint64_t Offset = 0;
10331 if (IsFourByte)
10332 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10333 else
10334 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10335
10336 // If the width of the load is the same as the width of the splat,
10337 // loading with an offset would load the wrong memory.
10338 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10339 Offset = 0;
10340
10341 SDValue BasePtr = LD->getBasePtr();
10342 if (Offset != 0)
10343      BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10344 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10345 SDValue Ops[] = {
10346 LD->getChain(), // Chain
10347 BasePtr, // BasePtr
10348 DAG.getValueType(Op.getValueType()) // VT
10349 };
10350 SDVTList VTL =
10351 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10352 SDValue LdSplt =
10353 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10354 Ops, LD->getMemoryVT(), LD->getMemOperand());
10355 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10356 if (LdSplt.getValueType() != SVOp->getValueType(0))
10357 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10358 return LdSplt;
10359 }
10360 }
10361
10362 // All v2i64 and v2f64 shuffles are legal
10363 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10364 return Op;
10365
10366 if (Subtarget.hasP9Vector() &&
10367 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10368 isLittleEndian)) {
10369 if (V2.isUndef())
10370 V2 = V1;
10371 else if (Swap)
10372 std::swap(V1, V2);
10373 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10374 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10375 if (ShiftElts) {
10376 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10377 DAG.getConstant(ShiftElts, dl, MVT::i32));
10378 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10379 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10380 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10381 }
10382 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10383 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10384 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10385 }
10386
10387 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10388 SDValue SplatInsertNode;
10389 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10390 return SplatInsertNode;
10391 }
10392
10393 if (Subtarget.hasP9Altivec()) {
10394 SDValue NewISDNode;
10395 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10396 return NewISDNode;
10397
10398 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10399 return NewISDNode;
10400 }
10401
10402 if (Subtarget.hasVSX() &&
10403 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10404 if (Swap)
10405 std::swap(V1, V2);
10406 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10407 SDValue Conv2 =
10408 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10409
10410 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10411 DAG.getConstant(ShiftElts, dl, MVT::i32));
10412 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10413 }
10414
10415 if (Subtarget.hasVSX() &&
10416 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10417 if (Swap)
10418 std::swap(V1, V2);
10419 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10420 SDValue Conv2 =
10421 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10422
10423 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10424 DAG.getConstant(ShiftElts, dl, MVT::i32));
10425 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10426 }
10427
10428 if (Subtarget.hasP9Vector()) {
10429 if (PPC::isXXBRHShuffleMask(SVOp)) {
10430 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10431 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10432 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10433 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10434 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10435 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10436 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10437 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10438 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10439 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10440 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10441 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10442 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10443 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10444 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10445 }
10446 }
10447
10448 if (Subtarget.hasVSX()) {
10449 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10450 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10451
10452 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10453 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10454 DAG.getConstant(SplatIdx, dl, MVT::i32));
10455 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10456 }
10457
10458 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10459 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10460 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10461 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10462 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10463 }
10464 }
10465
10466 // Cases that are handled by instructions that take permute immediates
10467 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10468 // selected by the instruction selector.
10469 if (V2.isUndef()) {
10470 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10471 PPC::isSplatShuffleMask(SVOp, 2) ||
10472 PPC::isSplatShuffleMask(SVOp, 4) ||
10473 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10474 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10475 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10476 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10477 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10478 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10479 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10480 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10481 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10482 (Subtarget.hasP8Altivec() && (
10483 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10484 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10485 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10486 return Op;
10487 }
10488 }
10489
10490 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10491 // and produce a fixed permutation. If any of these match, do not lower to
10492 // VPERM.
10493 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10494 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10495 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10496 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10497 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10498 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10499 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10500 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10501 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10502 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10503 (Subtarget.hasP8Altivec() && (
10504 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10505 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10506 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10507 return Op;
10508
10509 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10510 // perfect shuffle table to emit an optimal matching sequence.
10511 ArrayRef<int> PermMask = SVOp->getMask();
10512
10513 if (!DisablePerfectShuffle && !isLittleEndian) {
10514 unsigned PFIndexes[4];
10515 bool isFourElementShuffle = true;
10516 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10517 ++i) { // Element number
10518 unsigned EltNo = 8; // Start out undef.
10519 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10520 if (PermMask[i * 4 + j] < 0)
10521 continue; // Undef, ignore it.
10522
10523 unsigned ByteSource = PermMask[i * 4 + j];
10524 if ((ByteSource & 3) != j) {
10525 isFourElementShuffle = false;
10526 break;
10527 }
10528
10529 if (EltNo == 8) {
10530 EltNo = ByteSource / 4;
10531 } else if (EltNo != ByteSource / 4) {
10532 isFourElementShuffle = false;
10533 break;
10534 }
10535 }
10536 PFIndexes[i] = EltNo;
10537 }
10538
10539 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10540 // perfect shuffle vector to determine if it is cost effective to do this as
10541 // discrete instructions, or whether we should use a vperm.
10542 // For now, we skip this for little endian until such time as we have a
10543 // little-endian perfect shuffle table.
10544 if (isFourElementShuffle) {
10545 // Compute the index in the perfect shuffle table.
10546 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10547 PFIndexes[2] * 9 + PFIndexes[3];
10548
10549 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10550 unsigned Cost = (PFEntry >> 30);
10551
10552 // Determining when to avoid vperm is tricky. Many things affect the cost
10553 // of vperm, particularly how many times the perm mask needs to be
10554 // computed. For example, if the perm mask can be hoisted out of a loop or
10555 // is already used (perhaps because there are multiple permutes with the
10556 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10557 // permute mask out of the loop requires an extra register.
10558 //
10559 // As a compromise, we only emit discrete instructions if the shuffle can
10560 // be generated in 3 or fewer operations. When we have loop information
10561 // available, if this block is within a loop, we should avoid using vperm
10562 // for 3-operation perms and use a constant pool load instead.
10563 if (Cost < 3)
10564 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10565 }
10566 }
10567
10568 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10569 // vector that will get spilled to the constant pool.
10570 if (V2.isUndef()) V2 = V1;
10571
10572 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10573}
10574
10575SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10576 ArrayRef<int> PermMask, EVT VT,
10577 SDValue V1, SDValue V2) const {
10578 unsigned Opcode = PPCISD::VPERM;
10579 EVT ValType = V1.getValueType();
10580 SDLoc dl(Op);
10581 bool NeedSwap = false;
10582 bool isLittleEndian = Subtarget.isLittleEndian();
10583 bool isPPC64 = Subtarget.isPPC64();
10584
10585 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10586 (V1->hasOneUse() || V2->hasOneUse())) {
10587 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10588 "XXPERM instead\n");
10589 Opcode = PPCISD::XXPERM;
10590
10591 // The second input to XXPERM is also an output so if the second input has
10592 // multiple uses then copying is necessary, as a result we want the
10593 // single-use operand to be used as the second input to prevent copying.
10594 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10595 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10596 std::swap(V1, V2);
10597 NeedSwap = !NeedSwap;
10598 }
10599 }
10600
10601 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10602 // that it is in input element units, not in bytes. Convert now.
10603
10604 // For little endian, the order of the input vectors is reversed, and
10605 // the permutation mask is complemented with respect to 31. This is
10606 // necessary to produce proper semantics with the big-endian-based vperm
10607 // instruction.
10608 EVT EltVT = V1.getValueType().getVectorElementType();
10609 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10610
10611 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10612 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10613
10614 /*
10615 Vectors will be appended like so: [ V1 | v2 ]
10616 XXSWAPD on V1:
10617 [ A | B | C | D ] -> [ C | D | A | B ]
10618 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10619 i.e. index of A, B += 8, and index of C, D -= 8.
10620 XXSWAPD on V2:
10621 [ E | F | G | H ] -> [ G | H | E | F ]
10622 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10623 i.e. index of E, F += 8, index of G, H -= 8
10624 Swap V1 and V2:
10625 [ V1 | V2 ] -> [ V2 | V1 ]
10626 0-15 16-31 0-15 16-31
10627 i.e. index of V1 += 16, index of V2 -= 16
10628 */
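  // Concretely, a mask element selecting byte 3 of the concatenated inputs becomes
  // 31 - 3 == 28 in the little-endian control vector, and V1/V2 are swapped further
  // below so the big-endian-defined vperm/xxperm still picks the intended byte.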
10629
10630 SmallVector<SDValue, 16> ResultMask;
10631 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10632 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10633
10634 if (V1HasXXSWAPD) {
10635 if (SrcElt < 8)
10636 SrcElt += 8;
10637 else if (SrcElt < 16)
10638 SrcElt -= 8;
10639 }
10640 if (V2HasXXSWAPD) {
10641 if (SrcElt > 23)
10642 SrcElt -= 8;
10643 else if (SrcElt > 15)
10644 SrcElt += 8;
10645 }
10646 if (NeedSwap) {
10647 if (SrcElt < 16)
10648 SrcElt += 16;
10649 else
10650 SrcElt -= 16;
10651 }
10652 for (unsigned j = 0; j != BytesPerElement; ++j)
10653 if (isLittleEndian)
10654 ResultMask.push_back(
10655 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10656 else
10657 ResultMask.push_back(
10658 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10659 }
10660
10661 if (V1HasXXSWAPD) {
10662 dl = SDLoc(V1->getOperand(0));
10663 V1 = V1->getOperand(0)->getOperand(1);
10664 }
10665 if (V2HasXXSWAPD) {
10666 dl = SDLoc(V2->getOperand(0));
10667 V2 = V2->getOperand(0)->getOperand(1);
10668 }
10669
10670 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10671 if (ValType != MVT::v2f64)
10672 V1 = DAG.getBitcast(MVT::v2f64, V1);
10673 if (V2.getValueType() != MVT::v2f64)
10674 V2 = DAG.getBitcast(MVT::v2f64, V2);
10675 }
10676
10677 ShufflesHandledWithVPERM++;
10678 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10679 LLVM_DEBUG({
10680 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10681 if (Opcode == PPCISD::XXPERM) {
10682 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10683 } else {
10684 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10685 }
10686 SVOp->dump();
10687 dbgs() << "With the following permute control vector:\n";
10688 VPermMask.dump();
10689 });
10690
10691 if (Opcode == PPCISD::XXPERM)
10692 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10693
10694  // Only the operands need to be placed backwards for LE;
10695  // the mask was already calculated accordingly above.
10696 if (isLittleEndian)
10697 std::swap(V1, V2);
10698
10699 SDValue VPERMNode =
10700 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10701
10702 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10703 return VPERMNode;
10704}
10705
10706/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10707/// vector comparison. If it is, return true and fill in Opc/isDot with
10708/// information about the intrinsic.
10709static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10710 bool &isDot, const PPCSubtarget &Subtarget) {
10711 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10712 CompareOpc = -1;
10713 isDot = false;
10714 switch (IntrinsicID) {
10715 default:
10716 return false;
10717 // Comparison predicates.
10718 case Intrinsic::ppc_altivec_vcmpbfp_p:
10719 CompareOpc = 966;
10720 isDot = true;
10721 break;
10722 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10723 CompareOpc = 198;
10724 isDot = true;
10725 break;
10726 case Intrinsic::ppc_altivec_vcmpequb_p:
10727 CompareOpc = 6;
10728 isDot = true;
10729 break;
10730 case Intrinsic::ppc_altivec_vcmpequh_p:
10731 CompareOpc = 70;
10732 isDot = true;
10733 break;
10734 case Intrinsic::ppc_altivec_vcmpequw_p:
10735 CompareOpc = 134;
10736 isDot = true;
10737 break;
10738 case Intrinsic::ppc_altivec_vcmpequd_p:
10739 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10740 CompareOpc = 199;
10741 isDot = true;
10742 } else
10743 return false;
10744 break;
10745 case Intrinsic::ppc_altivec_vcmpneb_p:
10746 case Intrinsic::ppc_altivec_vcmpneh_p:
10747 case Intrinsic::ppc_altivec_vcmpnew_p:
10748 case Intrinsic::ppc_altivec_vcmpnezb_p:
10749 case Intrinsic::ppc_altivec_vcmpnezh_p:
10750 case Intrinsic::ppc_altivec_vcmpnezw_p:
10751 if (Subtarget.hasP9Altivec()) {
10752 switch (IntrinsicID) {
10753 default:
10754 llvm_unreachable("Unknown comparison intrinsic.");
10755 case Intrinsic::ppc_altivec_vcmpneb_p:
10756 CompareOpc = 7;
10757 break;
10758 case Intrinsic::ppc_altivec_vcmpneh_p:
10759 CompareOpc = 71;
10760 break;
10761 case Intrinsic::ppc_altivec_vcmpnew_p:
10762 CompareOpc = 135;
10763 break;
10764 case Intrinsic::ppc_altivec_vcmpnezb_p:
10765 CompareOpc = 263;
10766 break;
10767 case Intrinsic::ppc_altivec_vcmpnezh_p:
10768 CompareOpc = 327;
10769 break;
10770 case Intrinsic::ppc_altivec_vcmpnezw_p:
10771 CompareOpc = 391;
10772 break;
10773 }
10774 isDot = true;
10775 } else
10776 return false;
10777 break;
10778 case Intrinsic::ppc_altivec_vcmpgefp_p:
10779 CompareOpc = 454;
10780 isDot = true;
10781 break;
10782 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10783 CompareOpc = 710;
10784 isDot = true;
10785 break;
10786 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10787 CompareOpc = 774;
10788 isDot = true;
10789 break;
10790 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10791 CompareOpc = 838;
10792 isDot = true;
10793 break;
10794 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10795 CompareOpc = 902;
10796 isDot = true;
10797 break;
10798 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10799 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10800 CompareOpc = 967;
10801 isDot = true;
10802 } else
10803 return false;
10804 break;
10805 case Intrinsic::ppc_altivec_vcmpgtub_p:
10806 CompareOpc = 518;
10807 isDot = true;
10808 break;
10809 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10810 CompareOpc = 582;
10811 isDot = true;
10812 break;
10813 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10814 CompareOpc = 646;
10815 isDot = true;
10816 break;
10817 case Intrinsic::ppc_altivec_vcmpgtud_p:
10818 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10819 CompareOpc = 711;
10820 isDot = true;
10821 } else
10822 return false;
10823 break;
10824
10825 case Intrinsic::ppc_altivec_vcmpequq:
10826 case Intrinsic::ppc_altivec_vcmpgtsq:
10827 case Intrinsic::ppc_altivec_vcmpgtuq:
10828 if (!Subtarget.isISA3_1())
10829 return false;
10830 switch (IntrinsicID) {
10831 default:
10832 llvm_unreachable("Unknown comparison intrinsic.");
10833 case Intrinsic::ppc_altivec_vcmpequq:
10834 CompareOpc = 455;
10835 break;
10836 case Intrinsic::ppc_altivec_vcmpgtsq:
10837 CompareOpc = 903;
10838 break;
10839 case Intrinsic::ppc_altivec_vcmpgtuq:
10840 CompareOpc = 647;
10841 break;
10842 }
10843 break;
10844
10845 // VSX predicate comparisons use the same infrastructure
10846 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10847 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10848 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10849 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10850 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10851 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10852 if (Subtarget.hasVSX()) {
10853 switch (IntrinsicID) {
10854 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10855 CompareOpc = 99;
10856 break;
10857 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10858 CompareOpc = 115;
10859 break;
10860 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10861 CompareOpc = 107;
10862 break;
10863 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10864 CompareOpc = 67;
10865 break;
10866 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10867 CompareOpc = 83;
10868 break;
10869 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10870 CompareOpc = 75;
10871 break;
10872 }
10873 isDot = true;
10874 } else
10875 return false;
10876 break;
10877
10878 // Normal Comparisons.
10879 case Intrinsic::ppc_altivec_vcmpbfp:
10880 CompareOpc = 966;
10881 break;
10882 case Intrinsic::ppc_altivec_vcmpeqfp:
10883 CompareOpc = 198;
10884 break;
10885 case Intrinsic::ppc_altivec_vcmpequb:
10886 CompareOpc = 6;
10887 break;
10888 case Intrinsic::ppc_altivec_vcmpequh:
10889 CompareOpc = 70;
10890 break;
10891 case Intrinsic::ppc_altivec_vcmpequw:
10892 CompareOpc = 134;
10893 break;
10894 case Intrinsic::ppc_altivec_vcmpequd:
10895 if (Subtarget.hasP8Altivec())
10896 CompareOpc = 199;
10897 else
10898 return false;
10899 break;
10900 case Intrinsic::ppc_altivec_vcmpneb:
10901 case Intrinsic::ppc_altivec_vcmpneh:
10902 case Intrinsic::ppc_altivec_vcmpnew:
10903 case Intrinsic::ppc_altivec_vcmpnezb:
10904 case Intrinsic::ppc_altivec_vcmpnezh:
10905 case Intrinsic::ppc_altivec_vcmpnezw:
10906 if (Subtarget.hasP9Altivec())
10907 switch (IntrinsicID) {
10908 default:
10909 llvm_unreachable("Unknown comparison intrinsic.");
10910 case Intrinsic::ppc_altivec_vcmpneb:
10911 CompareOpc = 7;
10912 break;
10913 case Intrinsic::ppc_altivec_vcmpneh:
10914 CompareOpc = 71;
10915 break;
10916 case Intrinsic::ppc_altivec_vcmpnew:
10917 CompareOpc = 135;
10918 break;
10919 case Intrinsic::ppc_altivec_vcmpnezb:
10920 CompareOpc = 263;
10921 break;
10922 case Intrinsic::ppc_altivec_vcmpnezh:
10923 CompareOpc = 327;
10924 break;
10925 case Intrinsic::ppc_altivec_vcmpnezw:
10926 CompareOpc = 391;
10927 break;
10928 }
10929 else
10930 return false;
10931 break;
10932 case Intrinsic::ppc_altivec_vcmpgefp:
10933 CompareOpc = 454;
10934 break;
10935 case Intrinsic::ppc_altivec_vcmpgtfp:
10936 CompareOpc = 710;
10937 break;
10938 case Intrinsic::ppc_altivec_vcmpgtsb:
10939 CompareOpc = 774;
10940 break;
10941 case Intrinsic::ppc_altivec_vcmpgtsh:
10942 CompareOpc = 838;
10943 break;
10944 case Intrinsic::ppc_altivec_vcmpgtsw:
10945 CompareOpc = 902;
10946 break;
10947 case Intrinsic::ppc_altivec_vcmpgtsd:
10948 if (Subtarget.hasP8Altivec())
10949 CompareOpc = 967;
10950 else
10951 return false;
10952 break;
10953 case Intrinsic::ppc_altivec_vcmpgtub:
10954 CompareOpc = 518;
10955 break;
10956 case Intrinsic::ppc_altivec_vcmpgtuh:
10957 CompareOpc = 582;
10958 break;
10959 case Intrinsic::ppc_altivec_vcmpgtuw:
10960 CompareOpc = 646;
10961 break;
10962 case Intrinsic::ppc_altivec_vcmpgtud:
10963 if (Subtarget.hasP8Altivec())
10964 CompareOpc = 711;
10965 else
10966 return false;
10967 break;
10968 case Intrinsic::ppc_altivec_vcmpequq_p:
10969 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10970 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10971 if (!Subtarget.isISA3_1())
10972 return false;
10973 switch (IntrinsicID) {
10974 default:
10975 llvm_unreachable("Unknown comparison intrinsic.");
10976 case Intrinsic::ppc_altivec_vcmpequq_p:
10977 CompareOpc = 455;
10978 break;
10979 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10980 CompareOpc = 903;
10981 break;
10982 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10983 CompareOpc = 647;
10984 break;
10985 }
10986 isDot = true;
10987 break;
10988 }
10989 return true;
10990}
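// Editor's note (observation based on the values above, not original
// commentary): the CompareOpc numbers are the extended-opcode fields of the
// corresponding vcmp*/xvcmp* instructions (e.g. 6/70/134 for
// vcmpequb/vcmpequh/vcmpequw), and isDot is set for the "_p" predicate
// intrinsics so that the caller selects the record form, which also updates
// CR6.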
10991
10992/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10993/// lower, do it, otherwise return null.
10994SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10995 SelectionDAG &DAG) const {
10996 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10997
10998 SDLoc dl(Op);
10999
11000 switch (IntrinsicID) {
11001 case Intrinsic::thread_pointer:
11002 // Reads the thread pointer register, used for __builtin_thread_pointer.
11003 if (Subtarget.isPPC64())
11004 return DAG.getRegister(PPC::X13, MVT::i64);
11005 return DAG.getRegister(PPC::R2, MVT::i32);
11006
11007 case Intrinsic::ppc_rldimi: {
11008 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11009 SDValue Src = Op.getOperand(1);
11010 APInt Mask = Op.getConstantOperandAPInt(4);
11011 if (Mask.isZero())
11012 return Op.getOperand(2);
11013 if (Mask.isAllOnes())
11014 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11015 uint64_t SH = Op.getConstantOperandVal(3);
11016 unsigned MB = 0, ME = 0;
11017 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11018 report_fatal_error("invalid rldimi mask!");
11019 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11020 if (ME < 63 - SH) {
11021 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11022 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11023 } else if (ME > 63 - SH) {
11024 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11025 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11026 }
11027 return SDValue(
11028 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11029 {Op.getOperand(2), Src,
11030 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11031 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11032 0);
11033 }
11034
11035 case Intrinsic::ppc_rlwimi: {
11036 APInt Mask = Op.getConstantOperandAPInt(4);
11037 if (Mask.isZero())
11038 return Op.getOperand(2);
11039 if (Mask.isAllOnes())
11040 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11041 Op.getOperand(3));
11042 unsigned MB = 0, ME = 0;
11043 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11044 report_fatal_error("invalid rlwimi mask!");
11045 return SDValue(DAG.getMachineNode(
11046 PPC::RLWIMI, dl, MVT::i32,
11047 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11048 DAG.getTargetConstant(MB, dl, MVT::i32),
11049 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11050 0);
11051 }
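  // Editor's note (illustrative, hypothetical operands): assuming the first
  // intrinsic argument is the value that is rotated and the second the value
  // it is inserted into (matching the operand order used above), a call such
  // as
  //   @llvm.ppc.rlwimi(i32 %rs, i32 %ra, i32 8, i32 0x00FFFF00)
  // has a mask that is the run of ones MB=8..ME=23, so it would be emitted as
  //   RLWIMI %ra, %rs, 8, 8, 23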
11052
11053 case Intrinsic::ppc_rlwnm: {
11054 if (Op.getConstantOperandVal(3) == 0)
11055 return DAG.getConstant(0, dl, MVT::i32);
11056 unsigned MB = 0, ME = 0;
11057 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11058 report_fatal_error("invalid rlwnm mask!");
11059 return SDValue(
11060 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11061 {Op.getOperand(1), Op.getOperand(2),
11062 DAG.getTargetConstant(MB, dl, MVT::i32),
11063 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11064 0);
11065 }
11066
11067 case Intrinsic::ppc_mma_disassemble_acc: {
11068 if (Subtarget.isISAFuture()) {
11069 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11070 SDValue WideVec =
11071 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11072 Op.getOperand(1)),
11073 0);
11074      SmallVector<SDValue, 4> RetOps;
11075      SDValue Value = SDValue(WideVec.getNode(), 0);
11076 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11077
11078 SDValue Extract;
11079 Extract = DAG.getNode(
11080 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11081 Subtarget.isLittleEndian() ? Value2 : Value,
11082 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11083 dl, getPointerTy(DAG.getDataLayout())));
11084 RetOps.push_back(Extract);
11085 Extract = DAG.getNode(
11086 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11087 Subtarget.isLittleEndian() ? Value2 : Value,
11088 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11089 dl, getPointerTy(DAG.getDataLayout())));
11090 RetOps.push_back(Extract);
11091 Extract = DAG.getNode(
11092 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11093 Subtarget.isLittleEndian() ? Value : Value2,
11094 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11095 dl, getPointerTy(DAG.getDataLayout())));
11096 RetOps.push_back(Extract);
11097 Extract = DAG.getNode(
11098 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11099 Subtarget.isLittleEndian() ? Value : Value2,
11100 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11101 dl, getPointerTy(DAG.getDataLayout())));
11102 RetOps.push_back(Extract);
11103 return DAG.getMergeValues(RetOps, dl);
11104 }
11105 [[fallthrough]];
11106 }
11107 case Intrinsic::ppc_vsx_disassemble_pair: {
11108 int NumVecs = 2;
11109 SDValue WideVec = Op.getOperand(1);
11110 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11111 NumVecs = 4;
11112 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11113 }
11114    SmallVector<SDValue, 4> RetOps;
11115    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11116 SDValue Extract = DAG.getNode(
11117 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11118 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11119 : VecNo,
11120 dl, getPointerTy(DAG.getDataLayout())));
11121 RetOps.push_back(Extract);
11122 }
11123 return DAG.getMergeValues(RetOps, dl);
11124 }
11125
11126 case Intrinsic::ppc_mma_build_dmr: {
11127    SmallVector<SDValue, 4> Pairs;
11128    SmallVector<SDValue, 8> Chains;
11129    for (int i = 1; i < 9; i += 2) {
11130 SDValue Hi = Op.getOperand(i);
11131 SDValue Lo = Op.getOperand(i + 1);
11132 if (Hi->getOpcode() == ISD::LOAD)
11133 Chains.push_back(Hi.getValue(1));
11134 if (Lo->getOpcode() == ISD::LOAD)
11135 Chains.push_back(Lo.getValue(1));
11136 Pairs.push_back(
11137 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11138 }
11139 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11140 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11141 return DAG.getMergeValues({Value, TF}, dl);
11142 }
11143
11144 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11145 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11146 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11147 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11148           "Specify P of 0 or 1 for lower or upper 512 bits");
11149 unsigned HiLo = Idx->getSExtValue();
11150 unsigned Opcode;
11151 unsigned Subx;
11152 if (HiLo == 0) {
11153 Opcode = PPC::DMXXEXTFDMR512;
11154 Subx = PPC::sub_wacc_lo;
11155 } else {
11156 Opcode = PPC::DMXXEXTFDMR512_HI;
11157 Subx = PPC::sub_wacc_hi;
11158 }
11159 SDValue Subreg(
11160 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11161 Op.getOperand(1),
11162 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11163 0);
11164 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11165 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11166 }
11167
11168 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11169 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11170 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11171    assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11172 "Specify a dmr row pair 0-3");
11173 unsigned IdxVal = Idx->getSExtValue();
11174 unsigned Subx;
11175 switch (IdxVal) {
11176 case 0:
11177 Subx = PPC::sub_dmrrowp0;
11178 break;
11179 case 1:
11180 Subx = PPC::sub_dmrrowp1;
11181 break;
11182 case 2:
11183 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11184 break;
11185 case 3:
11186 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11187 break;
11188 }
11189 SDValue Subreg(
11190 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11191 Op.getOperand(1),
11192 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11193 0);
11194 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11195 return SDValue(
11196 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11197 0);
11198 }
11199
11200 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11201 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11202 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11203 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11204           "Specify P of 0 or 1 for lower or upper 512 bits");
11205 unsigned HiLo = Idx->getSExtValue();
11206 unsigned Opcode;
11207 unsigned Subx;
11208 if (HiLo == 0) {
11209 Opcode = PPC::DMXXINSTDMR512;
11210 Subx = PPC::sub_wacc_lo;
11211 } else {
11212 Opcode = PPC::DMXXINSTDMR512_HI;
11213 Subx = PPC::sub_wacc_hi;
11214 }
11215 SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
11216 SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
11217 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11218 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11219 Op.getOperand(1), Wacc, SubReg),
11220 0);
11221 }
11222
11223 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11224 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11225 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11226    assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11227 "Specify a dmr row pair 0-3");
11228 unsigned IdxVal = Idx->getSExtValue();
11229 unsigned Subx;
11230 switch (IdxVal) {
11231 case 0:
11232 Subx = PPC::sub_dmrrowp0;
11233 break;
11234 case 1:
11235 Subx = PPC::sub_dmrrowp1;
11236 break;
11237 case 2:
11238 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11239 break;
11240 case 3:
11241 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11242 break;
11243 }
11244 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11245 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11246 SDValue Ops[] = {Op.getOperand(2), P};
11247 SDValue DMRRowp = SDValue(
11248 DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
11249 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11250 Op.getOperand(1), DMRRowp, SubReg),
11251 0);
11252 }
11253
11254 case Intrinsic::ppc_mma_xxmfacc:
11255 case Intrinsic::ppc_mma_xxmtacc: {
11256 // Allow pre-isa-future subtargets to lower as normal.
11257 if (!Subtarget.isISAFuture())
11258 return SDValue();
11259    // The xxmtacc and xxmfacc intrinsics take a single argument of type
11260    // v512i1. On Future CPUs the corresponding wacc instructions
11261    // dmxx[inst|extf]dmr512 are always generated for v512i1, so there is no
11262    // need to produce the xxm[t|f]acc instructions here.
11263 SDValue WideVec = Op.getOperand(1);
11264 DAG.ReplaceAllUsesWith(Op, WideVec);
11265 return SDValue();
11266 }
11267
11268 case Intrinsic::ppc_unpack_longdouble: {
11269 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11270 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11271 "Argument of long double unpack must be 0 or 1!");
11272 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11273 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11274 Idx->getValueType(0)));
11275 }
11276
11277 case Intrinsic::ppc_compare_exp_lt:
11278 case Intrinsic::ppc_compare_exp_gt:
11279 case Intrinsic::ppc_compare_exp_eq:
11280 case Intrinsic::ppc_compare_exp_uo: {
11281 unsigned Pred;
11282 switch (IntrinsicID) {
11283 case Intrinsic::ppc_compare_exp_lt:
11284 Pred = PPC::PRED_LT;
11285 break;
11286 case Intrinsic::ppc_compare_exp_gt:
11287 Pred = PPC::PRED_GT;
11288 break;
11289 case Intrinsic::ppc_compare_exp_eq:
11290 Pred = PPC::PRED_EQ;
11291 break;
11292 case Intrinsic::ppc_compare_exp_uo:
11293 Pred = PPC::PRED_UN;
11294 break;
11295 }
11296 return SDValue(
11297 DAG.getMachineNode(
11298 PPC::SELECT_CC_I4, dl, MVT::i32,
11299 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11300 Op.getOperand(1), Op.getOperand(2)),
11301 0),
11302 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11303 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11304 0);
11305 }
11306 case Intrinsic::ppc_test_data_class: {
11307 EVT OpVT = Op.getOperand(1).getValueType();
11308 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11309 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11310 : PPC::XSTSTDCSP);
11311 return SDValue(
11312 DAG.getMachineNode(
11313 PPC::SELECT_CC_I4, dl, MVT::i32,
11314 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11315 Op.getOperand(1)),
11316 0),
11317 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11318 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11319 0);
11320 }
11321 case Intrinsic::ppc_fnmsub: {
11322 EVT VT = Op.getOperand(1).getValueType();
11323 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11324 return DAG.getNode(
11325 ISD::FNEG, dl, VT,
11326 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11327 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11328 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11329 Op.getOperand(2), Op.getOperand(3));
11330 }
11331 case Intrinsic::ppc_convert_f128_to_ppcf128:
11332 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11333 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11334 ? RTLIB::CONVERT_PPCF128_F128
11335 : RTLIB::CONVERT_F128_PPCF128;
11336 MakeLibCallOptions CallOptions;
11337 std::pair<SDValue, SDValue> Result =
11338 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11339 dl, SDValue());
11340 return Result.first;
11341 }
11342 case Intrinsic::ppc_maxfe:
11343 case Intrinsic::ppc_maxfl:
11344 case Intrinsic::ppc_maxfs:
11345 case Intrinsic::ppc_minfe:
11346 case Intrinsic::ppc_minfl:
11347 case Intrinsic::ppc_minfs: {
11348 EVT VT = Op.getValueType();
11349 assert(
11350 all_of(Op->ops().drop_front(4),
11351 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11352 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11353 (void)VT;
11354    ISD::CondCode CC = ISD::SETGT;
11355    if (IntrinsicID == Intrinsic::ppc_minfe ||
11356 IntrinsicID == Intrinsic::ppc_minfl ||
11357 IntrinsicID == Intrinsic::ppc_minfs)
11358 CC = ISD::SETLT;
11359 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11360 SDValue Res = Op.getOperand(I);
11361 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11362 Res =
11363 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11364 }
11365 return Res;
11366 }
11367 }
11368
11369 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11370 // opcode number of the comparison.
11371 int CompareOpc;
11372 bool isDot;
11373 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11374 return SDValue(); // Don't custom lower most intrinsics.
11375
11376 // If this is a non-dot comparison, make the VCMP node and we are done.
11377 if (!isDot) {
11378 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11379 Op.getOperand(1), Op.getOperand(2),
11380 DAG.getConstant(CompareOpc, dl, MVT::i32));
11381 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11382 }
11383
11384 // Create the PPCISD altivec 'dot' comparison node.
11385 SDValue Ops[] = {
11386 Op.getOperand(2), // LHS
11387 Op.getOperand(3), // RHS
11388 DAG.getConstant(CompareOpc, dl, MVT::i32)
11389 };
11390 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11391 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11392
11393 // Unpack the result based on how the target uses it.
11394 unsigned BitNo; // Bit # of CR6.
11395 bool InvertBit; // Invert result?
11396 unsigned Bitx;
11397 unsigned SetOp;
11398 switch (Op.getConstantOperandVal(1)) {
11399 default: // Can't happen, don't crash on invalid number though.
11400 case 0: // Return the value of the EQ bit of CR6.
11401 BitNo = 0;
11402 InvertBit = false;
11403 Bitx = PPC::sub_eq;
11404 SetOp = PPCISD::SETBC;
11405 break;
11406 case 1: // Return the inverted value of the EQ bit of CR6.
11407 BitNo = 0;
11408 InvertBit = true;
11409 Bitx = PPC::sub_eq;
11410 SetOp = PPCISD::SETBCR;
11411 break;
11412 case 2: // Return the value of the LT bit of CR6.
11413 BitNo = 2;
11414 InvertBit = false;
11415 Bitx = PPC::sub_lt;
11416 SetOp = PPCISD::SETBC;
11417 break;
11418 case 3: // Return the inverted value of the LT bit of CR6.
11419 BitNo = 2;
11420 InvertBit = true;
11421 Bitx = PPC::sub_lt;
11422 SetOp = PPCISD::SETBCR;
11423 break;
11424 }
11425
11426 SDValue GlueOp = CompNode.getValue(1);
11427 if (Subtarget.isISA3_1()) {
11428 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11429 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11430 SDValue CRBit =
11431 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11432 CR6Reg, SubRegIdx, GlueOp),
11433 0);
11434 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11435 }
11436
11437 // Now that we have the comparison, emit a copy from the CR to a GPR.
11438 // This is flagged to the above dot comparison.
11439 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11440 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11441
11442 // Shift the bit into the low position.
11443 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11444 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11445 // Isolate the bit.
11446 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11447 DAG.getConstant(1, dl, MVT::i32));
11448
11449 // If we are supposed to, toggle the bit.
11450 if (InvertBit)
11451 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11452 DAG.getConstant(1, dl, MVT::i32));
11453 return Flags;
11454}
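// Editor's note (illustrative, not part of the original source): a dot-form
// predicate intrinsic such as
//   %r = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> %a,
//                                              <4 x i32> %b)
// is lowered above to a VCMP_rec node (the record form, which updates CR6);
// on ISA 3.1 the requested CR6 bit is then read with SETBC/SETBCR, while on
// older subtargets it is copied out with MFOCRF and isolated with a
// shift/and (plus an xor when the inverted sense was requested).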
11455
11456SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11457 SelectionDAG &DAG) const {
11458 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11459 // the beginning of the argument list.
11460 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11461 SDLoc DL(Op);
11462 switch (Op.getConstantOperandVal(ArgStart)) {
11463 case Intrinsic::ppc_cfence: {
11464 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11465 SDValue Val = Op.getOperand(ArgStart + 1);
11466 EVT Ty = Val.getValueType();
11467 if (Ty == MVT::i128) {
11468 // FIXME: Testing one of two paired registers is sufficient to guarantee
11469 // ordering?
11470 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11471 }
11472 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11473 return SDValue(
11474 DAG.getMachineNode(
11475 Opcode, DL, MVT::Other,
11476 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11477 Op.getOperand(0)),
11478 0);
11479 }
11480 case Intrinsic::ppc_mma_disassemble_dmr: {
11481 return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11482 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11483 }
11484 default:
11485 break;
11486 }
11487 return SDValue();
11488}
11489
11490// Lower scalar BSWAP64 to xxbrd.
11491SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11492 SDLoc dl(Op);
11493 if (!Subtarget.isPPC64())
11494 return Op;
11495 // MTVSRDD
11496 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11497 Op.getOperand(0));
11498 // XXBRD
11499 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11500 // MFVSRD
11501 int VectorIndex = 0;
11502 if (Subtarget.isLittleEndian())
11503 VectorIndex = 1;
11504 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11505 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11506 return Op;
11507}
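// Editor's note (illustrative): on a PPC64 subtarget that reaches this
// lowering, a scalar
//   %r = call i64 @llvm.bswap.i64(i64 %x)
// becomes: splat %x into both doublewords of a v2i64 (mtvsrdd), byte-reverse
// the vector (xxbrd), then move the doubleword holding the result back to a
// GPR (mfvsrd), picking element 1 on little-endian and element 0 on
// big-endian.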
11508
11509// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11510// compared to a value that is atomically loaded (atomic loads zero-extend).
11511SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11512 SelectionDAG &DAG) const {
11513 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11514 "Expecting an atomic compare-and-swap here.");
11515 SDLoc dl(Op);
11516 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11517 EVT MemVT = AtomicNode->getMemoryVT();
11518 if (MemVT.getSizeInBits() >= 32)
11519 return Op;
11520
11521 SDValue CmpOp = Op.getOperand(2);
11522 // If this is already correctly zero-extended, leave it alone.
11523 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11524 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11525 return Op;
11526
11527 // Clear the high bits of the compare operand.
11528 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11529 SDValue NewCmpOp =
11530 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11531 DAG.getConstant(MaskVal, dl, MVT::i32));
11532
11533 // Replace the existing compare operand with the properly zero-extended one.
11534  SmallVector<SDValue, 4> Ops;
11535  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11536 Ops.push_back(AtomicNode->getOperand(i));
11537 Ops[2] = NewCmpOp;
11538 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11539 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11540 auto NodeTy =
11541 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11542 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11543}
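// Editor's note (illustrative): for a sub-word
//   cmpxchg ptr %p, i8 %expected, i8 %new seq_cst seq_cst
// the atomically loaded byte is zero-extended, so %expected is masked with
// 0xff here (unless its high bits are already known to be zero) before the
// ATOMIC_CMP_SWAP_8 node is built; i16 uses 0xffff and ATOMIC_CMP_SWAP_16.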
11544
11545SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11546 SelectionDAG &DAG) const {
11547 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11548 EVT MemVT = N->getMemoryVT();
11549 assert(MemVT.getSimpleVT() == MVT::i128 &&
11550 "Expect quadword atomic operations");
11551 SDLoc dl(N);
11552 unsigned Opc = N->getOpcode();
11553 switch (Opc) {
11554 case ISD::ATOMIC_LOAD: {
11555    // Lower a quadword atomic load to int_ppc_atomic_load_i128, which is then
11556    // lowered to PPC instructions by the pattern-matching instruction selector.
11557 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11558    SmallVector<SDValue, 4> Ops{
11559        N->getOperand(0),
11560 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11561 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11562 Ops.push_back(N->getOperand(I));
11563 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11564 Ops, MemVT, N->getMemOperand());
11565 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11566 SDValue ValHi =
11567 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11568 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11569 DAG.getConstant(64, dl, MVT::i32));
11570 SDValue Val =
11571 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11572 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11573 {Val, LoadedVal.getValue(2)});
11574 }
11575 case ISD::ATOMIC_STORE: {
11576    // Lower a quadword atomic store to int_ppc_atomic_store_i128, which is then
11577    // lowered to PPC instructions by the pattern-matching instruction selector.
11578 SDVTList Tys = DAG.getVTList(MVT::Other);
11579    SmallVector<SDValue, 4> Ops{
11580        N->getOperand(0),
11581 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11582 SDValue Val = N->getOperand(1);
11583 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11584 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11585 DAG.getConstant(64, dl, MVT::i32));
11586 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11587 Ops.push_back(ValLo);
11588 Ops.push_back(ValHi);
11589 Ops.push_back(N->getOperand(2));
11590 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11591 N->getMemOperand());
11592 }
11593 default:
11594 llvm_unreachable("Unexpected atomic opcode");
11595 }
11596}
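// Editor's note (summary of the code above, not original commentary): a
// quadword atomic load is expanded into the int_ppc_atomic_load_i128
// intrinsic returning two i64 halves that are recombined as
//   result = zext(lo) | (zext(hi) << 64)
// and a quadword atomic store symmetrically splits the i128 value into
// lo = trunc(v) and hi = trunc(v >> 64) before calling
// int_ppc_atomic_store_i128.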
11597
11598static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11599                                SelectionDAG &DAG,
11600 const PPCSubtarget &Subtarget) {
11601 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11602
11603 enum DataClassMask {
11604 DC_NAN = 1 << 6,
11605 DC_NEG_INF = 1 << 4,
11606 DC_POS_INF = 1 << 5,
11607 DC_NEG_ZERO = 1 << 2,
11608 DC_POS_ZERO = 1 << 3,
11609 DC_NEG_SUBNORM = 1,
11610 DC_POS_SUBNORM = 1 << 1,
11611 };
11612
11613 EVT VT = Op.getValueType();
11614
11615 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11616 : VT == MVT::f64 ? PPC::XSTSTDCDP
11617 : PPC::XSTSTDCSP;
11618
11619 if (Mask == fcAllFlags)
11620 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11621 if (Mask == 0)
11622 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11623
11624  // Test the inverted set of flags when that is cheaper or necessary.
11625 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11626 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11627 return DAG.getNOT(Dl, Rev, MVT::i1);
11628 }
11629
11630  // PowerPC cannot test the 'normal' class directly. Test all of the other
11631  // classes first, then check for 'not not-normal' with the expected sign.
11632 if (Mask & fcNormal) {
11633 SDValue Rev(DAG.getMachineNode(
11634 TestOp, Dl, MVT::i32,
11635 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11636 DC_NEG_ZERO | DC_POS_ZERO |
11637 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11638 Dl, MVT::i32),
11639 Op),
11640 0);
11641    // The sign is stored in CR bit 0; the result is in CR bit 2.
11642 SDValue Sign(
11643 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11644 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11645 0);
11646 SDValue Normal(DAG.getNOT(
11647 Dl,
11648        SDValue(DAG.getMachineNode(
11649                    TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11650 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11651 0),
11652 MVT::i1));
11653 if (Mask & fcPosNormal)
11654 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11655 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11656 if (Mask == fcPosNormal || Mask == fcNegNormal)
11657 return Result;
11658
11659 return DAG.getNode(
11660 ISD::OR, Dl, MVT::i1,
11661 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11662 }
11663
11664  // The instruction doesn't differentiate between signaling and quiet NaNs.
11665  // Test the other classes first, then test 'is NaN and is signaling/quiet'.
11666 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11667 bool IsQuiet = Mask & fcQNan;
11668 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11669
11670 // Quietness is determined by the first bit in fraction field.
11671 uint64_t QuietMask = 0;
11672 SDValue HighWord;
11673 if (VT == MVT::f128) {
11674 HighWord = DAG.getNode(
11675 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11676 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11677 QuietMask = 0x8000;
11678 } else if (VT == MVT::f64) {
11679 if (Subtarget.isPPC64()) {
11680 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11681 DAG.getBitcast(MVT::i64, Op),
11682 DAG.getConstant(1, Dl, MVT::i32));
11683 } else {
11684 SDValue Vec = DAG.getBitcast(
11685 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11686 HighWord = DAG.getNode(
11687 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11688 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11689 }
11690 QuietMask = 0x80000;
11691 } else if (VT == MVT::f32) {
11692 HighWord = DAG.getBitcast(MVT::i32, Op);
11693 QuietMask = 0x400000;
11694 }
11695 SDValue NanRes = DAG.getSetCC(
11696 Dl, MVT::i1,
11697 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11698 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11699 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11700 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11701 if (Mask == fcQNan || Mask == fcSNan)
11702 return NanRes;
11703
11704 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11705 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11706 NanRes);
11707 }
11708
11709 unsigned NativeMask = 0;
11710 if ((Mask & fcNan) == fcNan)
11711 NativeMask |= DC_NAN;
11712 if (Mask & fcNegInf)
11713 NativeMask |= DC_NEG_INF;
11714 if (Mask & fcPosInf)
11715 NativeMask |= DC_POS_INF;
11716 if (Mask & fcNegZero)
11717 NativeMask |= DC_NEG_ZERO;
11718 if (Mask & fcPosZero)
11719 NativeMask |= DC_POS_ZERO;
11720 if (Mask & fcNegSubnormal)
11721 NativeMask |= DC_NEG_SUBNORM;
11722 if (Mask & fcPosSubnormal)
11723 NativeMask |= DC_POS_SUBNORM;
11724 return SDValue(
11725 DAG.getMachineNode(
11726 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11727          SDValue(DAG.getMachineNode(
11728                      TestOp, Dl, MVT::i32,
11729 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11730 0),
11731 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11732 0);
11733}
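// Editor's note (illustrative): with the DataClassMask encoding above, an
//   is.fpclass(%x, fcPosInf | fcNegInf)
// query on f64 becomes XSTSTDCDP with the immediate
// DC_POS_INF | DC_NEG_INF (0x30), and the result is read out of the EQ bit
// of the produced CR field; the fcNormal and single signaling/quiet NaN
// cases take the composite paths handled earlier in the function.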
11734
11735SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11736 SelectionDAG &DAG) const {
11737 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11738 SDValue LHS = Op.getOperand(0);
11739 uint64_t RHSC = Op.getConstantOperandVal(1);
11740 SDLoc Dl(Op);
11741 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11742 if (LHS.getValueType() == MVT::ppcf128) {
11743 // The higher part determines the value class.
11744 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11745 DAG.getConstant(1, Dl, MVT::i32));
11746 }
11747
11748 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11749}
11750
11751// Adjust the length value for a load/store with length: the instructions
11752// require a left-justified length, and non-byte element types require
11753// scaling by the element size.
11754static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11755 SelectionDAG &DAG) {
11756 SDLoc dl(Val);
11757 EVT VT = Val->getValueType(0);
11758 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11759 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11760 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11761 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11762}
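// Editor's note (worked example, assuming a 64-bit length register): for a
// v8i16 load-with-length on a pre-Future subtarget, Bits = 16 so TypeAdj = 1,
// and Left = true gives LeftAdj = 64 - 8 = 56; shifting the element count
// left by 57 is the same as placing the byte count (elements * 2) in the
// most-significant byte, which is where lxvl/stxvl expect the length.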
11763
11764SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11765 auto VPLD = cast<VPLoadSDNode>(Op);
11766 bool Future = Subtarget.isISAFuture();
11767 SDLoc dl(Op);
11768 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11769 "Mask predication not supported");
11770 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11771 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11772 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11773 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11774 Len = AdjustLength(Len, EltBits, !Future, DAG);
11775 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11776 VPLD->getOperand(1), Len};
11777 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11778 SDValue VPL =
11779      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
11780                              VPLD->getMemoryVT(), VPLD->getMemOperand());
11781 return VPL;
11782}
11783
11784SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11785 auto VPST = cast<VPStoreSDNode>(Op);
11786 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11787 "Mask predication not supported");
11788 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11789 SDLoc dl(Op);
11790 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11791 unsigned EltBits =
11792 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11793 bool Future = Subtarget.isISAFuture();
11794 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11795 Len = AdjustLength(Len, EltBits, !Future, DAG);
11796 SDValue Ops[] = {
11797 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11798 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11799 VPST->getOperand(2), Len};
11800 SDVTList Tys = DAG.getVTList(MVT::Other);
11801 SDValue VPS =
11802      DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
11803                              VPST->getMemoryVT(), VPST->getMemOperand());
11804 return VPS;
11805}
11806
11807SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11808 SelectionDAG &DAG) const {
11809 SDLoc dl(Op);
11810
11811 MachineFunction &MF = DAG.getMachineFunction();
11812 SDValue Op0 = Op.getOperand(0);
11813 EVT ValVT = Op0.getValueType();
11814 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11815 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11816 int64_t IntVal = Op.getConstantOperandVal(0);
11817 if (IntVal >= -16 && IntVal <= 15)
11818 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11819 dl);
11820 }
11821
11822 ReuseLoadInfo RLI;
11823 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11824 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11825 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11826 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11827
11828 MachineMemOperand *MMO =
11829        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11830                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11831 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11832    SDValue Bits = DAG.getMemIntrinsicNode(
11833        PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11834 MVT::i32, MMO);
11835 if (RLI.ResChain)
11836 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11837 return Bits.getValue(0);
11838 }
11839
11840 // Create a stack slot that is 16-byte aligned.
11841 MachineFrameInfo &MFI = MF.getFrameInfo();
11842 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11843 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11844 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11845
11846 SDValue Val = Op0;
11847  // P10 hardware store forwarding requires that a single store contain all
11848  // the data for the load, but P10 can merge a pair of adjacent stores. To
11849  // avoid load-hit-store stalls on P10 when running binaries compiled for
11850  // older processors, generate two mergeable scalar stores to forward with
11851  // the vector load.
11852 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11853 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11854 ValVT.getSizeInBits() <= 64) {
11855 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11856 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11857 SDValue ShiftBy = DAG.getConstant(
11858 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11859 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11860 SDValue Plus8 =
11861 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11862 SDValue Store2 =
11863 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11864 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11865 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11866 MachinePointerInfo());
11867 }
11868
11869 // Store the input value into Value#0 of the stack slot.
11870 SDValue Store =
11871 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11872 // Load it out.
11873 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11874}
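// Editor's note (summary, not original commentary): SCALAR_TO_VECTOR is
// handled three ways above: small constants in [-16, 15] become a canonical
// VSPLTI* splat, an i32 load feeding v4i32 is turned into an LD_SPLAT that
// reuses the load's address when the subtarget allows it, and everything
// else is spilled to a 16-byte aligned stack slot and reloaded as a vector
// (with the extra mergeable store on big-endian PPC64 integer values to
// avoid the P10 load-hit-store stalls described above).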
11875
11876SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11877 SelectionDAG &DAG) const {
11878 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11879 "Should only be called for ISD::INSERT_VECTOR_ELT");
11880
11881 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11882
11883 EVT VT = Op.getValueType();
11884 SDLoc dl(Op);
11885 SDValue V1 = Op.getOperand(0);
11886 SDValue V2 = Op.getOperand(1);
11887
11888 if (VT == MVT::v2f64 && C)
11889 return Op;
11890
11891 if (Subtarget.hasP9Vector()) {
11892    // An f32 load feeding a v4f32 insert_vector_elt is handled this way because
11893    // on P10 it lets this particular insert_vector_elt load pattern use the
11894    // refactored load and store infrastructure in order to exploit prefixed
11895    // loads.
11896    // On targets with inexpensive direct moves (Power9 and up), an
11897    // (insert_vector_elt v4f32:$vec, (f32 load)) is always better done as an
11898    // integer load, since a single-precision load would otherwise be converted
11899    // to double precision on load and then back to single precision.
11900 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11901 (isa<LoadSDNode>(V2))) {
11902 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11903 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11904 SDValue InsVecElt =
11905 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11906 BitcastLoad, Op.getOperand(2));
11907 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11908 }
11909 }
11910
11911 if (Subtarget.isISA3_1()) {
11912 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11913 return SDValue();
11914 // On P10, we have legal lowering for constant and variable indices for
11915 // all vectors.
11916 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11917 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11918 return Op;
11919 }
11920
11921 // Before P10, we have legal lowering for constant indices but not for
11922 // variable ones.
11923 if (!C)
11924 return SDValue();
11925
11926 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11927 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11928 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11929 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11930 unsigned InsertAtElement = C->getZExtValue();
11931 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11932 if (Subtarget.isLittleEndian()) {
11933 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11934 }
11935 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11936 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11937 }
11938 return Op;
11939}
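// Editor's note (illustrative): when the ISA 3.1 lowering above does not
// apply and the index is a constant, inserting into v8i16/v16i8, e.g.
//   %r = insertelement <8 x i16> %v, i16 %s, i32 3
// is emitted as MTVSRZ of %s followed by a VECINSERT at byte offset
// 3 * 2 = 6 (mirrored to (16 - 2) - 6 = 8 on little-endian), matching the
// computation above.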
11940
11941SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
11942 SelectionDAG &DAG) const {
11943 SDLoc dl(Op);
11944 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11945 SDValue LoadChain = LN->getChain();
11946 SDValue BasePtr = LN->getBasePtr();
11947 EVT VT = Op.getValueType();
11948 bool IsV1024i1 = VT == MVT::v1024i1;
11949 bool IsV2048i1 = VT == MVT::v2048i1;
11950
11951 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
11952 // Dense Math dmr pair registers, respectively.
11953 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
11954 (void)IsV2048i1;
11955 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11956 "Dense Math support required.");
11957 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11958
11959  SmallVector<SDValue, 8> Loads;
11960  SmallVector<SDValue, 8> LoadChains;
11961
11962 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
11963 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
11964 MachineMemOperand *MMO = LN->getMemOperand();
11965 unsigned NumVecs = VT.getSizeInBits() / 256;
11966 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11967 MachineMemOperand *NewMMO =
11968 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
11969 if (Idx > 0) {
11970 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11971 DAG.getConstant(32, dl, BasePtr.getValueType()));
11972 LoadOps[2] = BasePtr;
11973 }
11974    SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11975                                         DAG.getVTList(MVT::v256i1, MVT::Other),
11976 LoadOps, MVT::v256i1, NewMMO);
11977 LoadChains.push_back(Ld.getValue(1));
11978 Loads.push_back(Ld);
11979 }
11980
11981 if (Subtarget.isLittleEndian()) {
11982 std::reverse(Loads.begin(), Loads.end());
11983 std::reverse(LoadChains.begin(), LoadChains.end());
11984 }
11985
11986 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11987 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[0],
11988 Loads[1]),
11989 0);
11990 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
11991 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
11992 Loads[2], Loads[3]),
11993 0);
11994 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
11995 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
11996 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
11997
11998 SDValue Value =
11999 SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
12000
12001 if (IsV1024i1) {
12002 return DAG.getMergeValues({Value, TF}, dl);
12003 }
12004
12005 // Handle Loads for V2048i1 which represents a dmr pair.
12006 SDValue DmrPValue;
12007 SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
12008 Loads[4], Loads[5]),
12009 0);
12010 SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12011 Loads[6], Loads[7]),
12012 0);
12013 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
12014 SDValue Dmr1Value = SDValue(
12015 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
12016
12017 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12018 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12019
12020 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12021 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12022
12023 DmrPValue = SDValue(
12024 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12025
12026 return DAG.getMergeValues({DmrPValue, TF}, dl);
12027}
12028
12029SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12030 const SDLoc &dl,
12031 SelectionDAG &DAG) const {
12032 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
12033 Pairs[1]),
12034 0);
12035 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12036 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12037 Pairs[2], Pairs[3]),
12038 0);
12039 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12040 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12041
12042 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12043 {RC, Lo, LoSub, Hi, HiSub}),
12044 0);
12045}
12046
12047SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12048 SelectionDAG &DAG) const {
12049 SDLoc dl(Op);
12050 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12051 SDValue LoadChain = LN->getChain();
12052 SDValue BasePtr = LN->getBasePtr();
12053 EVT VT = Op.getValueType();
12054
12055 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12056 return LowerDMFVectorLoad(Op, DAG);
12057
12058 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12059 return Op;
12060
12061 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12062 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
12063 // 2 or 4 vsx registers.
12064 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12065 "Type unsupported without MMA");
12066 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12067 "Type unsupported without paired vector support");
12068 Align Alignment = LN->getAlign();
12069  SmallVector<SDValue, 4> Loads;
12070  SmallVector<SDValue, 4> LoadChains;
12071 unsigned NumVecs = VT.getSizeInBits() / 128;
12072 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12073 SDValue Load =
12074 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12075 LN->getPointerInfo().getWithOffset(Idx * 16),
12076 commonAlignment(Alignment, Idx * 16),
12077 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12078 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12079 DAG.getConstant(16, dl, BasePtr.getValueType()));
12080 Loads.push_back(Load);
12081 LoadChains.push_back(Load.getValue(1));
12082 }
12083 if (Subtarget.isLittleEndian()) {
12084 std::reverse(Loads.begin(), Loads.end());
12085 std::reverse(LoadChains.begin(), LoadChains.end());
12086 }
12087 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12088 SDValue Value =
12089 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12090 dl, VT, Loads);
12091 SDValue RetOps[] = {Value, TF};
12092 return DAG.getMergeValues(RetOps, dl);
12093}
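// Editor's note (summary, not original commentary): a v256i1 pair or v512i1
// accumulator load is split above into two or four ordinary v16i8 loads at
// 16-byte offsets, the register order is reversed on little-endian, and the
// pieces are reassembled with PAIR_BUILD or ACC_BUILD; the token factor of
// the individual load chains is returned as the new chain.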
12094
12095SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12096 SelectionDAG &DAG) const {
12097
12098 SDLoc dl(Op);
12099 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12100 SDValue StoreChain = SN->getChain();
12101 SDValue BasePtr = SN->getBasePtr();
12102  SmallVector<SDValue, 4> Values;
12103  SmallVector<SDValue, 4> Stores;
12104  EVT VT = SN->getValue().getValueType();
12105 bool IsV1024i1 = VT == MVT::v1024i1;
12106 bool IsV2048i1 = VT == MVT::v2048i1;
12107
12108 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12109 // Dense Math dmr pair registers, respectively.
12110 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12111 (void)IsV2048i1;
12112 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12113 "Dense Math support required.");
12114 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12115
12116 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12117 if (IsV1024i1) {
12118    SDValue Lo(DAG.getMachineNode(
12119        TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12120 Op.getOperand(1),
12121 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12122 0);
12123    SDValue Hi(DAG.getMachineNode(
12124        TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12125 Op.getOperand(1),
12126 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12127 0);
12128 MachineSDNode *ExtNode =
12129 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12130 Values.push_back(SDValue(ExtNode, 0));
12131 Values.push_back(SDValue(ExtNode, 1));
12132 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12133 Values.push_back(SDValue(ExtNode, 0));
12134 Values.push_back(SDValue(ExtNode, 1));
12135 } else {
12136 // This corresponds to v2048i1 which represents a dmr pair.
12137 SDValue Dmr0(
12138 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12139 Op.getOperand(1),
12140 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12141 0);
12142
12143 SDValue Dmr1(
12144 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12145 Op.getOperand(1),
12146 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12147 0);
12148
12149 SDValue Dmr0Lo(DAG.getMachineNode(
12150 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12151 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12152 0);
12153
12154 SDValue Dmr0Hi(DAG.getMachineNode(
12155 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12156 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12157 0);
12158
12159 SDValue Dmr1Lo(DAG.getMachineNode(
12160 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12161 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12162 0);
12163
12164 SDValue Dmr1Hi(DAG.getMachineNode(
12165 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12166 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12167 0);
12168
12169 MachineSDNode *ExtNode =
12170 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12171 Values.push_back(SDValue(ExtNode, 0));
12172 Values.push_back(SDValue(ExtNode, 1));
12173 ExtNode =
12174 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12175 Values.push_back(SDValue(ExtNode, 0));
12176 Values.push_back(SDValue(ExtNode, 1));
12177 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12178 Values.push_back(SDValue(ExtNode, 0));
12179 Values.push_back(SDValue(ExtNode, 1));
12180 ExtNode =
12181 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12182 Values.push_back(SDValue(ExtNode, 0));
12183 Values.push_back(SDValue(ExtNode, 1));
12184 }
12185
12186 if (Subtarget.isLittleEndian())
12187 std::reverse(Values.begin(), Values.end());
12188
12189 SDVTList Tys = DAG.getVTList(MVT::Other);
12190  SmallVector<SDValue, 4> Ops{
12191      StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12192 Values[0], BasePtr};
12193 MachineMemOperand *MMO = SN->getMemOperand();
12194 unsigned NumVecs = VT.getSizeInBits() / 256;
12195 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12196 MachineMemOperand *NewMMO =
12197 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12198 if (Idx > 0) {
12199 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12200 DAG.getConstant(32, dl, BasePtr.getValueType()));
12201 Ops[3] = BasePtr;
12202 }
12203 Ops[2] = Values[Idx];
12204    SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
12205                                         MVT::v256i1, NewMMO);
12206 Stores.push_back(St);
12207 }
12208
12209 SDValue TF = DAG.getTokenFactor(dl, Stores);
12210 return TF;
12211}
12212
12213SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12214 SelectionDAG &DAG) const {
12215 SDLoc dl(Op);
12216 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12217 SDValue StoreChain = SN->getChain();
12218 SDValue BasePtr = SN->getBasePtr();
12219 SDValue Value = SN->getValue();
12220 SDValue Value2 = SN->getValue();
12221 EVT StoreVT = Value.getValueType();
12222
12223 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12224 return LowerDMFVectorStore(Op, DAG);
12225
12226 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12227 return Op;
12228
12229 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12230 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
12231 // underlying registers individually.
12232 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12233 "Type unsupported without MMA");
12234 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12235 "Type unsupported without paired vector support");
12236 Align Alignment = SN->getAlign();
12237  SmallVector<SDValue, 4> Stores;
12238  unsigned NumVecs = 2;
12239 if (StoreVT == MVT::v512i1) {
12240 if (Subtarget.isISAFuture()) {
12241 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12242 MachineSDNode *ExtNode = DAG.getMachineNode(
12243 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12244
12245 Value = SDValue(ExtNode, 0);
12246 Value2 = SDValue(ExtNode, 1);
12247 } else
12248 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12249 NumVecs = 4;
12250 }
12251 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12252 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12253 SDValue Elt;
12254 if (Subtarget.isISAFuture()) {
12255 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12256 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12257 Idx > 1 ? Value2 : Value,
12258 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12259 } else
12260 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12261 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12262
12263 SDValue Store =
12264 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12265 SN->getPointerInfo().getWithOffset(Idx * 16),
12266 commonAlignment(Alignment, Idx * 16),
12267 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12268 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12269 DAG.getConstant(16, dl, BasePtr.getValueType()));
12270 Stores.push_back(Store);
12271 }
12272 SDValue TF = DAG.getTokenFactor(dl, Stores);
12273 return TF;
12274}
12275
12276SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12277 SDLoc dl(Op);
12278 if (Op.getValueType() == MVT::v4i32) {
12279 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12280
12281 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12282 // +16 as shift amt.
12283 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12284 SDValue RHSSwap = // = vrlw RHS, 16
12285 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12286
12287 // Shrinkify inputs to v8i16.
12288 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12289 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12290 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12291
12292 // Low parts multiplied together, generating 32-bit results (we ignore the
12293 // top parts).
12294 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12295 LHS, RHS, DAG, dl, MVT::v4i32);
12296
12297 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12298 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12299 // Shift the high parts up 16 bits.
12300 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12301 Neg16, DAG, dl);
12302 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12303 } else if (Op.getValueType() == MVT::v16i8) {
12304 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12305 bool isLittleEndian = Subtarget.isLittleEndian();
12306
12307 // Multiply the even 8-bit parts, producing 16-bit sums.
12308 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12309 LHS, RHS, DAG, dl, MVT::v8i16);
12310 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12311
12312 // Multiply the odd 8-bit parts, producing 16-bit sums.
12313 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12314 LHS, RHS, DAG, dl, MVT::v8i16);
12315 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12316
12317 // Merge the results together. Because vmuleub and vmuloub are
12318 // instructions with a big-endian bias, we must reverse the
12319 // element numbering and reverse the meaning of "odd" and "even"
12320 // when generating little endian code.
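// The shuffle mask built below selects the low byte of each 16-bit product
// and interleaves the even-element and odd-element products back into the
// original element order.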
12321 int Ops[16];
12322 for (unsigned i = 0; i != 8; ++i) {
12323 if (isLittleEndian) {
12324 Ops[i*2 ] = 2*i;
12325 Ops[i*2+1] = 2*i+16;
12326 } else {
12327 Ops[i*2 ] = 2*i+1;
12328 Ops[i*2+1] = 2*i+1+16;
12329 }
12330 }
12331 if (isLittleEndian)
12332 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12333 else
12334 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12335 } else {
12336 llvm_unreachable("Unknown mul to lower!");
12337 }
12338}
12339
12340SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12341 bool IsStrict = Op->isStrictFPOpcode();
12342 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12343 !Subtarget.hasP9Vector())
12344 return SDValue();
12345
12346 return Op;
12347}
12348
12349 // Custom lowering for fpext v2f32 to v2f64
12350SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12351
12352 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12353 "Should only be called for ISD::FP_EXTEND");
12354
12355 // FIXME: handle extends from half precision float vectors on P9.
12356 // We only want to custom lower an extend from v2f32 to v2f64.
12357 if (Op.getValueType() != MVT::v2f64 ||
12358 Op.getOperand(0).getValueType() != MVT::v2f32)
12359 return SDValue();
12360
12361 SDLoc dl(Op);
12362 SDValue Op0 = Op.getOperand(0);
12363
12364 switch (Op0.getOpcode()) {
12365 default:
12366 return SDValue();
12367 case ISD::EXTRACT_SUBVECTOR: {
12368 assert(Op0.getNumOperands() == 2 &&
12369 Op0.getOperand(1).getOpcode() == ISD::Constant &&
12370 "Node should have 2 operands with second one being a constant!");
12371
12372 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12373 return SDValue();
12374
12375 // Custom lowering is only done for the high or low doubleword.
12376 int Idx = Op0.getConstantOperandVal(1);
12377 if (Idx % 2 != 0)
12378 return SDValue();
12379
12380 // Since input is v4f32, at this point Idx is either 0 or 2.
12381 // Shift to get the doubleword position we want.
12382 int DWord = Idx >> 1;
12383
12384 // High and low word positions are different on little endian.
12385 if (Subtarget.isLittleEndian())
12386 DWord ^= 0x1;
12387
12388 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12389 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12390 }
12391 case ISD::FADD:
12392 case ISD::FMUL:
12393 case ISD::FSUB: {
12394 SDValue NewLoad[2];
12395 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12396 // Ensure both inputs are loads.
12397 SDValue LdOp = Op0.getOperand(i);
12398 if (LdOp.getOpcode() != ISD::LOAD)
12399 return SDValue();
12400 // Generate new load node.
12401 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12402 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12403 NewLoad[i] = DAG.getMemIntrinsicNode(
12404 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12405 LD->getMemoryVT(), LD->getMemOperand());
12406 }
12407 SDValue NewOp =
12408 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12409 NewLoad[1], Op0.getNode()->getFlags());
12410 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12411 DAG.getConstant(0, dl, MVT::i32));
12412 }
12413 case ISD::LOAD: {
12414 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12415 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12416 SDValue NewLd = DAG.getMemIntrinsicNode(
12417 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12418 LD->getMemoryVT(), LD->getMemOperand());
12419 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12420 DAG.getConstant(0, dl, MVT::i32));
12421 }
12422 }
12423 llvm_unreachable("ERROR: Should return for all cases within switch.");
12424}
12425
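// Convert a boolean carry value (an i1 CR bit or a 0/1 integer) into the CA
// flag consumed by PPCISD::ADDE/SUBE: adding the value to -1 produces a
// carry-out exactly when the value is 1.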
12426 static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12427 SelectionDAG &DAG,
12428 const PPCSubtarget &STI) {
12429 SDLoc DL(Value);
12430 if (STI.useCRBits())
12431 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12432 DAG.getConstant(1, DL, SumType),
12433 DAG.getConstant(0, DL, SumType));
12434 else
12435 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
12436 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12437 Value, DAG.getAllOnesConstant(DL, SumType));
12438 return Sum.getValue(1);
12439}
12440
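// Materialize the CA flag as a value of CarryType: ADDE(0, 0, CA) leaves CA in
// the sum, which is then tested against zero (CR-bit mode) or zero-extended or
// truncated to the requested type.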
12441 static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12442 EVT CarryType, SelectionDAG &DAG,
12443 const PPCSubtarget &STI) {
12444 SDLoc DL(Flag);
12445 SDValue Zero = DAG.getConstant(0, DL, SumType);
12446 SDValue Carry = DAG.getNode(
12447 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12448 if (STI.useCRBits())
12449 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12450 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12451}
12452
12453SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12454
12455 SDLoc DL(Op);
12456 SDNode *N = Op.getNode();
12457 EVT VT = N->getValueType(0);
12458 EVT CarryType = N->getValueType(1);
12459 unsigned Opc = N->getOpcode();
12460 bool IsAdd = Opc == ISD::UADDO;
12461 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12462 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12463 N->getOperand(0), N->getOperand(1));
12464 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12465 DAG, Subtarget);
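// The PPC CA bit acts as a "no borrow" indicator for subtraction, while USUBO
// expects a borrow flag, so invert the carry for the subtract case.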
12466 if (!IsAdd)
12467 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12468 DAG.getConstant(1UL, DL, CarryType));
12469 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12470}
12471
12472SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12473 SelectionDAG &DAG) const {
12474 SDLoc DL(Op);
12475 SDNode *N = Op.getNode();
12476 unsigned Opc = N->getOpcode();
12477 EVT VT = N->getValueType(0);
12478 EVT CarryType = N->getValueType(1);
12479 SDValue CarryOp = N->getOperand(2);
12480 bool IsAdd = Opc == ISD::UADDO_CARRY;
12481 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12482 if (!IsAdd)
12483 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12484 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12485 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12486 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12487 Op.getOperand(0), Op.getOperand(1), CarryOp);
12488 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12489 Subtarget);
12490 if (!IsAdd)
12491 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12492 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12493 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12494}
12495
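/// Implements signed subtract with overflow detection using the rule:
/// (x xor y) & ((x - y) xor x), where the overflow bit is extracted from the
/// sign bit of the result.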
12496SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12497
12498 SDLoc dl(Op);
12499 SDValue LHS = Op.getOperand(0);
12500 SDValue RHS = Op.getOperand(1);
12501 EVT VT = Op.getNode()->getValueType(0);
12502
12503 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12504
12505 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12506 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12507
12508 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12509
12510 SDValue Overflow =
12511 DAG.getNode(ISD::SRL, dl, VT, And,
12512 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12513
12514 SDValue OverflowTrunc =
12515 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12516
12517 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12518}
12519
12520/// Implements signed add with overflow detection using the rule:
12521 /// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign bit.
12522SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12523
12524 SDLoc dl(Op);
12525 SDValue LHS = Op.getOperand(0);
12526 SDValue RHS = Op.getOperand(1);
12527 EVT VT = Op.getNode()->getValueType(0);
12528
12529 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12530
12531 // Compute ~(x xor y)
12532 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12533 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12534 // Compute (s xor x)
12535 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12536
12537 // overflow = (x eqv y) & (s xor x)
12538 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
12539
12540 // Shift sign bit down to LSB
12541 SDValue Overflow =
12542 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12543 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12544 // Truncate to the overflow type (i1)
12545 SDValue OverflowTrunc =
12546 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12547
12548 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12549}
12550
12551// Lower unsigned 3-way compare producing -1/0/1.
12552SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12553 SDLoc DL(Op);
12554 SDValue A = DAG.getFreeze(Op.getOperand(0));
12555 SDValue B = DAG.getFreeze(Op.getOperand(1));
12556 EVT OpVT = A.getValueType(); // operand type
12557 EVT ResVT = Op.getValueType(); // result type
12558
12559 // First compute diff = A - B (will become subf).
12560 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12561
12562 // Generate B - A using SUBC to capture carry.
12563 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12564 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12565 SDValue CA0 = SubC.getValue(1);
12566
12567 // t2 = A - B + CA0 using SUBE.
12568 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12569 SDValue CA1 = SubE1.getValue(1);
12570
12571 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12572 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12573
12574 // Extract the first result and truncate to result type if needed
12575 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12576}
12577
12578/// LowerOperation - Provide custom lowering hooks for some operations.
12579///
12580 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12581 switch (Op.getOpcode()) {
12582 default:
12583 llvm_unreachable("Wasn't expecting to be able to lower this!");
12584 case ISD::FPOW: return lowerPow(Op, DAG);
12585 case ISD::FSIN: return lowerSin(Op, DAG);
12586 case ISD::FCOS: return lowerCos(Op, DAG);
12587 case ISD::FLOG: return lowerLog(Op, DAG);
12588 case ISD::FLOG10: return lowerLog10(Op, DAG);
12589 case ISD::FEXP: return lowerExp(Op, DAG);
12590 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12591 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12592 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12593 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12594 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12595 case ISD::STRICT_FSETCC:
12596 case ISD::STRICT_FSETCCS:
12597 case ISD::SETCC: return LowerSETCC(Op, DAG);
12598 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12599 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12600 case ISD::SSUBO:
12601 return LowerSSUBO(Op, DAG);
12602 case ISD::SADDO:
12603 return LowerSADDO(Op, DAG);
12604
12605 case ISD::INLINEASM:
12606 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12607 // Variable argument lowering.
12608 case ISD::VASTART: return LowerVASTART(Op, DAG);
12609 case ISD::VAARG: return LowerVAARG(Op, DAG);
12610 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12611
12612 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12613 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12614 case ISD::GET_DYNAMIC_AREA_OFFSET:
12615 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12616
12617 // Exception handling lowering.
12618 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12619 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12620 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12621
12622 case ISD::LOAD: return LowerLOAD(Op, DAG);
12623 case ISD::STORE: return LowerSTORE(Op, DAG);
12624 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12625 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12626 case ISD::STRICT_FP_TO_UINT:
12627 case ISD::STRICT_FP_TO_SINT:
12628 case ISD::FP_TO_UINT:
12629 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12630 case ISD::STRICT_UINT_TO_FP:
12631 case ISD::STRICT_SINT_TO_FP:
12632 case ISD::UINT_TO_FP:
12633 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12634 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12635 case ISD::SET_ROUNDING:
12636 return LowerSET_ROUNDING(Op, DAG);
12637
12638 // Lower 64-bit shifts.
12639 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12640 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12641 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12642
12643 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12644 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12645
12646 // Vector-related lowering.
12647 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12648 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12649 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12650 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12651 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12652 case ISD::MUL: return LowerMUL(Op, DAG);
12653 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12654 case ISD::STRICT_FP_ROUND:
12655 case ISD::FP_ROUND:
12656 return LowerFP_ROUND(Op, DAG);
12657 case ISD::ROTL: return LowerROTL(Op, DAG);
12658
12659 // For counter-based loop handling.
12660 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12661
12662 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12663
12664 // Frame & Return address.
12665 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12666 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12667
12668 case ISD::INTRINSIC_VOID:
12669 return LowerINTRINSIC_VOID(Op, DAG);
12670 case ISD::BSWAP:
12671 return LowerBSWAP(Op, DAG);
12672 case ISD::ATOMIC_CMP_SWAP:
12673 return LowerATOMIC_CMP_SWAP(Op, DAG);
12674 case ISD::ATOMIC_STORE:
12675 return LowerATOMIC_LOAD_STORE(Op, DAG);
12676 case ISD::IS_FPCLASS:
12677 return LowerIS_FPCLASS(Op, DAG);
12678 case ISD::UADDO:
12679 case ISD::USUBO:
12680 return LowerADDSUBO(Op, DAG);
12681 case ISD::UADDO_CARRY:
12682 case ISD::USUBO_CARRY:
12683 return LowerADDSUBO_CARRY(Op, DAG);
12684 case ISD::UCMP:
12685 return LowerUCMP(Op, DAG);
12686 case ISD::STRICT_LRINT:
12687 case ISD::STRICT_LLRINT:
12688 case ISD::STRICT_LROUND:
12689 case ISD::STRICT_LLROUND:
12691 if (Op->getFlags().hasNoFPExcept())
12692 return Op;
12693 return SDValue();
12694 case ISD::VP_LOAD:
12695 return LowerVP_LOAD(Op, DAG);
12696 case ISD::VP_STORE:
12697 return LowerVP_STORE(Op, DAG);
12698 }
12699}
12700
12701 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12702 SmallVectorImpl<SDValue> &Results,
12703 SelectionDAG &DAG) const {
12704 SDLoc dl(N);
12705 switch (N->getOpcode()) {
12706 default:
12707 llvm_unreachable("Do not know how to custom type legalize this operation!");
12708 case ISD::ATOMIC_LOAD: {
12709 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12710 Results.push_back(Res);
12711 Results.push_back(Res.getValue(1));
12712 break;
12713 }
12714 case ISD::READCYCLECOUNTER: {
12715 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12716 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12717
12718 Results.push_back(
12719 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12720 Results.push_back(RTB.getValue(2));
12721 break;
12722 }
12723 case ISD::INTRINSIC_W_CHAIN: {
12724 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12725 break;
12726
12727 assert(N->getValueType(0) == MVT::i1 &&
12728 "Unexpected result type for CTR decrement intrinsic");
12729 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12730 N->getValueType(0));
12731 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12732 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12733 N->getOperand(1));
12734
12735 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12736 Results.push_back(NewInt.getValue(1));
12737 break;
12738 }
12739 case ISD::INTRINSIC_WO_CHAIN: {
12740 switch (N->getConstantOperandVal(0)) {
12741 case Intrinsic::ppc_pack_longdouble:
12742 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12743 N->getOperand(2), N->getOperand(1)));
12744 break;
12745 case Intrinsic::ppc_maxfe:
12746 case Intrinsic::ppc_minfe:
12747 case Intrinsic::ppc_fnmsub:
12748 case Intrinsic::ppc_convert_f128_to_ppcf128:
12749 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12750 break;
12751 }
12752 break;
12753 }
12754 case ISD::VAARG: {
12755 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12756 return;
12757
12758 EVT VT = N->getValueType(0);
12759
12760 if (VT == MVT::i64) {
12761 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12762
12763 Results.push_back(NewNode);
12764 Results.push_back(NewNode.getValue(1));
12765 }
12766 return;
12767 }
12768 case ISD::STRICT_FP_TO_SINT:
12769 case ISD::STRICT_FP_TO_UINT:
12770 case ISD::FP_TO_SINT:
12771 case ISD::FP_TO_UINT: {
12772 // LowerFP_TO_INT() can only handle f32 and f64.
12773 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12774 MVT::ppcf128)
12775 return;
12776 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12777 Results.push_back(LoweredValue);
12778 if (N->isStrictFPOpcode())
12779 Results.push_back(LoweredValue.getValue(1));
12780 return;
12781 }
12782 case ISD::TRUNCATE: {
12783 if (!N->getValueType(0).isVector())
12784 return;
12785 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12786 if (Lowered)
12787 Results.push_back(Lowered);
12788 return;
12789 }
12790 case ISD::SCALAR_TO_VECTOR: {
12791 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12792 if (Lowered)
12793 Results.push_back(Lowered);
12794 return;
12795 }
12796 case ISD::FSHL:
12797 case ISD::FSHR:
12798 // Don't handle funnel shifts here.
12799 return;
12800 case ISD::BITCAST:
12801 // Don't handle bitcast here.
12802 return;
12803 case ISD::FP_EXTEND:
12804 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12805 if (Lowered)
12806 Results.push_back(Lowered);
12807 return;
12808 }
12809}
12810
12811//===----------------------------------------------------------------------===//
12812// Other Lowering Code
12813//===----------------------------------------------------------------------===//
12814
12815 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12816 return Builder.CreateIntrinsic(Id, {});
12817}
12818
12819 Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12820 Value *Addr,
12821 AtomicOrdering Ord) const {
12822 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12823
12824 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12825 "Only 8/16/32/64-bit atomic loads supported");
12826 Intrinsic::ID IntID;
12827 switch (SZ) {
12828 default:
12829 llvm_unreachable("Unexpected PrimitiveSize");
12830 case 8:
12831 IntID = Intrinsic::ppc_lbarx;
12832 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12833 break;
12834 case 16:
12835 IntID = Intrinsic::ppc_lharx;
12836 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12837 break;
12838 case 32:
12839 IntID = Intrinsic::ppc_lwarx;
12840 break;
12841 case 64:
12842 IntID = Intrinsic::ppc_ldarx;
12843 break;
12844 }
12845 Value *Call =
12846 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12847
12848 return Builder.CreateTruncOrBitCast(Call, ValueTy);
12849}
12850
12851// Perform a store-conditional operation to Addr. Return the status of the
12852// store. This should be 0 if the store succeeded, non-zero otherwise.
12853 Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12854 Value *Val, Value *Addr,
12855 AtomicOrdering Ord) const {
12856 Type *Ty = Val->getType();
12857 unsigned SZ = Ty->getPrimitiveSizeInBits();
12858
12859 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12860 "Only 8/16/32/64-bit atomic stores supported");
12861 Intrinsic::ID IntID;
12862 switch (SZ) {
12863 default:
12864 llvm_unreachable("Unexpected PrimitiveSize");
12865 case 8:
12866 IntID = Intrinsic::ppc_stbcx;
12867 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12868 break;
12869 case 16:
12870 IntID = Intrinsic::ppc_sthcx;
12871 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12872 break;
12873 case 32:
12874 IntID = Intrinsic::ppc_stwcx;
12875 break;
12876 case 64:
12877 IntID = Intrinsic::ppc_stdcx;
12878 break;
12879 }
12880
12881 if (SZ == 8 || SZ == 16)
12882 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
12883
12884 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
12885 /*FMFSource=*/nullptr, "stcx");
12886 return Builder.CreateXor(Call, Builder.getInt32(1));
12887}
12888
12889 // The mappings for emitLeading/TrailingFence are taken from
12890// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
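// Leading fence: sync for seq_cst, lwsync for release and stronger.
// Trailing fence (below): cfence after an acquire load, lwsync after other
// acquire-or-stronger operations that include a load.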
12891 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12892 Instruction *Inst,
12893 AtomicOrdering Ord) const {
12894 if (Ord == AtomicOrdering::SequentiallyConsistent)
12895 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12896 if (isReleaseOrStronger(Ord))
12897 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12898 return nullptr;
12899}
12900
12901 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12902 Instruction *Inst,
12903 AtomicOrdering Ord) const {
12904 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12905 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12906 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12907 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12908 if (isa<LoadInst>(Inst))
12909 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12910 {Inst});
12911 // FIXME: Can use isync for rmw operation.
12912 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12913 }
12914 return nullptr;
12915}
12916
12917 MachineBasicBlock *
12918 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12919 unsigned AtomicSize,
12920 unsigned BinOpcode,
12921 unsigned CmpOpcode,
12922 unsigned CmpPred) const {
12923 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12924 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12925
12926 auto LoadMnemonic = PPC::LDARX;
12927 auto StoreMnemonic = PPC::STDCX;
12928 switch (AtomicSize) {
12929 default:
12930 llvm_unreachable("Unexpected size of atomic entity");
12931 case 1:
12932 LoadMnemonic = PPC::LBARX;
12933 StoreMnemonic = PPC::STBCX;
12934 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12935 break;
12936 case 2:
12937 LoadMnemonic = PPC::LHARX;
12938 StoreMnemonic = PPC::STHCX;
12939 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12940 break;
12941 case 4:
12942 LoadMnemonic = PPC::LWARX;
12943 StoreMnemonic = PPC::STWCX;
12944 break;
12945 case 8:
12946 LoadMnemonic = PPC::LDARX;
12947 StoreMnemonic = PPC::STDCX;
12948 break;
12949 }
12950
12951 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12952 MachineFunction *F = BB->getParent();
12953 MachineFunction::iterator It = ++BB->getIterator();
12954 
12955 Register dest = MI.getOperand(0).getReg();
12956 Register ptrA = MI.getOperand(1).getReg();
12957 Register ptrB = MI.getOperand(2).getReg();
12958 Register incr = MI.getOperand(3).getReg();
12959 DebugLoc dl = MI.getDebugLoc();
12960
12961 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12962 MachineBasicBlock *loop2MBB =
12963 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12964 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12965 F->insert(It, loopMBB);
12966 if (CmpOpcode)
12967 F->insert(It, loop2MBB);
12968 F->insert(It, exitMBB);
12969 exitMBB->splice(exitMBB->begin(), BB,
12970 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12971 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12972 
12973 MachineRegisterInfo &RegInfo = F->getRegInfo();
12974 Register TmpReg = (!BinOpcode) ? incr :
12975 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12976 : &PPC::GPRCRegClass);
12977
12978 // thisMBB:
12979 // ...
12980 // fallthrough --> loopMBB
12981 BB->addSuccessor(loopMBB);
12982
12983 // loopMBB:
12984 // l[wd]arx dest, ptr
12985 // add r0, dest, incr
12986 // st[wd]cx. r0, ptr
12987 // bne- loopMBB
12988 // fallthrough --> exitMBB
12989
12990 // For max/min...
12991 // loopMBB:
12992 // l[wd]arx dest, ptr
12993 // cmpl?[wd] dest, incr
12994 // bgt exitMBB
12995 // loop2MBB:
12996 // st[wd]cx. dest, ptr
12997 // bne- loopMBB
12998 // fallthrough --> exitMBB
12999
13000 BB = loopMBB;
13001 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13002 .addReg(ptrA).addReg(ptrB);
13003 if (BinOpcode)
13004 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13005 if (CmpOpcode) {
13006 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13007 // Signed comparisons of byte or halfword values must be sign-extended.
13008 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13009 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13010 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13011 ExtReg).addReg(dest);
13012 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13013 } else
13014 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13015
13016 BuildMI(BB, dl, TII->get(PPC::BCC))
13017 .addImm(CmpPred)
13018 .addReg(CrReg)
13019 .addMBB(exitMBB);
13020 BB->addSuccessor(loop2MBB);
13021 BB->addSuccessor(exitMBB);
13022 BB = loop2MBB;
13023 }
13024 BuildMI(BB, dl, TII->get(StoreMnemonic))
13025 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13026 BuildMI(BB, dl, TII->get(PPC::BCC))
13027 .addImm(PPC::PRED_NE)
13028 .addReg(PPC::CR0)
13029 .addMBB(loopMBB);
13030 BB->addSuccessor(loopMBB);
13031 BB->addSuccessor(exitMBB);
13032
13033 // exitMBB:
13034 // ...
13035 BB = exitMBB;
13036 return BB;
13037}
13038
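// Returns true when the result of MI is known to already be sign-extended.
// EmitPartwordAtomicBinary uses this to avoid emitting a redundant sign
// extension before a signed byte/halfword min/max comparison.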
13039 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13040 switch(MI.getOpcode()) {
13041 default:
13042 return false;
13043 case PPC::COPY:
13044 return TII->isSignExtended(MI.getOperand(1).getReg(),
13045 &MI.getMF()->getRegInfo());
13046 case PPC::LHA:
13047 case PPC::LHA8:
13048 case PPC::LHAU:
13049 case PPC::LHAU8:
13050 case PPC::LHAUX:
13051 case PPC::LHAUX8:
13052 case PPC::LHAX:
13053 case PPC::LHAX8:
13054 case PPC::LWA:
13055 case PPC::LWAUX:
13056 case PPC::LWAX:
13057 case PPC::LWAX_32:
13058 case PPC::LWA_32:
13059 case PPC::PLHA:
13060 case PPC::PLHA8:
13061 case PPC::PLHA8pc:
13062 case PPC::PLHApc:
13063 case PPC::PLWA:
13064 case PPC::PLWA8:
13065 case PPC::PLWA8pc:
13066 case PPC::PLWApc:
13067 case PPC::EXTSB:
13068 case PPC::EXTSB8:
13069 case PPC::EXTSB8_32_64:
13070 case PPC::EXTSB8_rec:
13071 case PPC::EXTSB_rec:
13072 case PPC::EXTSH:
13073 case PPC::EXTSH8:
13074 case PPC::EXTSH8_32_64:
13075 case PPC::EXTSH8_rec:
13076 case PPC::EXTSH_rec:
13077 case PPC::EXTSW:
13078 case PPC::EXTSWSLI:
13079 case PPC::EXTSWSLI_32_64:
13080 case PPC::EXTSWSLI_32_64_rec:
13081 case PPC::EXTSWSLI_rec:
13082 case PPC::EXTSW_32:
13083 case PPC::EXTSW_32_64:
13084 case PPC::EXTSW_32_64_rec:
13085 case PPC::EXTSW_rec:
13086 case PPC::SRAW:
13087 case PPC::SRAWI:
13088 case PPC::SRAWI_rec:
13089 case PPC::SRAW_rec:
13090 return true;
13091 }
13092 return false;
13093}
13094
13095 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
13096 MachineInstr &MI, MachineBasicBlock *BB,
13097 bool is8bit, // operation
13098 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13099 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13100 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13101
13102 // If this is a signed comparison and the value being compared is not known
13103 // to be sign extended, sign extend it here.
13104 DebugLoc dl = MI.getDebugLoc();
13105 MachineFunction *F = BB->getParent();
13106 MachineRegisterInfo &RegInfo = F->getRegInfo();
13107 Register incr = MI.getOperand(3).getReg();
13108 bool IsSignExtended =
13109 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13110
13111 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13112 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13113 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13114 .addReg(MI.getOperand(3).getReg());
13115 MI.getOperand(3).setReg(ValueReg);
13116 incr = ValueReg;
13117 }
13118 // If we support part-word atomic mnemonics, just use them.
13119 if (Subtarget.hasPartwordAtomics())
13120 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13121 CmpPred);
13122
13123 // In 64-bit mode we have to use 64 bits for addresses, even though the
13124 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use the address
13125 // registers without caring whether they're 32 or 64 bits, but here we're
13126 // doing actual arithmetic on the addresses.
13127 bool is64bit = Subtarget.isPPC64();
13128 bool isLittleEndian = Subtarget.isLittleEndian();
13129 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13130
13131 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13132 MachineFunction::iterator It = ++BB->getIterator();
13133 
13134 Register dest = MI.getOperand(0).getReg();
13135 Register ptrA = MI.getOperand(1).getReg();
13136 Register ptrB = MI.getOperand(2).getReg();
13137
13138 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13139 MachineBasicBlock *loop2MBB =
13140 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13141 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13142 F->insert(It, loopMBB);
13143 if (CmpOpcode)
13144 F->insert(It, loop2MBB);
13145 F->insert(It, exitMBB);
13146 exitMBB->splice(exitMBB->begin(), BB,
13147 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13148 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13149 
13150 const TargetRegisterClass *RC =
13151 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13152 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13153
13154 Register PtrReg = RegInfo.createVirtualRegister(RC);
13155 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13156 Register ShiftReg =
13157 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13158 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13159 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13160 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13161 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13162 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13163 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13164 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13165 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13166 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13167 Register Ptr1Reg;
13168 Register TmpReg =
13169 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13170
13171 // thisMBB:
13172 // ...
13173 // fallthrough --> loopMBB
13174 BB->addSuccessor(loopMBB);
13175
13176 // The 4-byte load must be aligned, while a char or short may be
13177 // anywhere in the word. Hence all this nasty bookkeeping code.
13178 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13179 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13180 // xori shift, shift1, 24 [16]
13181 // rlwinm ptr, ptr1, 0, 0, 29
13182 // slw incr2, incr, shift
13183 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13184 // slw mask, mask2, shift
13185 // loopMBB:
13186 // lwarx tmpDest, ptr
13187 // add tmp, tmpDest, incr2
13188 // andc tmp2, tmpDest, mask
13189 // and tmp3, tmp, mask
13190 // or tmp4, tmp3, tmp2
13191 // stwcx. tmp4, ptr
13192 // bne- loopMBB
13193 // fallthrough --> exitMBB
13194 // srw SrwDest, tmpDest, shift
13195 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
13196 if (ptrA != ZeroReg) {
13197 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13198 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13199 .addReg(ptrA)
13200 .addReg(ptrB);
13201 } else {
13202 Ptr1Reg = ptrB;
13203 }
13204 // We need to use a 32-bit subregister to avoid a register class mismatch in
13205 // 64-bit mode.
13206 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13207 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13208 .addImm(3)
13209 .addImm(27)
13210 .addImm(is8bit ? 28 : 27);
13211 if (!isLittleEndian)
13212 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13213 .addReg(Shift1Reg)
13214 .addImm(is8bit ? 24 : 16);
13215 if (is64bit)
13216 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13217 .addReg(Ptr1Reg)
13218 .addImm(0)
13219 .addImm(61);
13220 else
13221 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13222 .addReg(Ptr1Reg)
13223 .addImm(0)
13224 .addImm(0)
13225 .addImm(29);
13226 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13227 if (is8bit)
13228 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13229 else {
13230 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13231 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13232 .addReg(Mask3Reg)
13233 .addImm(65535);
13234 }
13235 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13236 .addReg(Mask2Reg)
13237 .addReg(ShiftReg);
13238
13239 BB = loopMBB;
13240 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13241 .addReg(ZeroReg)
13242 .addReg(PtrReg);
13243 if (BinOpcode)
13244 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13245 .addReg(Incr2Reg)
13246 .addReg(TmpDestReg);
13247 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13248 .addReg(TmpDestReg)
13249 .addReg(MaskReg);
13250 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13251 if (CmpOpcode) {
13252 // For unsigned comparisons, we can directly compare the shifted values.
13253 // For signed comparisons we shift and sign extend.
13254 Register SReg = RegInfo.createVirtualRegister(GPRC);
13255 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13256 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13257 .addReg(TmpDestReg)
13258 .addReg(MaskReg);
13259 unsigned ValueReg = SReg;
13260 unsigned CmpReg = Incr2Reg;
13261 if (CmpOpcode == PPC::CMPW) {
13262 ValueReg = RegInfo.createVirtualRegister(GPRC);
13263 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13264 .addReg(SReg)
13265 .addReg(ShiftReg);
13266 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13267 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13268 .addReg(ValueReg);
13269 ValueReg = ValueSReg;
13270 CmpReg = incr;
13271 }
13272 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13273 BuildMI(BB, dl, TII->get(PPC::BCC))
13274 .addImm(CmpPred)
13275 .addReg(CrReg)
13276 .addMBB(exitMBB);
13277 BB->addSuccessor(loop2MBB);
13278 BB->addSuccessor(exitMBB);
13279 BB = loop2MBB;
13280 }
13281 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13282 BuildMI(BB, dl, TII->get(PPC::STWCX))
13283 .addReg(Tmp4Reg)
13284 .addReg(ZeroReg)
13285 .addReg(PtrReg);
13286 BuildMI(BB, dl, TII->get(PPC::BCC))
13287 .addImm(PPC::PRED_NE)
13288 .addReg(PPC::CR0)
13289 .addMBB(loopMBB);
13290 BB->addSuccessor(loopMBB);
13291 BB->addSuccessor(exitMBB);
13292
13293 // exitMBB:
13294 // ...
13295 BB = exitMBB;
13296 // Since the shift amount is not a constant, we need to clear
13297 // the upper bits with a separate RLWINM.
13298 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13299 .addReg(SrwDestReg)
13300 .addImm(0)
13301 .addImm(is8bit ? 24 : 16)
13302 .addImm(31);
13303 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13304 .addReg(TmpDestReg)
13305 .addReg(ShiftReg);
13306 return BB;
13307}
13308
13309 MachineBasicBlock *
13310 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13311 MachineBasicBlock *MBB) const {
13312 DebugLoc DL = MI.getDebugLoc();
13313 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13314 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13315
13316 MachineFunction *MF = MBB->getParent();
13317 MachineRegisterInfo &MRI = MF->getRegInfo();
13318 
13319 const BasicBlock *BB = MBB->getBasicBlock();
13320 MachineFunction::iterator I = ++MBB->getIterator();
13321
13322 Register DstReg = MI.getOperand(0).getReg();
13323 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13324 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13325 Register mainDstReg = MRI.createVirtualRegister(RC);
13326 Register restoreDstReg = MRI.createVirtualRegister(RC);
13327
13328 MVT PVT = getPointerTy(MF->getDataLayout());
13329 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13330 "Invalid Pointer Size!");
13331 // For v = setjmp(buf), we generate
13332 //
13333 // thisMBB:
13334 // SjLjSetup mainMBB
13335 // bl mainMBB
13336 // v_restore = 1
13337 // b sinkMBB
13338 //
13339 // mainMBB:
13340 // buf[LabelOffset] = LR
13341 // v_main = 0
13342 //
13343 // sinkMBB:
13344 // v = phi(main, restore)
13345 //
13346
13347 MachineBasicBlock *thisMBB = MBB;
13348 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13349 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13350 MF->insert(I, mainMBB);
13351 MF->insert(I, sinkMBB);
13352
13353 MachineInstrBuilder MIB;
13354 
13355 // Transfer the remainder of BB and its successor edges to sinkMBB.
13356 sinkMBB->splice(sinkMBB->begin(), MBB,
13357 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13358 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
13359 
13360 // Note that the structure of the jmp_buf used here is not compatible
13361 // with that used by libc, and is not designed to be. Specifically, it
13362 // stores only those 'reserved' registers that LLVM does not otherwise
13363 // understand how to spill. Also, by convention, by the time this
13364 // intrinsic is called, Clang has already stored the frame address in the
13365 // first slot of the buffer and stack address in the third. Following the
13366 // X86 target code, we'll store the jump address in the second slot. We also
13367 // need to save the TOC pointer (R2) to handle jumps between shared
13368 // libraries, and that will be stored in the fourth slot. The thread
13369 // identifier (R13) is not affected.
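// Buffer layout used below, in pointer-sized slots:
//   slot 0: frame address (stored by Clang)
//   slot 1: jump address (LabelOffset)
//   slot 2: stack address (stored by Clang)
//   slot 3: TOC pointer, R2 (TOCOffset)
//   slot 4: base pointer (BPOffset)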
13370
13371 // thisMBB:
13372 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13373 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13374 const int64_t BPOffset = 4 * PVT.getStoreSize();
13375
13376 // Prepare the IP in a register.
13377 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13378 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13379 Register BufReg = MI.getOperand(1).getReg();
13380
13381 if (Subtarget.is64BitELFABI()) {
13382 setUsesTOCBasePtr(*MBB->getParent());
13383 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13384 .addReg(PPC::X2)
13385 .addImm(TOCOffset)
13386 .addReg(BufReg)
13387 .cloneMemRefs(MI);
13388 }
13389
13390 // Naked functions never have a base pointer, and so we use r1. For all
13391 // other functions, this decision must be delayed until during PEI.
13392 unsigned BaseReg;
13393 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13394 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13395 else
13396 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13397
13398 MIB = BuildMI(*thisMBB, MI, DL,
13399 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13400 .addReg(BaseReg)
13401 .addImm(BPOffset)
13402 .addReg(BufReg)
13403 .cloneMemRefs(MI);
13404
13405 // Setup
13406 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13407 MIB.addRegMask(TRI->getNoPreservedMask());
13408
13409 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13410
13411 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13412 .addMBB(mainMBB);
13413 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13414
13415 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13416 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13417
13418 // mainMBB:
13419 // mainDstReg = 0
13420 MIB =
13421 BuildMI(mainMBB, DL,
13422 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13423
13424 // Store IP
13425 if (Subtarget.isPPC64()) {
13426 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13427 .addReg(LabelReg)
13428 .addImm(LabelOffset)
13429 .addReg(BufReg);
13430 } else {
13431 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13432 .addReg(LabelReg)
13433 .addImm(LabelOffset)
13434 .addReg(BufReg);
13435 }
13436 MIB.cloneMemRefs(MI);
13437
13438 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13439 mainMBB->addSuccessor(sinkMBB);
13440
13441 // sinkMBB:
13442 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13443 TII->get(PPC::PHI), DstReg)
13444 .addReg(mainDstReg).addMBB(mainMBB)
13445 .addReg(restoreDstReg).addMBB(thisMBB);
13446
13447 MI.eraseFromParent();
13448 return sinkMBB;
13449}
13450
13451 MachineBasicBlock *
13452 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13453 MachineBasicBlock *MBB) const {
13454 DebugLoc DL = MI.getDebugLoc();
13455 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13456
13457 MachineFunction *MF = MBB->getParent();
13458 MachineRegisterInfo &MRI = MF->getRegInfo();
13459 
13460 MVT PVT = getPointerTy(MF->getDataLayout());
13461 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13462 "Invalid Pointer Size!");
13463
13464 const TargetRegisterClass *RC =
13465 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13466 Register Tmp = MRI.createVirtualRegister(RC);
13467 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13468 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13469 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13470 unsigned BP =
13471 (PVT == MVT::i64)
13472 ? PPC::X30
13473 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13474 : PPC::R30);
13475
13476 MachineInstrBuilder MIB;
13477 
13478 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13479 const int64_t SPOffset = 2 * PVT.getStoreSize();
13480 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13481 const int64_t BPOffset = 4 * PVT.getStoreSize();
13482
13483 Register BufReg = MI.getOperand(0).getReg();
13484
13485 // Reload FP (the jumped-to function may not have had a
13486 // frame pointer, and if so, then its r31 will be restored
13487 // as necessary).
13488 if (PVT == MVT::i64) {
13489 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13490 .addImm(0)
13491 .addReg(BufReg);
13492 } else {
13493 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13494 .addImm(0)
13495 .addReg(BufReg);
13496 }
13497 MIB.cloneMemRefs(MI);
13498
13499 // Reload IP
13500 if (PVT == MVT::i64) {
13501 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13502 .addImm(LabelOffset)
13503 .addReg(BufReg);
13504 } else {
13505 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13506 .addImm(LabelOffset)
13507 .addReg(BufReg);
13508 }
13509 MIB.cloneMemRefs(MI);
13510
13511 // Reload SP
13512 if (PVT == MVT::i64) {
13513 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13514 .addImm(SPOffset)
13515 .addReg(BufReg);
13516 } else {
13517 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13518 .addImm(SPOffset)
13519 .addReg(BufReg);
13520 }
13521 MIB.cloneMemRefs(MI);
13522
13523 // Reload BP
13524 if (PVT == MVT::i64) {
13525 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13526 .addImm(BPOffset)
13527 .addReg(BufReg);
13528 } else {
13529 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13530 .addImm(BPOffset)
13531 .addReg(BufReg);
13532 }
13533 MIB.cloneMemRefs(MI);
13534
13535 // Reload TOC
13536 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13537 setUsesTOCBasePtr(*MBB->getParent());
13538 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13539 .addImm(TOCOffset)
13540 .addReg(BufReg)
13541 .cloneMemRefs(MI);
13542 }
13543
13544 // Jump
13545 BuildMI(*MBB, MI, DL,
13546 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13547 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13548
13549 MI.eraseFromParent();
13550 return MBB;
13551}
13552
13553 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13554 // If the function specifically requests inline stack probes, emit them.
13555 if (MF.getFunction().hasFnAttribute("probe-stack"))
13556 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13557 "inline-asm";
13558 return false;
13559}
13560
13561 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13562 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13563 unsigned StackAlign = TFI->getStackAlignment();
13564 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13565 "Unexpected stack alignment");
13566 // The default stack probe size is 4096 if the function has no
13567 // stack-probe-size attribute.
13568 const Function &Fn = MF.getFunction();
13569 unsigned StackProbeSize =
13570 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13571 // Round down to the stack alignment.
13572 StackProbeSize &= ~(StackAlign - 1);
13573 return StackProbeSize ? StackProbeSize : StackAlign;
13574}
13575
13576 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13577 // into three phases. In the first phase, it uses the pseudo instruction
13578 // PREPARE_PROBED_ALLOCA to get the future values of the actual FramePointer
13579 // and FinalStackPtr. In the second phase, it generates a loop that probes one
13580 // block at a time. Finally, it uses the pseudo instruction DYNAREAOFFSET to
13581 // get the future value of MaxCallFrameSize and compute the correct data area pointer.
13582 MachineBasicBlock *
13583 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13584 MachineBasicBlock *MBB) const {
13585 const bool isPPC64 = Subtarget.isPPC64();
13586 MachineFunction *MF = MBB->getParent();
13587 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13588 DebugLoc DL = MI.getDebugLoc();
13589 const unsigned ProbeSize = getStackProbeSize(*MF);
13590 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13591 MachineRegisterInfo &MRI = MF->getRegInfo();
13592 // The CFG of the stack-probing code looks like this:
13593 // +-----+
13594 // | MBB |
13595 // +--+--+
13596 // |
13597 // +----v----+
13598 // +--->+ TestMBB +---+
13599 // | +----+----+ |
13600 // | | |
13601 // | +-----v----+ |
13602 // +---+ BlockMBB | |
13603 // +----------+ |
13604 // |
13605 // +---------+ |
13606 // | TailMBB +<--+
13607 // +---------+
13608 // In MBB, calculate previous frame pointer and final stack pointer.
13609 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13610 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13611 // TailMBB is spliced via \p MI.
13612 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13613 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13614 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13615
13616 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13617 MF->insert(MBBIter, TestMBB);
13618 MF->insert(MBBIter, BlockMBB);
13619 MF->insert(MBBIter, TailMBB);
13620
13621 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13622 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13623
13624 Register DstReg = MI.getOperand(0).getReg();
13625 Register NegSizeReg = MI.getOperand(1).getReg();
13626 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13627 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13628 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13629 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13630
13631 // Since the value of NegSizeReg might be realigned during prolog/epilog
13632 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13633 // actual FramePointer and NegSize.
13634 unsigned ProbeOpc;
13635 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13636 ProbeOpc =
13637 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13638 else
13639 // By using PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg and
13640 // NegSizeReg will be allocated to the same physical register, avoiding a
13641 // redundant copy when NegSizeReg has only one use, namely the current MI,
13642 // which will then be replaced by PREPARE_PROBED_ALLOCA.
13643 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13644 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13645 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13646 .addDef(ActualNegSizeReg)
13647 .addReg(NegSizeReg)
13648 .add(MI.getOperand(2))
13649 .add(MI.getOperand(3));
13650
13651 // Calculate the final stack pointer, which equals SP + ActualNegSize.
13652 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13653 FinalStackPtr)
13654 .addReg(SPReg)
13655 .addReg(ActualNegSizeReg);
13656
13657 // Materialize a scratch register for update.
13658 int64_t NegProbeSize = -(int64_t)ProbeSize;
13659 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13660 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13661 if (!isInt<16>(NegProbeSize)) {
13662 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13663 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13664 .addImm(NegProbeSize >> 16);
13665 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13666 ScratchReg)
13667 .addReg(TempReg)
13668 .addImm(NegProbeSize & 0xFFFF);
13669 } else
13670 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13671 .addImm(NegProbeSize);
13672
13673 {
13674 // Probing leading residual part.
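// NegMod = ActualNegSize - (ActualNegSize / NegProbeSize) * NegProbeSize, i.e.
// the residual part of the allocation that is not a multiple of ProbeSize; it
// is probed first with a single stdux/stwux.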
13675 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13676 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13677 .addReg(ActualNegSizeReg)
13678 .addReg(ScratchReg);
13679 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13680 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13681 .addReg(Div)
13682 .addReg(ScratchReg);
13683 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13684 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13685 .addReg(Mul)
13686 .addReg(ActualNegSizeReg);
13687 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13688 .addReg(FramePointer)
13689 .addReg(SPReg)
13690 .addReg(NegMod);
13691 }
13692
13693 {
13694 // Remaining part should be multiple of ProbeSize.
13695 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13696 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13697 .addReg(SPReg)
13698 .addReg(FinalStackPtr);
13699 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13700 .addImm(PPC::PRED_EQ)
13701 .addReg(CmpResult)
13702 .addMBB(TailMBB);
13703 TestMBB->addSuccessor(BlockMBB);
13704 TestMBB->addSuccessor(TailMBB);
13705 }
13706
13707 {
13708 // Touch the block.
13709 // |P...|P...|P...
13710 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13711 .addReg(FramePointer)
13712 .addReg(SPReg)
13713 .addReg(ScratchReg);
13714 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13715 BlockMBB->addSuccessor(TestMBB);
13716 }
13717
13718 // The calculation of MaxCallFrameSize is deferred to prolog/epilog insertion;
13719 // use the DYNAREAOFFSET pseudo instruction to get its future result.
13720 Register MaxCallFrameSizeReg =
13721 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13722 BuildMI(TailMBB, DL,
13723 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13724 MaxCallFrameSizeReg)
13725 .add(MI.getOperand(2))
13726 .add(MI.getOperand(3));
13727 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13728 .addReg(SPReg)
13729 .addReg(MaxCallFrameSizeReg);
13730
13731 // Splice instructions after MI to TailMBB.
13732 TailMBB->splice(TailMBB->end(), MBB,
13733 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13734 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13735 MBB->addSuccessor(TestMBB);
13736
13737 // Delete the pseudo instruction.
13738 MI.eraseFromParent();
13739
13740 ++NumDynamicAllocaProbed;
13741 return TailMBB;
13742}
13743
13744 static bool IsSelectCC(MachineInstr &MI) {
13745 switch (MI.getOpcode()) {
13746 case PPC::SELECT_CC_I4:
13747 case PPC::SELECT_CC_I8:
13748 case PPC::SELECT_CC_F4:
13749 case PPC::SELECT_CC_F8:
13750 case PPC::SELECT_CC_F16:
13751 case PPC::SELECT_CC_VRRC:
13752 case PPC::SELECT_CC_VSFRC:
13753 case PPC::SELECT_CC_VSSRC:
13754 case PPC::SELECT_CC_VSRC:
13755 case PPC::SELECT_CC_SPE4:
13756 case PPC::SELECT_CC_SPE:
13757 return true;
13758 default:
13759 return false;
13760 }
13761}
13762
13763static bool IsSelect(MachineInstr &MI) {
13764 switch (MI.getOpcode()) {
13765 case PPC::SELECT_I4:
13766 case PPC::SELECT_I8:
13767 case PPC::SELECT_F4:
13768 case PPC::SELECT_F8:
13769 case PPC::SELECT_F16:
13770 case PPC::SELECT_SPE:
13771 case PPC::SELECT_SPE4:
13772 case PPC::SELECT_VRRC:
13773 case PPC::SELECT_VSFRC:
13774 case PPC::SELECT_VSSRC:
13775 case PPC::SELECT_VSRC:
13776 return true;
13777 default:
13778 return false;
13779 }
13780}
13781
13782 MachineBasicBlock *
13783 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13784 MachineBasicBlock *BB) const {
13785 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13786 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13787 if (Subtarget.is64BitELFABI() &&
13788 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13789 !Subtarget.isUsingPCRelativeCalls()) {
13790 // Call lowering should have added an r2 operand to indicate a dependence
13791 // on the TOC base pointer value. It can't however, because there is no
13792 // way to mark the dependence as implicit there, and so the stackmap code
13793 // will confuse it with a regular operand. Instead, add the dependence
13794 // here.
13795 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13796 }
13797
13798 return emitPatchPoint(MI, BB);
13799 }
13800
13801 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13802 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13803 return emitEHSjLjSetJmp(MI, BB);
13804 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13805 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13806 return emitEHSjLjLongJmp(MI, BB);
13807 }
13808
13809 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13810
13811 // To "insert" these instructions we actually have to insert their
13812 // control-flow patterns.
13813 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13814 MachineFunction::iterator It = ++BB->getIterator();
13815 
13816 MachineFunction *F = BB->getParent();
13817 MachineRegisterInfo &MRI = F->getRegInfo();
13818
13819 if (Subtarget.hasISEL() &&
13820 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13821 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13822 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13823 SmallVector<MachineOperand, 2> Cond;
13824 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13825 MI.getOpcode() == PPC::SELECT_CC_I8)
13826 Cond.push_back(MI.getOperand(4));
13827 else
13828 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13829 Cond.push_back(MI.getOperand(1));
13830
13831 DebugLoc dl = MI.getDebugLoc();
13832 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13833 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13834 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13835 // The incoming instruction knows the destination vreg to set, the
13836 // condition code register to branch on, the true/false values to
13837 // select between, and a branch opcode to use.
13838
13839 // thisMBB:
13840 // ...
13841 // TrueVal = ...
13842 // cmpTY ccX, r1, r2
13843 // bCC sinkMBB
13844 // fallthrough --> copy0MBB
13845 MachineBasicBlock *thisMBB = BB;
13846 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13847 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13848 DebugLoc dl = MI.getDebugLoc();
13849 F->insert(It, copy0MBB);
13850 F->insert(It, sinkMBB);
13851
13852 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13853 copy0MBB->addLiveIn(PPC::CARRY);
13854 sinkMBB->addLiveIn(PPC::CARRY);
13855 }
13856
13857 // Set the call frame size on entry to the new basic blocks.
13858 // See https://reviews.llvm.org/D156113.
13859 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13860 copy0MBB->setCallFrameSize(CallFrameSize);
13861 sinkMBB->setCallFrameSize(CallFrameSize);
13862
13863 // Transfer the remainder of BB and its successor edges to sinkMBB.
13864 sinkMBB->splice(sinkMBB->begin(), BB,
13865 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13866 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13867 
13868 // Next, add the true and fallthrough blocks as its successors.
13869 BB->addSuccessor(copy0MBB);
13870 BB->addSuccessor(sinkMBB);
13871
13872 if (IsSelect(MI)) {
13873 BuildMI(BB, dl, TII->get(PPC::BC))
13874 .addReg(MI.getOperand(1).getReg())
13875 .addMBB(sinkMBB);
13876 } else {
13877 unsigned SelectPred = MI.getOperand(4).getImm();
13878 BuildMI(BB, dl, TII->get(PPC::BCC))
13879 .addImm(SelectPred)
13880 .addReg(MI.getOperand(1).getReg())
13881 .addMBB(sinkMBB);
13882 }
13883
13884 // copy0MBB:
13885 // %FalseValue = ...
13886 // # fallthrough to sinkMBB
13887 BB = copy0MBB;
13888
13889 // Update machine-CFG edges
13890 BB->addSuccessor(sinkMBB);
13891
13892 // sinkMBB:
13893 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13894 // ...
13895 BB = sinkMBB;
13896 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13897 .addReg(MI.getOperand(3).getReg())
13898 .addMBB(copy0MBB)
13899 .addReg(MI.getOperand(2).getReg())
13900 .addMBB(thisMBB);
13901 } else if (MI.getOpcode() == PPC::ReadTB) {
13902 // To read the 64-bit time-base register on a 32-bit target, we read the
13903 // two halves. Should the counter have wrapped while it was being read, we
13904 // need to try again.
13905 // ...
13906 // readLoop:
13907 // mfspr Rx,TBU # load from TBU
13908 // mfspr Ry,TB # load from TB
13909 // mfspr Rz,TBU # load from TBU
13910 // cmpw crX,Rx,Rz # check if 'old'='new'
13911 // bne readLoop # branch if they're not equal
13912 // ...
13913
13914 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13915 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13916 DebugLoc dl = MI.getDebugLoc();
13917 F->insert(It, readMBB);
13918 F->insert(It, sinkMBB);
13919
13920 // Transfer the remainder of BB and its successor edges to sinkMBB.
13921 sinkMBB->splice(sinkMBB->begin(), BB,
13922 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13923 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13924 
13925 BB->addSuccessor(readMBB);
13926 BB = readMBB;
13927
13928 MachineRegisterInfo &RegInfo = F->getRegInfo();
13929 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13930 Register LoReg = MI.getOperand(0).getReg();
13931 Register HiReg = MI.getOperand(1).getReg();
13932
13933 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13934 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13935 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13936
13937 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13938
13939 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13940 .addReg(HiReg)
13941 .addReg(ReadAgainReg);
13942 BuildMI(BB, dl, TII->get(PPC::BCC))
13943 .addImm(PPC::PRED_NE)
13944 .addReg(CmpReg)
13945 .addMBB(readMBB);
13946
13947 BB->addSuccessor(readMBB);
13948 BB->addSuccessor(sinkMBB);
13949 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13950 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13951 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13952 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13953 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13954 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13955 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13956 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13957
13958 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13959 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13960 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13961 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13962 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13963 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13964 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13965 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13966
13967 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13968 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13969 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13970 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13971 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13972 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13973 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13974 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13975
13976 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13977 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13978 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13979 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13980 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13981 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13982 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13983 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13984
13985 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13986 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13987 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13988 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13989 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13990 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13991 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13992 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13993
13994 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13995 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13996 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13997 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13998 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13999 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14000 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14001 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14002
14003 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14004 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14005 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14006 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14007 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14008 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14009 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14010 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14011
14012 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14013 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14014 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14015 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14016 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14017 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14018 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14019 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14020
14021 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14022 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14023 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14024 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14025 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14026 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14027 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14028 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14029
14030 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14031 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14032 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14033 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14034 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14035 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14036 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14037 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14038
14039 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14040 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14041 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14042 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14043 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14044 BB = EmitAtomicBinary(MI, BB, 4, 0);
14045 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14046 BB = EmitAtomicBinary(MI, BB, 8, 0);
14047 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14048 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14049 (Subtarget.hasPartwordAtomics() &&
14050 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14051 (Subtarget.hasPartwordAtomics() &&
14052 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14053 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14054
14055 auto LoadMnemonic = PPC::LDARX;
14056 auto StoreMnemonic = PPC::STDCX;
14057 switch (MI.getOpcode()) {
14058 default:
14059 llvm_unreachable("Compare and swap of unknown size");
14060 case PPC::ATOMIC_CMP_SWAP_I8:
14061 LoadMnemonic = PPC::LBARX;
14062 StoreMnemonic = PPC::STBCX;
14063 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14064 break;
14065 case PPC::ATOMIC_CMP_SWAP_I16:
14066 LoadMnemonic = PPC::LHARX;
14067 StoreMnemonic = PPC::STHCX;
14068 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14069 break;
14070 case PPC::ATOMIC_CMP_SWAP_I32:
14071 LoadMnemonic = PPC::LWARX;
14072 StoreMnemonic = PPC::STWCX;
14073 break;
14074 case PPC::ATOMIC_CMP_SWAP_I64:
14075 LoadMnemonic = PPC::LDARX;
14076 StoreMnemonic = PPC::STDCX;
14077 break;
14078 }
14079 MachineRegisterInfo &RegInfo = F->getRegInfo();
14080 Register dest = MI.getOperand(0).getReg();
14081 Register ptrA = MI.getOperand(1).getReg();
14082 Register ptrB = MI.getOperand(2).getReg();
14083 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14084 Register oldval = MI.getOperand(3).getReg();
14085 Register newval = MI.getOperand(4).getReg();
14086 DebugLoc dl = MI.getDebugLoc();
14087
14088 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14089 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14090 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14091 F->insert(It, loop1MBB);
14092 F->insert(It, loop2MBB);
14093 F->insert(It, exitMBB);
14094 exitMBB->splice(exitMBB->begin(), BB,
14095 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14096 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14097
14098 // thisMBB:
14099 // ...
14100 // fallthrough --> loopMBB
14101 BB->addSuccessor(loop1MBB);
14102
14103 // loop1MBB:
14104 // l[bhwd]arx dest, ptr
14105 // cmp[wd] dest, oldval
14106 // bne- exitBB
14107 // loop2MBB:
14108 // st[bhwd]cx. newval, ptr
14109 // bne- loopMBB
14110 // b exitBB
14111 // exitBB:
14112 BB = loop1MBB;
14113 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14114 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14115 .addReg(dest)
14116 .addReg(oldval);
14117 BuildMI(BB, dl, TII->get(PPC::BCC))
14118 .addImm(PPC::PRED_NE)
14119 .addReg(CrReg)
14120 .addMBB(exitMBB);
14121 BB->addSuccessor(loop2MBB);
14122 BB->addSuccessor(exitMBB);
14123
14124 BB = loop2MBB;
14125 BuildMI(BB, dl, TII->get(StoreMnemonic))
14126 .addReg(newval)
14127 .addReg(ptrA)
14128 .addReg(ptrB);
14129 BuildMI(BB, dl, TII->get(PPC::BCC))
14130 .addImm(PPC::PRED_NE)
14131 .addReg(PPC::CR0)
14132 .addMBB(loop1MBB);
14133 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14134 BB->addSuccessor(loop1MBB);
14135 BB->addSuccessor(exitMBB);
14136
14137 // exitMBB:
14138 // ...
14139 BB = exitMBB;
14140 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14141 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14142 // We must use 64-bit registers for addresses when targeting 64-bit,
14143 // since we're actually doing arithmetic on them. Other registers
14144 // can be 32-bit.
14145 bool is64bit = Subtarget.isPPC64();
14146 bool isLittleEndian = Subtarget.isLittleEndian();
14147 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14148
14149 Register dest = MI.getOperand(0).getReg();
14150 Register ptrA = MI.getOperand(1).getReg();
14151 Register ptrB = MI.getOperand(2).getReg();
14152 Register oldval = MI.getOperand(3).getReg();
14153 Register newval = MI.getOperand(4).getReg();
14154 DebugLoc dl = MI.getDebugLoc();
14155
14156 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14157 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14158 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14159 F->insert(It, loop1MBB);
14160 F->insert(It, loop2MBB);
14161 F->insert(It, exitMBB);
14162 exitMBB->splice(exitMBB->begin(), BB,
14163 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14164 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14165
14166 MachineRegisterInfo &RegInfo = F->getRegInfo();
14167 const TargetRegisterClass *RC =
14168 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14169 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14170
14171 Register PtrReg = RegInfo.createVirtualRegister(RC);
14172 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14173 Register ShiftReg =
14174 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14175 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14176 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14177 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14178 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14179 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14180 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14181 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14182 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14183 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14184 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14185 Register Ptr1Reg;
14186 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14187 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14188 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14189 // thisMBB:
14190 // ...
14191 // fallthrough --> loopMBB
14192 BB->addSuccessor(loop1MBB);
14193
14194 // The 4-byte load must be aligned, while a char or short may be
14195 // anywhere in the word. Hence all this nasty bookkeeping code.
14196 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14197 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14198 // xori shift, shift1, 24 [16]
14199 // rlwinm ptr, ptr1, 0, 0, 29
14200 // slw newval2, newval, shift
14201 // slw oldval2, oldval,shift
14202 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14203 // slw mask, mask2, shift
14204 // and newval3, newval2, mask
14205 // and oldval3, oldval2, mask
14206 // loop1MBB:
14207 // lwarx tmpDest, ptr
14208 // and tmp, tmpDest, mask
14209 // cmpw tmp, oldval3
14210 // bne- exitBB
14211 // loop2MBB:
14212 // andc tmp2, tmpDest, mask
14213 // or tmp4, tmp2, newval3
14214 // stwcx. tmp4, ptr
14215 // bne- loop1MBB
14216 // b exitBB
14217 // exitBB:
14218 // srw dest, tmpDest, shift
14219 if (ptrA != ZeroReg) {
14220 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14221 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14222 .addReg(ptrA)
14223 .addReg(ptrB);
14224 } else {
14225 Ptr1Reg = ptrB;
14226 }
14227
14228 // We need to use a 32-bit subregister here to avoid a register class
14229 // mismatch in 64-bit mode.
14230 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14231 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
14232 .addImm(3)
14233 .addImm(27)
14234 .addImm(is8bit ? 28 : 27);
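// The rlwinm above computes the bit offset of the addressed byte/halfword
// within its word. On big-endian targets the sub-word element sits at the
// opposite end of the word, so the shift amount is flipped with the xori
// below (24 for bytes, 16 for halfwords); on little-endian it is already
// correct.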
14235 if (!isLittleEndian)
14236 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14237 .addReg(Shift1Reg)
14238 .addImm(is8bit ? 24 : 16);
14239 if (is64bit)
14240 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14241 .addReg(Ptr1Reg)
14242 .addImm(0)
14243 .addImm(61);
14244 else
14245 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14246 .addReg(Ptr1Reg)
14247 .addImm(0)
14248 .addImm(0)
14249 .addImm(29);
14250 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14251 .addReg(newval)
14252 .addReg(ShiftReg);
14253 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14254 .addReg(oldval)
14255 .addReg(ShiftReg);
14256 if (is8bit)
14257 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14258 else {
14259 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14260 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14261 .addReg(Mask3Reg)
14262 .addImm(65535);
14263 }
14264 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14265 .addReg(Mask2Reg)
14266 .addReg(ShiftReg);
14267 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14268 .addReg(NewVal2Reg)
14269 .addReg(MaskReg);
14270 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14271 .addReg(OldVal2Reg)
14272 .addReg(MaskReg);
14273
14274 BB = loop1MBB;
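// r0 (ZERO/ZERO8) reads as zero when used as the base of an indexed memory
// access, so pairing ZeroReg with PtrReg addresses PtrReg directly.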
14275 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14276 .addReg(ZeroReg)
14277 .addReg(PtrReg);
14278 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14279 .addReg(TmpDestReg)
14280 .addReg(MaskReg);
14281 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14282 .addReg(TmpReg)
14283 .addReg(OldVal3Reg);
14284 BuildMI(BB, dl, TII->get(PPC::BCC))
14285 .addImm(PPC::PRED_NE)
14286 .addReg(CrReg)
14287 .addMBB(exitMBB);
14288 BB->addSuccessor(loop2MBB);
14289 BB->addSuccessor(exitMBB);
14290
14291 BB = loop2MBB;
14292 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14293 .addReg(TmpDestReg)
14294 .addReg(MaskReg);
14295 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14296 .addReg(Tmp2Reg)
14297 .addReg(NewVal3Reg);
14298 BuildMI(BB, dl, TII->get(PPC::STWCX))
14299 .addReg(Tmp4Reg)
14300 .addReg(ZeroReg)
14301 .addReg(PtrReg);
14302 BuildMI(BB, dl, TII->get(PPC::BCC))
14303 .addImm(PPC::PRED_NE)
14304 .addReg(PPC::CR0)
14305 .addMBB(loop1MBB);
14306 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14307 BB->addSuccessor(loop1MBB);
14308 BB->addSuccessor(exitMBB);
14309
14310 // exitMBB:
14311 // ...
14312 BB = exitMBB;
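// The cmpxchg result is the old value: shift the masked word loaded by lwarx
// back down so the original byte/halfword lands in the low-order bits.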
14313 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14314 .addReg(TmpReg)
14315 .addReg(ShiftReg);
14316 } else if (MI.getOpcode() == PPC::FADDrtz) {
14317 // This pseudo performs an FADD with rounding mode temporarily forced
14318 // to round-to-zero. We emit this via custom inserter since the FPSCR
14319 // is not modeled at the SelectionDAG level.
14320 Register Dest = MI.getOperand(0).getReg();
14321 Register Src1 = MI.getOperand(1).getReg();
14322 Register Src2 = MI.getOperand(2).getReg();
14323 DebugLoc dl = MI.getDebugLoc();
14324
14325 MachineRegisterInfo &RegInfo = F->getRegInfo();
14326 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14327
14328 // Save FPSCR value.
14329 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14330
14331 // Set rounding mode to round-to-zero.
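// mtfsb1 31 sets and mtfsb0 30 clears the two rounding-mode bits (FPSCR bits
// 62:63, numbered 31 and 30 by these instructions), giving RN=0b01,
// i.e. round toward zero.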
14332 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14333 .addImm(31)
14334 .addReg(PPC::RM, RegState::ImplicitDefine);
14335
14336 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14337 .addImm(30)
14338 .addReg(PPC::RM, RegState::ImplicitDefine);
14339
14340 // Perform addition.
14341 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14342 .addReg(Src1)
14343 .addReg(Src2);
14344 if (MI.getFlag(MachineInstr::NoFPExcept))
14345 MIB.setMIFlag(MachineInstr::NoFPExcept);
14346
14347 // Restore FPSCR value.
14348 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14349 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14350 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14351 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14352 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14353 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14354 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14355 ? PPC::ANDI8_rec
14356 : PPC::ANDI_rec;
14357 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14358 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14359
14360 MachineRegisterInfo &RegInfo = F->getRegInfo();
14361 Register Dest = RegInfo.createVirtualRegister(
14362 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14363
14364 DebugLoc Dl = MI.getDebugLoc();
14365 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14366 .addReg(MI.getOperand(1).getReg())
14367 .addImm(1);
14368 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14369 MI.getOperand(0).getReg())
14370 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14371 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14372 DebugLoc Dl = MI.getDebugLoc();
14373 MachineRegisterInfo &RegInfo = F->getRegInfo();
14374 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14375 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14376 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14377 MI.getOperand(0).getReg())
14378 .addReg(CRReg);
14379 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14380 DebugLoc Dl = MI.getDebugLoc();
14381 unsigned Imm = MI.getOperand(1).getImm();
14382 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14383 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14384 MI.getOperand(0).getReg())
14385 .addReg(PPC::CR0EQ);
14386 } else if (MI.getOpcode() == PPC::SETRNDi) {
14387 DebugLoc dl = MI.getDebugLoc();
14388 Register OldFPSCRReg = MI.getOperand(0).getReg();
14389
14390 // Save FPSCR value.
14391 if (MRI.use_empty(OldFPSCRReg))
14392 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14393 else
14394 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14395
14396 // The floating-point rounding mode is in bits 62:63 of FPSCR, and has
14397 // the following settings:
14398 // 00 Round to nearest
14399 // 01 Round to 0
14400 // 10 Round to +inf
14401 // 11 Round to -inf
14402
14403 // When the operand is an immediate, use its two least significant bits
14404 // to set bits 62:63 of FPSCR.
14405 unsigned Mode = MI.getOperand(1).getImm();
14406 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14407 .addImm(31)
14408 .addReg(PPC::RM, RegState::ImplicitDefine);
14409
14410 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14411 .addImm(30)
14412 .addReg(PPC::RM, RegState::ImplicitDefine);
14413 } else if (MI.getOpcode() == PPC::SETRND) {
14414 DebugLoc dl = MI.getDebugLoc();
14415
14416 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14417 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14418 // If the target doesn't have DirectMove, we should use the stack to do the
14419 // conversion, because the target doesn't have instructions like mtvsrd or
14420 // mfvsrd to do this conversion directly.
14421 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14422 if (Subtarget.hasDirectMove()) {
14423 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14424 .addReg(SrcReg);
14425 } else {
14426 // Use stack to do the register copy.
14427 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14428 MachineRegisterInfo &RegInfo = F->getRegInfo();
14429 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14430 if (RC == &PPC::F8RCRegClass) {
14431 // Copy register from F8RCRegClass to G8RCRegClass.
14432 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14433 "Unsupported RegClass.");
14434
14435 StoreOp = PPC::STFD;
14436 LoadOp = PPC::LD;
14437 } else {
14438 // Copy register from G8RCRegClass to F8RCRegClass.
14439 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14440 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14441 "Unsupported RegClass.");
14442 }
14443
14444 MachineFrameInfo &MFI = F->getFrameInfo();
14445 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14446
14447 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14448 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14449 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
14450 MFI.getObjectAlign(FrameIdx));
14451
14452 // Store the SrcReg into the stack.
14453 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14454 .addReg(SrcReg)
14455 .addImm(0)
14456 .addFrameIndex(FrameIdx)
14457 .addMemOperand(MMOStore);
14458
14459 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14460 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14461 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
14462 MFI.getObjectAlign(FrameIdx));
14463
14464 // Load from the stack where SrcReg is stored, and save to DestReg,
14465 // so we have done the RegClass conversion from RegClass::SrcReg to
14466 // RegClass::DestReg.
14467 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14468 .addImm(0)
14469 .addFrameIndex(FrameIdx)
14470 .addMemOperand(MMOLoad);
14471 }
14472 };
14473
14474 Register OldFPSCRReg = MI.getOperand(0).getReg();
14475
14476 // Save FPSCR value.
14477 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14478
14479 // When the operand is a gprc register, use the two least significant bits
14480 // of the register and the mtfsf instruction to set bits 62:63 of FPSCR.
14481 //
14482 // copy OldFPSCRTmpReg, OldFPSCRReg
14483 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14484 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14485 // copy NewFPSCRReg, NewFPSCRTmpReg
14486 // mtfsf 255, NewFPSCRReg
14487 MachineOperand SrcOp = MI.getOperand(1);
14488 MachineRegisterInfo &RegInfo = F->getRegInfo();
14489 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14490
14491 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14492
14493 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14494 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14495
14496 // The first operand of INSERT_SUBREG should be a register that has
14497 // subregisters; we only care about its RegClass, so we can use an
14498 // IMPLICIT_DEF register.
14499 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14500 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14501 .addReg(ImDefReg)
14502 .add(SrcOp)
14503 .addImm(1);
14504
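// rldimi with SH=0 and MB=62 keeps bits 0:61 of OldFPSCRTmpReg and takes bits
// 62:63 from ExtSrcReg, so only the rounding-mode field of the FPSCR image is
// replaced.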
14505 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14506 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14507 .addReg(OldFPSCRTmpReg)
14508 .addReg(ExtSrcReg)
14509 .addImm(0)
14510 .addImm(62);
14511
14512 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14513 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14514
14515 // The mask 255 means that bits 32:63 of NewFPSCRReg are copied into bits
14516 // 32:63 of FPSCR.
14517 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14518 .addImm(255)
14519 .addReg(NewFPSCRReg)
14520 .addImm(0)
14521 .addImm(0);
14522 } else if (MI.getOpcode() == PPC::SETFLM) {
14523 DebugLoc Dl = MI.getDebugLoc();
14524
14525 // Result of setflm is previous FPSCR content, so we need to save it first.
14526 Register OldFPSCRReg = MI.getOperand(0).getReg();
14527 if (MRI.use_empty(OldFPSCRReg))
14528 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14529 else
14530 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14531
14532 // Put bits 32:63 of NewFPSCRReg into FPSCR.
14533 Register NewFPSCRReg = MI.getOperand(1).getReg();
14534 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14535 .addImm(255)
14536 .addReg(NewFPSCRReg)
14537 .addImm(0)
14538 .addImm(0);
14539 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14540 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14541 return emitProbedAlloca(MI, BB);
14542 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14543 DebugLoc DL = MI.getDebugLoc();
14544 Register Src = MI.getOperand(2).getReg();
14545 Register Lo = MI.getOperand(0).getReg();
14546 Register Hi = MI.getOperand(1).getReg();
14547 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14548 .addDef(Lo)
14549 .addUse(Src, 0, PPC::sub_gp8_x1);
14550 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14551 .addDef(Hi)
14552 .addUse(Src, 0, PPC::sub_gp8_x0);
14553 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14554 MI.getOpcode() == PPC::STQX_PSEUDO) {
14555 DebugLoc DL = MI.getDebugLoc();
14556 // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
14557 // holds the result of adding RA and RB, so it has to be
14558 // g8rc_and_g8rc_nox0.
14559 Register Ptr =
14560 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14561 Register Val = MI.getOperand(0).getReg();
14562 Register RA = MI.getOperand(1).getReg();
14563 Register RB = MI.getOperand(2).getReg();
14564 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14565 BuildMI(*BB, MI, DL,
14566 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14567 : TII->get(PPC::STQ))
14568 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
14569 .addImm(0)
14570 .addReg(Ptr);
14571 } else {
14572 llvm_unreachable("Unexpected instr type to insert");
14573 }
14574
14575 MI.eraseFromParent(); // The pseudo instruction is gone now.
14576 return BB;
14577}
14578
14579//===----------------------------------------------------------------------===//
14580// Target Optimization Hooks
14581//===----------------------------------------------------------------------===//
14582
14583static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14584 // For the estimates, convergence is quadratic, so we essentially double the
14585 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14586 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14587 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
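// Concretely: from ~5 correct bits, successive steps give roughly 10, 20 and
// 40 bits, so 3 steps suffice for f32 and a 4th is added for f64; from ~14
// bits (hasRecipPrec), 1 step reaches ~28 bits and 2 steps reach ~56.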
14588 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14589 if (VT.getScalarType() == MVT::f64)
14590 RefinementSteps++;
14591 return RefinementSteps;
14592}
14593
14594SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14595 const DenormalMode &Mode) const {
14596 // We only have VSX Vector Test for software Square Root.
14597 EVT VT = Op.getValueType();
14598 if (!isTypeLegal(MVT::i1) ||
14599 (VT != MVT::f64 &&
14600 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14601 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14602
14603 SDLoc DL(Op);
14604 // The output register of FTSQRT is CR field.
14605 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14606 // ftsqrt BF,FRB
14607 // Let e_b be the unbiased exponent of the double-precision
14608 // floating-point operand in register FRB.
14609 // fe_flag is set to 1 if either of the following conditions occurs.
14610 // - The double-precision floating-point operand in register FRB is a zero,
14611 // a NaN, an infinity, or a negative value.
14612 // - e_b is less than or equal to -970.
14613 // Otherwise fe_flag is set to 0.
14614 // Both VSX and non-VSX versions would set the EQ bit in the CR if the number
14615 // is not eligible for iteration (zero/negative/infinity/NaN or the unbiased
14616 // exponent is less than or equal to -970).
14617 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14618 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14619 FTSQRT, SRIdxVal),
14620 0);
14621}
14622
14623SDValue
14624PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14625 SelectionDAG &DAG) const {
14626 // We only have VSX Vector Square Root.
14627 EVT VT = Op.getValueType();
14628 if (VT != MVT::f64 &&
14629 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14630 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14631
14632 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14633}
14634
14635SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14636 int Enabled, int &RefinementSteps,
14637 bool &UseOneConstNR,
14638 bool Reciprocal) const {
14639 EVT VT = Operand.getValueType();
14640 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14641 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14642 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14643 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14644 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14645 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14646
14647 // The Newton-Raphson computation with a single constant does not provide
14648 // enough accuracy on some CPUs.
14649 UseOneConstNR = !Subtarget.needsTwoConstNR();
14650 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14651 }
14652 return SDValue();
14653}
14654
14655SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14656 int Enabled,
14657 int &RefinementSteps) const {
14658 EVT VT = Operand.getValueType();
14659 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14660 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14661 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14662 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14663 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14664 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14665 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14666 }
14667 return SDValue();
14668}
14669
14670 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14671 // Note: This functionality is used only when arcp is enabled, and
14672 // on cores with reciprocal estimates (which are used when arcp is
14673 // enabled for division), this functionality is redundant with the default
14674 // combiner logic (once the division -> reciprocal/multiply transformation
14675 // has taken place). As a result, this matters more for older cores than for
14676 // newer ones.
14677
14678 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14679 // reciprocal if there are two or more FDIVs (for embedded cores with only
14680 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
14681 switch (Subtarget.getCPUDirective()) {
14682 default:
14683 return 3;
14684 case PPC::DIR_440:
14685 case PPC::DIR_A2:
14686 case PPC::DIR_E500:
14687 case PPC::DIR_E500mc:
14688 case PPC::DIR_E5500:
14689 return 2;
14690 }
14691}
14692
14693// isConsecutiveLSLoc needs to work even if all adds have not yet been
14694// collapsed, and so we need to look through chains of them.
14695 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14696 int64_t& Offset, SelectionDAG &DAG) {
14697 if (DAG.isBaseWithConstantOffset(Loc)) {
14698 Base = Loc.getOperand(0);
14699 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14700
14701 // The base might itself be a base plus an offset, and if so, accumulate
14702 // that as well.
14703 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14704 }
14705}
14706
14707 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14708 unsigned Bytes, int Dist,
14709 SelectionDAG &DAG) {
14710 if (VT.getSizeInBits() / 8 != Bytes)
14711 return false;
14712
14713 SDValue BaseLoc = Base->getBasePtr();
14714 if (Loc.getOpcode() == ISD::FrameIndex) {
14715 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14716 return false;
14717 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14718 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14719 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14720 int FS = MFI.getObjectSize(FI);
14721 int BFS = MFI.getObjectSize(BFI);
14722 if (FS != BFS || FS != (int)Bytes) return false;
14723 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14724 }
14725
14726 SDValue Base1 = Loc, Base2 = BaseLoc;
14727 int64_t Offset1 = 0, Offset2 = 0;
14728 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14729 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14730 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14731 return true;
14732
14733 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14734 const GlobalValue *GV1 = nullptr;
14735 const GlobalValue *GV2 = nullptr;
14736 Offset1 = 0;
14737 Offset2 = 0;
14738 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14739 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14740 if (isGA1 && isGA2 && GV1 == GV2)
14741 return Offset1 == (Offset2 + Dist*Bytes);
14742 return false;
14743}
14744
14745// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14746// not enforce equality of the chain operands.
14747 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14748 unsigned Bytes, int Dist,
14749 SelectionDAG &DAG) {
14750 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14751 EVT VT = LS->getMemoryVT();
14752 SDValue Loc = LS->getBasePtr();
14753 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14754 }
14755
14756 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14757 EVT VT;
14758 switch (N->getConstantOperandVal(1)) {
14759 default: return false;
14760 case Intrinsic::ppc_altivec_lvx:
14761 case Intrinsic::ppc_altivec_lvxl:
14762 case Intrinsic::ppc_vsx_lxvw4x:
14763 case Intrinsic::ppc_vsx_lxvw4x_be:
14764 VT = MVT::v4i32;
14765 break;
14766 case Intrinsic::ppc_vsx_lxvd2x:
14767 case Intrinsic::ppc_vsx_lxvd2x_be:
14768 VT = MVT::v2f64;
14769 break;
14770 case Intrinsic::ppc_altivec_lvebx:
14771 VT = MVT::i8;
14772 break;
14773 case Intrinsic::ppc_altivec_lvehx:
14774 VT = MVT::i16;
14775 break;
14776 case Intrinsic::ppc_altivec_lvewx:
14777 VT = MVT::i32;
14778 break;
14779 }
14780
14781 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14782 }
14783
14784 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14785 EVT VT;
14786 switch (N->getConstantOperandVal(1)) {
14787 default: return false;
14788 case Intrinsic::ppc_altivec_stvx:
14789 case Intrinsic::ppc_altivec_stvxl:
14790 case Intrinsic::ppc_vsx_stxvw4x:
14791 VT = MVT::v4i32;
14792 break;
14793 case Intrinsic::ppc_vsx_stxvd2x:
14794 VT = MVT::v2f64;
14795 break;
14796 case Intrinsic::ppc_vsx_stxvw4x_be:
14797 VT = MVT::v4i32;
14798 break;
14799 case Intrinsic::ppc_vsx_stxvd2x_be:
14800 VT = MVT::v2f64;
14801 break;
14802 case Intrinsic::ppc_altivec_stvebx:
14803 VT = MVT::i8;
14804 break;
14805 case Intrinsic::ppc_altivec_stvehx:
14806 VT = MVT::i16;
14807 break;
14808 case Intrinsic::ppc_altivec_stvewx:
14809 VT = MVT::i32;
14810 break;
14811 }
14812
14813 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14814 }
14815
14816 return false;
14817}
14818
14819 // Return true if there is a nearby consecutive load to the one provided
14820 // (regardless of alignment). We search up and down the chain, looking through
14821// token factors and other loads (but nothing else). As a result, a true result
14822// indicates that it is safe to create a new consecutive load adjacent to the
14823// load provided.
14824 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14825 SDValue Chain = LD->getChain();
14826 EVT VT = LD->getMemoryVT();
14827
14828 SmallPtrSet<SDNode *, 16> LoadRoots;
14829 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14830 SmallPtrSet<SDNode *, 16> Visited;
14831
14832 // First, search up the chain, branching to follow all token-factor operands.
14833 // If we find a consecutive load, then we're done, otherwise, record all
14834 // nodes just above the top-level loads and token factors.
14835 while (!Queue.empty()) {
14836 SDNode *ChainNext = Queue.pop_back_val();
14837 if (!Visited.insert(ChainNext).second)
14838 continue;
14839
14840 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14841 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14842 return true;
14843
14844 if (!Visited.count(ChainLD->getChain().getNode()))
14845 Queue.push_back(ChainLD->getChain().getNode());
14846 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14847 for (const SDUse &O : ChainNext->ops())
14848 if (!Visited.count(O.getNode()))
14849 Queue.push_back(O.getNode());
14850 } else
14851 LoadRoots.insert(ChainNext);
14852 }
14853
14854 // Second, search down the chain, starting from the top-level nodes recorded
14855 // in the first phase. These top-level nodes are the nodes just above all
14857 // loads and token factors. Starting with their uses, recursively look through
14857 // all loads (just the chain uses) and token factors to find a consecutive
14858 // load.
14859 Visited.clear();
14860 Queue.clear();
14861
14862 for (SDNode *I : LoadRoots) {
14863 Queue.push_back(I);
14864
14865 while (!Queue.empty()) {
14866 SDNode *LoadRoot = Queue.pop_back_val();
14867 if (!Visited.insert(LoadRoot).second)
14868 continue;
14869
14870 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14871 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14872 return true;
14873
14874 for (SDNode *U : LoadRoot->users())
14875 if (((isa<MemSDNode>(U) &&
14876 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14877 U->getOpcode() == ISD::TokenFactor) &&
14878 !Visited.count(U))
14879 Queue.push_back(U);
14880 }
14881 }
14882
14883 return false;
14884}
14885
14886/// This function is called when we have proved that a SETCC node can be replaced
14887/// by subtraction (and other supporting instructions) so that the result of
14888 /// the comparison is kept in a GPR instead of a CR. This function is purely for
14889/// codegen purposes and has some flags to guide the codegen process.
14890static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14891 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14892 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14893
14894 // Zero extend the operands to the largest legal integer. Originally, they
14895 // must be of a strictly smaller size.
14896 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14897 DAG.getConstant(Size, DL, MVT::i32));
14898 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14899 DAG.getConstant(Size, DL, MVT::i32));
14900
14901 // Swap if needed. Depends on the condition code.
14902 if (Swap)
14903 std::swap(Op0, Op1);
14904
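// Both operands were zero-extended from a strictly smaller width, so this
// subtraction cannot wrap; the sign bit of the difference is set exactly when
// the first (possibly swapped) operand is unsigned-less-than the second.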
14905 // Subtract extended integers.
14906 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14907
14908 // Move the sign bit to the least significant position and zero out the rest.
14909 // Now the least significant bit carries the result of original comparison.
14910 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14911 DAG.getConstant(Size - 1, DL, MVT::i32));
14912 auto Final = Shifted;
14913
14914 // Complement the result if needed. Based on the condition code.
14915 if (Complement)
14916 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14917 DAG.getConstant(1, DL, MVT::i64));
14918
14919 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14920}
14921
14922SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14923 DAGCombinerInfo &DCI) const {
14924 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14925
14926 SelectionDAG &DAG = DCI.DAG;
14927 SDLoc DL(N);
14928
14929 // The size of the integers being compared plays a critical role in the following
14930 // analysis, so we prefer to do this when all types are legal.
14931 if (!DCI.isAfterLegalizeDAG())
14932 return SDValue();
14933
14934 // If all users of SETCC extend its value to a legal integer type
14935 // then we replace SETCC with a subtraction
14936 for (const SDNode *U : N->users())
14937 if (U->getOpcode() != ISD::ZERO_EXTEND)
14938 return SDValue();
14939
14940 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14941 auto OpSize = N->getOperand(0).getValueSizeInBits();
14942
14943 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14944
14945 if (OpSize < Size) {
14946 switch (CC) {
14947 default: break;
14948 case ISD::SETULT:
14949 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14950 case ISD::SETULE:
14951 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14952 case ISD::SETUGT:
14953 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14954 case ISD::SETUGE:
14955 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14956 }
14957 }
14958
14959 return SDValue();
14960}
14961
14962SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14963 DAGCombinerInfo &DCI) const {
14964 SelectionDAG &DAG = DCI.DAG;
14965 SDLoc dl(N);
14966
14967 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14968 // If we're tracking CR bits, we need to be careful that we don't have:
14969 // trunc(binary-ops(zext(x), zext(y)))
14970 // or
14971 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
14972 // such that we're unnecessarily moving things into GPRs when it would be
14973 // better to keep them in CR bits.
14974
14975 // Note that trunc here can be an actual i1 trunc, or can be the effective
14976 // truncation that comes from a setcc or select_cc.
14977 if (N->getOpcode() == ISD::TRUNCATE &&
14978 N->getValueType(0) != MVT::i1)
14979 return SDValue();
14980
14981 if (N->getOperand(0).getValueType() != MVT::i32 &&
14982 N->getOperand(0).getValueType() != MVT::i64)
14983 return SDValue();
14984
14985 if (N->getOpcode() == ISD::SETCC ||
14986 N->getOpcode() == ISD::SELECT_CC) {
14987 // If we're looking at a comparison, then we need to make sure that the
14988 // high bits (all except for the first) don't affect the result.
14989 ISD::CondCode CC =
14990 cast<CondCodeSDNode>(N->getOperand(
14991 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14992 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14993
14994 if (ISD::isSignedIntSetCC(CC)) {
14995 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14996 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14997 return SDValue();
14998 } else if (ISD::isUnsignedIntSetCC(CC)) {
14999 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15000 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15001 !DAG.MaskedValueIsZero(N->getOperand(1),
15002 APInt::getHighBitsSet(OpBits, OpBits-1)))
15003 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15004 : SDValue());
15005 } else {
15006 // This is neither a signed nor an unsigned comparison; just make sure
15007 // that the high bits are equal.
15008 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15009 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15010
15011 // We don't really care about what is known about the first bit (if
15012 // anything), so pretend that it is known zero for both to ensure they can
15013 // be compared as constants.
15014 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15015 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15016
15017 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15018 Op1Known.getConstant() != Op2Known.getConstant())
15019 return SDValue();
15020 }
15021 }
15022
15023 // We now know that the higher-order bits are irrelevant; we just need to
15024 // make sure that all of the intermediate operations are bit operations, and
15025 // all inputs are extensions.
15026 if (N->getOperand(0).getOpcode() != ISD::AND &&
15027 N->getOperand(0).getOpcode() != ISD::OR &&
15028 N->getOperand(0).getOpcode() != ISD::XOR &&
15029 N->getOperand(0).getOpcode() != ISD::SELECT &&
15030 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15031 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15032 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15033 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15034 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15035 return SDValue();
15036
15037 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15038 N->getOperand(1).getOpcode() != ISD::AND &&
15039 N->getOperand(1).getOpcode() != ISD::OR &&
15040 N->getOperand(1).getOpcode() != ISD::XOR &&
15041 N->getOperand(1).getOpcode() != ISD::SELECT &&
15042 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15043 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15044 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15045 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15046 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15047 return SDValue();
15048
15049 SmallVector<SDValue, 4> Inputs;
15050 SmallVector<SDValue, 8> BinOps, PromOps;
15051 SmallPtrSet<SDNode *, 16> Visited;
15052
15053 for (unsigned i = 0; i < 2; ++i) {
15054 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15055 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15056 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15057 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15058 isa<ConstantSDNode>(N->getOperand(i)))
15059 Inputs.push_back(N->getOperand(i));
15060 else
15061 BinOps.push_back(N->getOperand(i));
15062
15063 if (N->getOpcode() == ISD::TRUNCATE)
15064 break;
15065 }
15066
15067 // Visit all inputs, collect all binary operations (and, or, xor and
15068 // select) that are all fed by extensions.
15069 while (!BinOps.empty()) {
15070 SDValue BinOp = BinOps.pop_back_val();
15071
15072 if (!Visited.insert(BinOp.getNode()).second)
15073 continue;
15074
15075 PromOps.push_back(BinOp);
15076
15077 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15078 // The condition of the select is not promoted.
15079 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15080 continue;
15081 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15082 continue;
15083
15084 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15085 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15086 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15087 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15088 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15089 Inputs.push_back(BinOp.getOperand(i));
15090 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15091 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15092 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15093 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15094 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15095 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15096 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15097 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15098 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15099 BinOps.push_back(BinOp.getOperand(i));
15100 } else {
15101 // We have an input that is not an extension or another binary
15102 // operation; we'll abort this transformation.
15103 return SDValue();
15104 }
15105 }
15106 }
15107
15108 // Make sure that this is a self-contained cluster of operations (which
15109 // is not quite the same thing as saying that everything has only one
15110 // use).
15111 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15112 if (isa<ConstantSDNode>(Inputs[i]))
15113 continue;
15114
15115 for (const SDNode *User : Inputs[i].getNode()->users()) {
15116 if (User != N && !Visited.count(User))
15117 return SDValue();
15118
15119 // Make sure that we're not going to promote the non-output-value
15120 // operand(s) or SELECT or SELECT_CC.
15121 // FIXME: Although we could sometimes handle this, and it does occur in
15122 // practice that one of the condition inputs to the select is also one of
15123 // the outputs, we currently can't deal with this.
15124 if (User->getOpcode() == ISD::SELECT) {
15125 if (User->getOperand(0) == Inputs[i])
15126 return SDValue();
15127 } else if (User->getOpcode() == ISD::SELECT_CC) {
15128 if (User->getOperand(0) == Inputs[i] ||
15129 User->getOperand(1) == Inputs[i])
15130 return SDValue();
15131 }
15132 }
15133 }
15134
15135 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15136 for (const SDNode *User : PromOps[i].getNode()->users()) {
15137 if (User != N && !Visited.count(User))
15138 return SDValue();
15139
15140 // Make sure that we're not going to promote the non-output-value
15141 // operand(s) or SELECT or SELECT_CC.
15142 // FIXME: Although we could sometimes handle this, and it does occur in
15143 // practice that one of the condition inputs to the select is also one of
15144 // the outputs, we currently can't deal with this.
15145 if (User->getOpcode() == ISD::SELECT) {
15146 if (User->getOperand(0) == PromOps[i])
15147 return SDValue();
15148 } else if (User->getOpcode() == ISD::SELECT_CC) {
15149 if (User->getOperand(0) == PromOps[i] ||
15150 User->getOperand(1) == PromOps[i])
15151 return SDValue();
15152 }
15153 }
15154 }
15155
15156 // Replace all inputs with the extension operand.
15157 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15158 // Constants may have users outside the cluster of to-be-promoted nodes,
15159 // and so we need to replace those as we do the promotions.
15160 if (isa<ConstantSDNode>(Inputs[i]))
15161 continue;
15162 else
15163 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15164 }
15165
15166 std::list<HandleSDNode> PromOpHandles;
15167 for (auto &PromOp : PromOps)
15168 PromOpHandles.emplace_back(PromOp);
15169
15170 // Replace all operations (these are all the same, but have a different
15171 // (i1) return type). DAG.getNode will validate that the types of
15172 // a binary operator match, so go through the list in reverse so that
15173 // we've likely promoted both operands first. Any intermediate truncations or
15174 // extensions disappear.
15175 while (!PromOpHandles.empty()) {
15176 SDValue PromOp = PromOpHandles.back().getValue();
15177 PromOpHandles.pop_back();
15178
15179 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15180 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15181 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15182 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15183 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15184 PromOp.getOperand(0).getValueType() != MVT::i1) {
15185 // The operand is not yet ready (see comment below).
15186 PromOpHandles.emplace_front(PromOp);
15187 continue;
15188 }
15189
15190 SDValue RepValue = PromOp.getOperand(0);
15191 if (isa<ConstantSDNode>(RepValue))
15192 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15193
15194 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15195 continue;
15196 }
15197
15198 unsigned C;
15199 switch (PromOp.getOpcode()) {
15200 default: C = 0; break;
15201 case ISD::SELECT: C = 1; break;
15202 case ISD::SELECT_CC: C = 2; break;
15203 }
15204
15205 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15206 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15207 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15208 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15209 // The to-be-promoted operands of this node have not yet been
15210 // promoted (this should be rare because we're going through the
15211 // list backward, but if one of the operands has several users in
15212 // this cluster of to-be-promoted nodes, it is possible).
15213 PromOpHandles.emplace_front(PromOp);
15214 continue;
15215 }
15216
15217 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15218
15219 // If there are any constant inputs, make sure they're replaced now.
15220 for (unsigned i = 0; i < 2; ++i)
15221 if (isa<ConstantSDNode>(Ops[C+i]))
15222 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15223
15224 DAG.ReplaceAllUsesOfValueWith(PromOp,
15225 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15226 }
15227
15228 // Now we're left with the initial truncation itself.
15229 if (N->getOpcode() == ISD::TRUNCATE)
15230 return N->getOperand(0);
15231
15232 // Otherwise, this is a comparison. The operands to be compared have just
15233 // changed type (to i1), but everything else is the same.
15234 return SDValue(N, 0);
15235}
15236
15237SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15238 DAGCombinerInfo &DCI) const {
15239 SelectionDAG &DAG = DCI.DAG;
15240 SDLoc dl(N);
15241
15242 // If we're tracking CR bits, we need to be careful that we don't have:
15243 // zext(binary-ops(trunc(x), trunc(y)))
15244 // or
15245 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
15246 // such that we're unnecessarily moving things into CR bits that can more
15247 // efficiently stay in GPRs. Note that if we're not certain that the high
15248 // bits are set as required by the final extension, we still may need to do
15249 // some masking to get the proper behavior.
15250
15251 // This same functionality is important on PPC64 when dealing with
15252 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15253 // the return values of functions. Because it is so similar, it is handled
15254 // here as well.
15255
15256 if (N->getValueType(0) != MVT::i32 &&
15257 N->getValueType(0) != MVT::i64)
15258 return SDValue();
15259
15260 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15261 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15262 return SDValue();
15263
15264 if (N->getOperand(0).getOpcode() != ISD::AND &&
15265 N->getOperand(0).getOpcode() != ISD::OR &&
15266 N->getOperand(0).getOpcode() != ISD::XOR &&
15267 N->getOperand(0).getOpcode() != ISD::SELECT &&
15268 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15269 return SDValue();
15270
15271 SmallVector<SDValue, 4> Inputs;
15272 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15273 SmallPtrSet<SDNode *, 16> Visited;
15274
15275 // Visit all inputs, collect all binary operations (and, or, xor and
15276 // select) that are all fed by truncations.
15277 while (!BinOps.empty()) {
15278 SDValue BinOp = BinOps.pop_back_val();
15279
15280 if (!Visited.insert(BinOp.getNode()).second)
15281 continue;
15282
15283 PromOps.push_back(BinOp);
15284
15285 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15286 // The condition of the select is not promoted.
15287 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15288 continue;
15289 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15290 continue;
15291
15292 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15293 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15294 Inputs.push_back(BinOp.getOperand(i));
15295 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15296 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15297 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15298 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15299 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15300 BinOps.push_back(BinOp.getOperand(i));
15301 } else {
15302 // We have an input that is not a truncation or another binary
15303 // operation; we'll abort this transformation.
15304 return SDValue();
15305 }
15306 }
15307 }
15308
15309 // The operands of a select that must be truncated when the select is
15310 // promoted because the operand is actually part of the to-be-promoted set.
15311 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15312
15313 // Make sure that this is a self-contained cluster of operations (which
15314 // is not quite the same thing as saying that everything has only one
15315 // use).
15316 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15317 if (isa<ConstantSDNode>(Inputs[i]))
15318 continue;
15319
15320 for (SDNode *User : Inputs[i].getNode()->users()) {
15321 if (User != N && !Visited.count(User))
15322 return SDValue();
15323
15324 // If we're going to promote the non-output-value operand(s) or SELECT or
15325 // SELECT_CC, record them for truncation.
15326 if (User->getOpcode() == ISD::SELECT) {
15327 if (User->getOperand(0) == Inputs[i])
15328 SelectTruncOp[0].insert(std::make_pair(User,
15329 User->getOperand(0).getValueType()));
15330 } else if (User->getOpcode() == ISD::SELECT_CC) {
15331 if (User->getOperand(0) == Inputs[i])
15332 SelectTruncOp[0].insert(std::make_pair(User,
15333 User->getOperand(0).getValueType()));
15334 if (User->getOperand(1) == Inputs[i])
15335 SelectTruncOp[1].insert(std::make_pair(User,
15336 User->getOperand(1).getValueType()));
15337 }
15338 }
15339 }
15340
15341 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15342 for (SDNode *User : PromOps[i].getNode()->users()) {
15343 if (User != N && !Visited.count(User))
15344 return SDValue();
15345
15346 // If we're going to promote the non-output-value operand(s) or SELECT or
15347 // SELECT_CC, record them for truncation.
15348 if (User->getOpcode() == ISD::SELECT) {
15349 if (User->getOperand(0) == PromOps[i])
15350 SelectTruncOp[0].insert(std::make_pair(User,
15351 User->getOperand(0).getValueType()));
15352 } else if (User->getOpcode() == ISD::SELECT_CC) {
15353 if (User->getOperand(0) == PromOps[i])
15354 SelectTruncOp[0].insert(std::make_pair(User,
15355 User->getOperand(0).getValueType()));
15356 if (User->getOperand(1) == PromOps[i])
15357 SelectTruncOp[1].insert(std::make_pair(User,
15358 User->getOperand(1).getValueType()));
15359 }
15360 }
15361 }
15362
15363 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15364 bool ReallyNeedsExt = false;
15365 if (N->getOpcode() != ISD::ANY_EXTEND) {
15366 // If all of the inputs are not already sign/zero extended, then
15367 // we'll still need to do that at the end.
15368 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15369 if (isa<ConstantSDNode>(Inputs[i]))
15370 continue;
15371
15372 unsigned OpBits =
15373 Inputs[i].getOperand(0).getValueSizeInBits();
15374 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15375
15376 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15377 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15378 APInt::getHighBitsSet(OpBits,
15379 OpBits-PromBits))) ||
15380 (N->getOpcode() == ISD::SIGN_EXTEND &&
15381 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15382 (OpBits-(PromBits-1)))) {
15383 ReallyNeedsExt = true;
15384 break;
15385 }
15386 }
15387 }
15388
15389 // Convert PromOps to handles before doing any RAUW operations, as these
15390 // may CSE with existing nodes, deleting the originals.
15391 std::list<HandleSDNode> PromOpHandles;
15392 for (auto &PromOp : PromOps)
15393 PromOpHandles.emplace_back(PromOp);
15394
15395 // Replace all inputs, either with the truncation operand, or a
15396 // truncation or extension to the final output type.
15397 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15398 // Constant inputs need to be replaced with the to-be-promoted nodes that
15399 // use them because they might have users outside of the cluster of
15400 // promoted nodes.
15401 if (isa<ConstantSDNode>(Inputs[i]))
15402 continue;
15403
15404 SDValue InSrc = Inputs[i].getOperand(0);
15405 if (Inputs[i].getValueType() == N->getValueType(0))
15406 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15407 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15408 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15409 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15410 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15411 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15412 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15413 else
15414 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15415 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15416 }
15417
15418 // Replace all operations (these are all the same, but have a different
15419 // (promoted) return type). DAG.getNode will validate that the types of
15420 // a binary operator match, so go through the list in reverse so that
15421 // we've likely promoted both operands first.
15422 while (!PromOpHandles.empty()) {
15423 SDValue PromOp = PromOpHandles.back().getValue();
15424 PromOpHandles.pop_back();
15425
15426 unsigned C;
15427 switch (PromOp.getOpcode()) {
15428 default: C = 0; break;
15429 case ISD::SELECT: C = 1; break;
15430 case ISD::SELECT_CC: C = 2; break;
15431 }
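// C is the index of the first operand that gets promoted: 0 for ordinary
// binary ops, 1 for SELECT (skipping the condition), and 2 for SELECT_CC
// (skipping the two compare operands).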
15432
15433 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15434 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15435 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15436 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15437 // The to-be-promoted operands of this node have not yet been
15438 // promoted (this should be rare because we're going through the
15439 // list backward, but if one of the operands has several users in
15440 // this cluster of to-be-promoted nodes, it is possible).
15441 PromOpHandles.emplace_front(PromOp);
15442 continue;
15443 }
15444
15445 // For SELECT and SELECT_CC nodes, we do a similar check for any
15446 // to-be-promoted comparison inputs.
15447 if (PromOp.getOpcode() == ISD::SELECT ||
15448 PromOp.getOpcode() == ISD::SELECT_CC) {
15449 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15450 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15451 (SelectTruncOp[1].count(PromOp.getNode()) &&
15452 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15453 PromOpHandles.emplace_front(PromOp);
15454 continue;
15455 }
15456 }
15457
15458 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15459
15460 // If this node has constant inputs, then they'll need to be promoted here.
15461 for (unsigned i = 0; i < 2; ++i) {
15462 if (!isa<ConstantSDNode>(Ops[C+i]))
15463 continue;
15464 if (Ops[C+i].getValueType() == N->getValueType(0))
15465 continue;
15466
15467 if (N->getOpcode() == ISD::SIGN_EXTEND)
15468 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15469 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15470 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15471 else
15472 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15473 }
15474
15475 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15476 // truncate them again to the original value type.
15477 if (PromOp.getOpcode() == ISD::SELECT ||
15478 PromOp.getOpcode() == ISD::SELECT_CC) {
15479 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15480 if (SI0 != SelectTruncOp[0].end())
15481 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15482 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15483 if (SI1 != SelectTruncOp[1].end())
15484 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15485 }
15486
15487 DAG.ReplaceAllUsesOfValueWith(PromOp,
15488 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15489 }
15490
15491 // Now we're left with the initial extension itself.
15492 if (!ReallyNeedsExt)
15493 return N->getOperand(0);
15494
15495 // To zero extend, just mask off everything except for the first bit (in the
15496 // i1 case).
15497 if (N->getOpcode() == ISD::ZERO_EXTEND)
15498 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15499 DAG.getConstant(APInt::getLowBitsSet(
15500 N->getValueSizeInBits(0), PromBits),
15501 dl, N->getValueType(0)));
15502
15503 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15504 "Invalid extension type");
15505 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15506 SDValue ShiftCst =
15507 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15508 return DAG.getNode(
15509 ISD::SRA, dl, N->getValueType(0),
15510 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15511 ShiftCst);
15512}
15513
15514 // Check whether an i128 load used in an equality compare can be converted to a
15515 // v16i8 load for vcmpequb.
15516 static bool canConvertToVcmpequb(SDValue LHS, SDValue RHS) {
15517 auto isValidForConvert = [](SDValue &Operand) {
15518 if (!Operand.hasOneUse())
15519 return false;
15520
15521 if (Operand.getValueType() != MVT::i128)
15522 return false;
15523
15524 if (Operand.getOpcode() == ISD::Constant)
15525 return true;
15526
15527 auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
15528 if (!LoadNode)
15529 return false;
15530
15531 // If the memory operation is volatile, do not perform any
15532 // optimization or transformation. Volatile operations must be preserved
15533 // as written to ensure correct program behavior, so we reject the
15534 // conversion here.
15535
15536 if (LoadNode->isVolatile())
15537 return false;
15538
15539 // Only combine loads if both use the unindexed addressing mode.
15540 // PowerPC AltiVec/VMX does not support vector loads or stores with
15541 // pre/post-increment addressing. Indexed modes may imply implicit
15542 // pointer updates, which are not compatible with AltiVec vector
15543 // instructions.
15544 if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15545 return false;
15546
15547 // Only combine loads if both are non-extending loads
15548 // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15549 // ISD::SEXTLOAD) perform zero or sign extension, which may change the
15550 // loaded value's semantics and are not compatible with vector loads.
15551 if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15552 return false;
15553
15554 return true;
15555 };
15556
15557 return (isValidForConvert(LHS) && isValidForConvert(RHS));
15558}
15559
15560 static SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N,
15561 const SDLoc &DL) {
15562
15563 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15564
15565 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15566 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15567 "CC mus be ISD::SETNE or ISD::SETEQ");
15568
15569 auto getV16i8Load = [&](const SDValue &Operand) {
15570 if (Operand.getOpcode() == ISD::Constant)
15571 return DAG.getBitcast(MVT::v16i8, Operand);
15572
15573 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15574
15575 auto *LoadNode = cast<LoadSDNode>(Operand);
15576 return DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15577 LoadNode->getBasePtr(), LoadNode->getMemOperand());
15578 };
15579
15580 // Following code transforms the DAG
15581 // t0: ch,glue = EntryToken
15582 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15583 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15584 // undef:i64
15585 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15586 // t5: i128,ch =
15587 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15588 // setcc t3, t5, setne:ch
15589 //
15590 // ---->
15591 //
15592 // t0: ch,glue = EntryToken
15593 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15594 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15595 // undef:i64
15596 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15597 // t5: v16i8,ch =
15598 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15599 // t6: i32 =
15600 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15601 // Constant:i32<2>, t3, t5
15602 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15603
15604 // Or transforms the DAG
15605 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15606 // t8: i1 =
15607 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15608 //
15609 // --->
15610 //
15611 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15612 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15613 // t7: i32 =
15614 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15615
15616 SDValue LHSVec = getV16i8Load(N->getOperand(0));
15617 SDValue RHSVec = getV16i8Load(N->getOperand(1));
15618
15619 SDValue IntrID =
15620 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
15621 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
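// With CR6 predicate selector 2, vcmpequb.p produces a non-zero result only
// when every byte element of the two vectors compares equal.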
15622 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
15623 IntrID, CRSel, LHSVec, RHSVec);
15624 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15625 // so we need to invert the CC opcode.
15626 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15627 DAG.getConstant(0, DL, MVT::i32),
15628 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15629}
15630
15631SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15632 DAGCombinerInfo &DCI) const {
15633 assert(N->getOpcode() == ISD::SETCC &&
15634 "Should be called with a SETCC node");
15635
15636 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15637 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15638 SDValue LHS = N->getOperand(0);
15639 SDValue RHS = N->getOperand(1);
15640
15641 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15642 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15643 LHS.hasOneUse())
15644 std::swap(LHS, RHS);
15645
15646 // x == 0-y --> x+y == 0
15647 // x != 0-y --> x+y != 0
15648 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15649 RHS.hasOneUse()) {
15650 SDLoc DL(N);
15651 SelectionDAG &DAG = DCI.DAG;
15652 EVT VT = N->getValueType(0);
15653 EVT OpVT = LHS.getValueType();
15654 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15655 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15656 }
15657
15658 // Optimization: Fold i128 equality/inequality compares of two loads into a
15659 // vectorized compare using vcmpequb.p when Altivec is available.
15660 //
15661 // Rationale:
15662 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
15663 // On Altivec-capable subtargets, we can instead reinterpret the i128 loads
15664 // as v16i8 vectors and use the Altivec vcmpequb.p instruction to
15665 // perform a full 128-bit equality check in a single vector compare.
15666 //
15667 // Example Result:
15668 // This transformation replaces memcmp(a, b, 16) with two vector loads
15669 // and one vector compare instruction.
15670
15671 if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
15672 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
15673 }
15674
15675 return DAGCombineTruncBoolExt(N, DCI);
15676}
15677
15678// Is this an extending load from an f32 to an f64?
15679static bool isFPExtLoad(SDValue Op) {
15680 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15681 return LD->getExtensionType() == ISD::EXTLOAD &&
15682 Op.getValueType() == MVT::f64;
15683 return false;
15684}
15685
15686 /// Reduces the number of fp-to-int conversions when building a vector.
15687///
15688/// If this vector is built out of floating to integer conversions,
15689/// transform it to a vector built out of floating point values followed by a
15690/// single floating to integer conversion of the vector.
15691/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15692/// becomes (fptosi (build_vector ($A, $B, ...)))
15693SDValue PPCTargetLowering::
15694combineElementTruncationToVectorTruncation(SDNode *N,
15695 DAGCombinerInfo &DCI) const {
15696 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15697 "Should be called with a BUILD_VECTOR node");
15698
15699 SelectionDAG &DAG = DCI.DAG;
15700 SDLoc dl(N);
15701
15702 SDValue FirstInput = N->getOperand(0);
15703 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15704 "The input operand must be an fp-to-int conversion.");
15705
15706 // This combine happens after legalization so the fp_to_[su]i nodes are
15707 // already converted to PPCISD nodes.
15708 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15709 if (FirstConversion == PPCISD::FCTIDZ ||
15710 FirstConversion == PPCISD::FCTIDUZ ||
15711 FirstConversion == PPCISD::FCTIWZ ||
15712 FirstConversion == PPCISD::FCTIWUZ) {
15713 bool IsSplat = true;
15714 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15715 FirstConversion == PPCISD::FCTIWUZ;
15716 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15717 SmallVector<SDValue, 4> Ops;
15718 EVT TargetVT = N->getValueType(0);
15719 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15720 SDValue NextOp = N->getOperand(i);
15721 if (NextOp.getOpcode() != PPCISD::MFVSR)
15722 return SDValue();
15723 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15724 if (NextConversion != FirstConversion)
15725 return SDValue();
15726 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15727 // This is not valid if the input was originally double precision. It is
15728 // also not profitable to do unless this is an extending load in which
15729 // case doing this combine will allow us to combine consecutive loads.
15730 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15731 return SDValue();
15732 if (N->getOperand(i) != FirstInput)
15733 IsSplat = false;
15734 }
15735
15736 // If this is a splat, we leave it as-is since there will be only a single
15737 // fp-to-int conversion followed by a splat of the integer. This is better
15738 // for 32-bit and smaller ints and neutral for 64-bit ints.
15739 if (IsSplat)
15740 return SDValue();
15741
15742 // Now that we know we have the right type of node, get its operands
15743 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15744 SDValue In = N->getOperand(i).getOperand(0);
15745 if (Is32Bit) {
15746 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15747 // here, we know that all inputs are extending loads so this is safe).
15748 if (In.isUndef())
15749 Ops.push_back(DAG.getUNDEF(SrcVT));
15750 else {
15751 SDValue Trunc =
15752 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15753 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15754 Ops.push_back(Trunc);
15755 }
15756 } else
15757 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15758 }
15759
15760 unsigned Opcode;
15761 if (FirstConversion == PPCISD::FCTIDZ ||
15762 FirstConversion == PPCISD::FCTIWZ)
15763 Opcode = ISD::FP_TO_SINT;
15764 else
15765 Opcode = ISD::FP_TO_UINT;
15766
15767 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15768 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15769 return DAG.getNode(Opcode, dl, TargetVT, BV);
15770 }
15771 return SDValue();
15772}
15773
15774 // The LXVKQ instruction loads a VSX vector with a special quadword value
15775// based on an immediate value. This helper method returns the details of the
15776// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
15777// to help generate the LXVKQ instruction and the subsequent shift instruction
15778// required to match the original build vector pattern.
15779
15780// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
15781using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
15782
15783static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15784
15785 // LXVKQ instruction loads the Quadword value:
15786 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15787 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15788 static const uint32_t Uim = 16;
15789
15790 // Check for direct LXVKQ match (no shift needed)
15791 if (FullVal == BasePattern)
15792 return std::make_tuple(Uim, uint8_t{0});
15793
15794 // Check if FullValue is 1 (the result of the base pattern >> 127)
15795 if (FullVal == APInt(128, 1))
15796 return std::make_tuple(Uim, uint8_t{127});
15797
15798 return std::nullopt;
15799}
15800
15801/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
15802/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
15803 /// The LXVKQ instruction loads a VSX vector with a special quadword value based
15804 /// on an immediate value. If UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
15805/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
15806/// This can be used to inline the build vector constants that have the
15807/// following patterns:
15808///
15809/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
15810/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
15811 /// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern is loaded using a
15812/// combination of splatting and right shift instructions.
15813
15814SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15815 SelectionDAG &DAG) const {
15816
15817 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
15818 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
15819
15820 // This transformation is only supported for build vectors with byte,
15821 // halfword, word, or doubleword elements.
15822 EVT VT = Op.getValueType();
15823 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
15824 VT == MVT::v2i64))
15825 return SDValue();
15826
15827 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
15828 << VT.getEVTString() << "): ";
15829 Op->dump());
15830
15831 unsigned NumElems = VT.getVectorNumElements();
15832 unsigned ElemBits = VT.getScalarSizeInBits();
15833
15834 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
15835
15836 // Check for Non-constant operand in the build vector.
15837 for (const SDValue &Operand : Op.getNode()->op_values()) {
15838 if (!isa<ConstantSDNode>(Operand))
15839 return SDValue();
15840 }
15841
15842 // Assemble build vector operands as a 128-bit register value
15843 // We need to reconstruct what the 128-bit register pattern would be
15844 // that produces this vector when interpreted with the current endianness
15845 APInt FullVal = APInt::getZero(128);
15846
15847 for (unsigned Index = 0; Index < NumElems; ++Index) {
15848 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
15849
15850 // Get element value as raw bits (zero-extended)
15851 uint64_t ElemValue = C->getZExtValue();
15852
15853 // Mask to element size to ensure we only get the relevant bits
15854 if (ElemBits < 64)
15855 ElemValue &= ((1ULL << ElemBits) - 1);
15856
15857 // Calculate bit position for this element in the 128-bit register
15858 unsigned BitPos =
15859 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
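// For example, element 1 of a v4i32 build vector occupies bits [32, 63] of
// the 128-bit value on little endian and bits [64, 95] on big endian.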
15860
15861 // Create APInt for the element value and shift it to correct position
15862 APInt ElemAPInt(128, ElemValue);
15863 ElemAPInt <<= BitPos;
15864
15865 // Place the element value at the correct bit position
15866 FullVal |= ElemAPInt;
15867 }
15868
15869 if (FullVal.isZero() || FullVal.isAllOnes())
15870 return SDValue();
15871
15872 if (auto UIMOpt = getPatternInfo(FullVal)) {
15873 const auto &[Uim, ShiftAmount] = *UIMOpt;
15874 SDLoc Dl(Op);
15875
15876 // Generate LXVKQ instruction if the shift amount is zero.
15877 if (ShiftAmount == 0) {
15878 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
15879 SDValue LxvkqInstr =
15880 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
15882 << "combineBVLoadsSpecialValue: Instruction Emitted ";
15883 LxvkqInstr.dump());
15884 return LxvkqInstr;
15885 }
15886
15887 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
15888
15889 // The right shifted pattern can be constructed using a combination of
15890 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
15891 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
15892 // value 255.
15893 SDValue ShiftAmountVec =
15894 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
15895 DAG.getTargetConstant(255, Dl, MVT::i32)),
15896 0);
15897 // Generate appropriate right shift instruction
15898 SDValue ShiftVec = SDValue(
15899 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
15900 0);
15902 << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
15903 ShiftVec.dump());
15904 return ShiftVec;
15905 }
15906 // No patterns matched for build vectors.
15907 return SDValue();
15908}
15909
15910/// Reduce the number of loads when building a vector.
15911///
15912/// Building a vector out of multiple loads can be converted to a load
15913/// of the vector type if the loads are consecutive. If the loads are
15914/// consecutive but in descending order, a shuffle is added at the end
15915/// to reorder the vector.
15916 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15917 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15918 "Should be called with a BUILD_VECTOR node");
15919
15920 SDLoc dl(N);
15921
15922 // Return early for non-byte-sized types, as they can't be consecutive.
15923 if (!N->getValueType(0).getVectorElementType().isByteSized())
15924 return SDValue();
15925
15926 bool InputsAreConsecutiveLoads = true;
15927 bool InputsAreReverseConsecutive = true;
15928 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
15929 SDValue FirstInput = N->getOperand(0);
15930 bool IsRoundOfExtLoad = false;
15931 LoadSDNode *FirstLoad = nullptr;
15932
15933 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15934 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
15935 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
15936 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15937 }
15938 // Not a build vector of (possibly fp_rounded) loads.
15939 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15940 N->getNumOperands() == 1)
15941 return SDValue();
15942
15943 if (!IsRoundOfExtLoad)
15944 FirstLoad = cast<LoadSDNode>(FirstInput);
15945
15946 SmallVector<LoadSDNode *, 4> InputLoads;
15947 InputLoads.push_back(FirstLoad);
15948 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15949 // If any inputs are fp_round(extload), they all must be.
15950 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
15951 return SDValue();
15952
15953 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
15954 N->getOperand(i);
15955 if (NextInput.getOpcode() != ISD::LOAD)
15956 return SDValue();
15957
15958 SDValue PreviousInput =
15959 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
15960 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
15961 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
15962
15963 // If any inputs are fp_round(extload), they all must be.
15964 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15965 return SDValue();
15966
15967 // We only care about regular loads. The PPC-specific load intrinsics
15968 // will not lead to a merge opportunity.
15969 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
15970 InputsAreConsecutiveLoads = false;
15971 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
15972 InputsAreReverseConsecutive = false;
15973
15974 // Exit early if the loads are neither consecutive nor reverse consecutive.
15975 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15976 return SDValue();
15977 InputLoads.push_back(LD2);
15978 }
15979
15980 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15981 "The loads cannot be both consecutive and reverse consecutive.");
15982
15983 SDValue WideLoad;
15984 SDValue ReturnSDVal;
15985 if (InputsAreConsecutiveLoads) {
15986 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15987 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
15988 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15989 FirstLoad->getAlign());
15990 ReturnSDVal = WideLoad;
15991 } else if (InputsAreReverseConsecutive) {
15992 LoadSDNode *LastLoad = InputLoads.back();
15993 assert(LastLoad && "Input needs to be a LoadSDNode.");
15994 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
15995 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
15996 LastLoad->getAlign());
15997 SmallVector<int, 16> Ops;
15998 for (int i = N->getNumOperands() - 1; i >= 0; i--)
15999 Ops.push_back(i);
16000
16001 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16002 DAG.getUNDEF(N->getValueType(0)), Ops);
16003 } else
16004 return SDValue();
16005
16006 for (auto *LD : InputLoads)
16007 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16008 return ReturnSDVal;
16009}
16010
16011// This function adds the required vector_shuffle needed to get
16012// the elements of the vector extract in the correct position
16013// as specified by the CorrectElems encoding.
16014 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
16015 SDValue Input, uint64_t Elems,
16016 uint64_t CorrectElems) {
16017 SDLoc dl(N);
16018
16019 unsigned NumElems = Input.getValueType().getVectorNumElements();
16020 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16021
16022 // Knowing the element indices being extracted from the original
16023 // vector and the order in which they're being inserted, just put
16024 // them at element indices required for the instruction.
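// Both encodings are consumed one byte per operand, starting from the least
// significant byte: the Elems nibble is the source element index and the
// CorrectElems nibble is the lane the sign-extend instruction reads from.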
16025 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16026 if (DAG.getDataLayout().isLittleEndian())
16027 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16028 else
16029 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16030 CorrectElems = CorrectElems >> 8;
16031 Elems = Elems >> 8;
16032 }
16033
16034 SDValue Shuffle =
16035 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16036 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16037
16038 EVT VT = N->getValueType(0);
16039 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16040
16041 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16042 Input.getValueType().getVectorElementType(),
16043 N->getNumOperands());
16044 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16045 DAG.getValueType(ExtVT));
16046}
16047
16048// Look for build vector patterns where input operands come from sign
16049// extended vector_extract elements of specific indices. If the correct indices
16050// aren't used, add a vector shuffle to fix up the indices and create
16051// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16052// during instruction selection.
16053 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
16054 // This array encodes the indices that the vector sign extend instructions
16055 // extract from when extending from one type to another for both BE and LE.
16056 // The right nibble of each byte corresponds to the LE indices,
16057 // and the left nibble of each byte corresponds to the BE indices.
16058 // For example: 0x3074B8FC byte->word
16059 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16060 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16061 // For example: 0x000070F8 byte->double word
16062 // For LE: the allowed indices are: 0x0,0x8
16063 // For BE: the allowed indices are: 0x7,0xF
16064 uint64_t TargetElems[] = {
16065 0x3074B8FC, // b->w
16066 0x000070F8, // b->d
16067 0x10325476, // h->w
16068 0x00003074, // h->d
16069 0x00001032, // w->d
16070 };
16071
16072 uint64_t Elems = 0;
16073 int Index;
16074 SDValue Input;
16075
16076 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16077 if (!Op)
16078 return false;
16079 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16080 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16081 return false;
16082
16083 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16084 // of the right width.
16085 SDValue Extract = Op.getOperand(0);
16086 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16087 Extract = Extract.getOperand(0);
16088 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16089 return false;
16090
16091 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
16092 if (!ExtOp)
16093 return false;
16094
16095 Index = ExtOp->getZExtValue();
16096 if (Input && Input != Extract.getOperand(0))
16097 return false;
16098
16099 if (!Input)
16100 Input = Extract.getOperand(0);
16101
16102 Elems = Elems << 8;
16103 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16104 Elems |= Index;
16105
16106 return true;
16107 };
16108
16109 // If the build vector operands aren't sign extended vector extracts
16110 // of the same input vector, then return.
16111 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16112 if (!isSExtOfVecExtract(N->getOperand(i))) {
16113 return SDValue();
16114 }
16115 }
16116
16117 // If the vector extract indices are not correct, add the appropriate
16118 // vector_shuffle.
16119 int TgtElemArrayIdx;
16120 int InputSize = Input.getValueType().getScalarSizeInBits();
16121 int OutputSize = N->getValueType(0).getScalarSizeInBits();
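// The sum of the input and output element widths uniquely identifies the
// extension: 8+32=40 (b->w), 8+64=72 (b->d), 16+32=48 (h->w), 16+64=80 (h->d)
// and 32+64=96 (w->d).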
16122 if (InputSize + OutputSize == 40)
16123 TgtElemArrayIdx = 0;
16124 else if (InputSize + OutputSize == 72)
16125 TgtElemArrayIdx = 1;
16126 else if (InputSize + OutputSize == 48)
16127 TgtElemArrayIdx = 2;
16128 else if (InputSize + OutputSize == 80)
16129 TgtElemArrayIdx = 3;
16130 else if (InputSize + OutputSize == 96)
16131 TgtElemArrayIdx = 4;
16132 else
16133 return SDValue();
16134
16135 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16136 CorrectElems = DAG.getDataLayout().isLittleEndian()
16137 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16138 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16139 if (Elems != CorrectElems) {
16140 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16141 }
16142
16143 // Regular lowering will catch cases where a shuffle is not needed.
16144 return SDValue();
16145}
16146
16147// Look for the pattern of a load from a narrow width to i128, feeding
16148// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16149// (LXVRZX). This node represents a zero extending load that will be matched
16150// to the Load VSX Vector Rightmost instructions.
16151 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
16152 SDLoc DL(N);
16153
16154 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16155 if (N->getValueType(0) != MVT::v1i128)
16156 return SDValue();
16157
16158 SDValue Operand = N->getOperand(0);
16159 // Proceed with the transformation if the operand to the BUILD_VECTOR
16160 // is a load instruction.
16161 if (Operand.getOpcode() != ISD::LOAD)
16162 return SDValue();
16163
16164 auto *LD = cast<LoadSDNode>(Operand);
16165 EVT MemoryType = LD->getMemoryVT();
16166
16167 // This transformation is only valid if we are loading either a byte,
16168 // halfword, word, or doubleword.
16169 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16170 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16171
16172 // Ensure that the load from the narrow width is being zero extended to i128.
16173 if (!ValidLDType ||
16174 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16175 LD->getExtensionType() != ISD::EXTLOAD))
16176 return SDValue();
16177
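// LXVRZX takes the chain, the load address, and the width in bits of the
// zero-extending load as its operands.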
16178 SDValue LoadOps[] = {
16179 LD->getChain(), LD->getBasePtr(),
16180 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16181
16182 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16183 DAG.getVTList(MVT::v1i128, MVT::Other),
16184 LoadOps, MemoryType, LD->getMemOperand());
16185}
16186
16187SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16188 DAGCombinerInfo &DCI) const {
16189 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16190 "Should be called with a BUILD_VECTOR node");
16191
16192 SelectionDAG &DAG = DCI.DAG;
16193 SDLoc dl(N);
16194
16195 if (!Subtarget.hasVSX())
16196 return SDValue();
16197
16198 // The target independent DAG combiner will leave a build_vector of
16199 // float-to-int conversions intact. We can generate MUCH better code for
16200 // a float-to-int conversion of a vector of floats.
16201 SDValue FirstInput = N->getOperand(0);
16202 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16203 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16204 if (Reduced)
16205 return Reduced;
16206 }
16207
16208 // If we're building a vector out of consecutive loads, just load that
16209 // vector type.
16210 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16211 if (Reduced)
16212 return Reduced;
16213
16214 // If we're building a vector out of extended elements from another vector
16215 // we have P9 vector integer extend instructions. The code assumes legal
16216 // input types (i.e. it can't handle things like v4i16) so do not run before
16217 // legalization.
16218 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16219 Reduced = combineBVOfVecSExt(N, DAG);
16220 if (Reduced)
16221 return Reduced;
16222 }
16223
16224 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16225 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16226 // is a load from <valid narrow width> to i128.
16227 if (Subtarget.isISA3_1()) {
16228 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16229 if (BVOfZLoad)
16230 return BVOfZLoad;
16231 }
16232
16233 if (N->getValueType(0) != MVT::v2f64)
16234 return SDValue();
16235
16236 // Looking for:
16237 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16238 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16239 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16240 return SDValue();
16241 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16242 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16243 return SDValue();
16244 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16245 return SDValue();
16246
16247 SDValue Ext1 = FirstInput.getOperand(0);
16248 SDValue Ext2 = N->getOperand(1).getOperand(0);
16249 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16250 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16251 return SDValue();
16252
16253 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16254 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16255 if (!Ext1Op || !Ext2Op)
16256 return SDValue();
16257 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16258 Ext1.getOperand(0) != Ext2.getOperand(0))
16259 return SDValue();
16260
16261 int FirstElem = Ext1Op->getZExtValue();
16262 int SecondElem = Ext2Op->getZExtValue();
16263 int SubvecIdx;
16264 if (FirstElem == 0 && SecondElem == 1)
16265 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16266 else if (FirstElem == 2 && SecondElem == 3)
16267 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16268 else
16269 return SDValue();
16270
16271 SDValue SrcVec = Ext1.getOperand(0);
16272 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16273 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16274 return DAG.getNode(NodeType, dl, MVT::v2f64,
16275 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16276}
16277
16278SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16279 DAGCombinerInfo &DCI) const {
16280 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16281 N->getOpcode() == ISD::UINT_TO_FP) &&
16282 "Need an int -> FP conversion node here");
16283
16284 if (useSoftFloat() || !Subtarget.has64BitSupport())
16285 return SDValue();
16286
16287 SelectionDAG &DAG = DCI.DAG;
16288 SDLoc dl(N);
16289 SDValue Op(N, 0);
16290
16291 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16292 // from the hardware.
16293 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16294 return SDValue();
16295 if (!Op.getOperand(0).getValueType().isSimple())
16296 return SDValue();
16297 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16298 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16299 return SDValue();
16300
16301 SDValue FirstOperand(Op.getOperand(0));
16302 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16303 (FirstOperand.getValueType() == MVT::i8 ||
16304 FirstOperand.getValueType() == MVT::i16);
16305 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16306 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16307 bool DstDouble = Op.getValueType() == MVT::f64;
16308 unsigned ConvOp = Signed ?
16309 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16310 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16311 SDValue WidthConst =
16312 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16313 dl, false);
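// WidthConst is the number of bytes to load: 1 for an i8 source and 2 for an
// i16 source, which is what the LXSIZX node expects.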
16314 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16315 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16316 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16317 DAG.getVTList(MVT::f64, MVT::Other),
16318 Ops, MVT::i8, LDN->getMemOperand());
16319 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16320
16321 // For signed conversion, we need to sign-extend the value in the VSR
16322 if (Signed) {
16323 SDValue ExtOps[] = { Ld, WidthConst };
16324 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16325 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16326 } else
16327 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16328 }
16329
16330
16331 // For i32 intermediate values, unfortunately, the conversion functions
16332 // leave the upper 32 bits of the value undefined. Within the set of
16333 // scalar instructions, we have no method for zero- or sign-extending the
16334 // value. Thus, we cannot handle i32 intermediate values here.
16335 if (Op.getOperand(0).getValueType() == MVT::i32)
16336 return SDValue();
16337
16338 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16339 "UINT_TO_FP is supported only with FPCVT");
16340
16341 // If we have FCFIDS, then use it when converting to single-precision.
16342 // Otherwise, convert to double-precision and then round.
16343 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16344 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16345 : PPCISD::FCFIDS)
16346 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16347 : PPCISD::FCFID);
16348 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16349 ? MVT::f32
16350 : MVT::f64;
16351
16352 // If we're converting from a float, to an int, and back to a float again,
16353 // then we don't need the store/load pair at all.
16354 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16355 Subtarget.hasFPCVT()) ||
16356 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16357 SDValue Src = Op.getOperand(0).getOperand(0);
16358 if (Src.getValueType() == MVT::f32) {
16359 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16360 DCI.AddToWorklist(Src.getNode());
16361 } else if (Src.getValueType() != MVT::f64) {
16362 // Make sure that we don't pick up a ppc_fp128 source value.
16363 return SDValue();
16364 }
16365
16366 unsigned FCTOp =
16367 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16368 PPCISD::FCTIDUZ;
16369
16370 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16371 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16372
16373 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16374 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16375 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16376 DCI.AddToWorklist(FP.getNode());
16377 }
16378
16379 return FP;
16380 }
16381
16382 return SDValue();
16383}
16384
16385// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16386// builtins) into loads with swaps.
16387 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16388 DAGCombinerInfo &DCI) const {
16389 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16390 // load combines.
16391 if (DCI.isBeforeLegalizeOps())
16392 return SDValue();
16393
16394 SelectionDAG &DAG = DCI.DAG;
16395 SDLoc dl(N);
16396 SDValue Chain;
16397 SDValue Base;
16398 MachineMemOperand *MMO;
16399
16400 switch (N->getOpcode()) {
16401 default:
16402 llvm_unreachable("Unexpected opcode for little endian VSX load");
16403 case ISD::LOAD: {
16404 LoadSDNode *LD = cast<LoadSDNode>(N);
16405 Chain = LD->getChain();
16406 Base = LD->getBasePtr();
16407 MMO = LD->getMemOperand();
16408 // If the MMO suggests this isn't a load of a full vector, leave
16409 // things alone. For a built-in, we have to make the change for
16410 // correctness, so if there is a size problem that will be a bug.
16411 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16412 return SDValue();
16413 break;
16414 }
16415 case ISD::INTRINSIC_W_CHAIN: {
16416 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16417 Chain = Intrin->getChain();
16418 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16419 // us what we want. Get operand 2 instead.
16420 Base = Intrin->getOperand(2);
16421 MMO = Intrin->getMemOperand();
16422 break;
16423 }
16424 }
16425
16426 MVT VecTy = N->getValueType(0).getSimpleVT();
16427
16428 SDValue LoadOps[] = { Chain, Base };
16429 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16430 DAG.getVTList(MVT::v2f64, MVT::Other),
16431 LoadOps, MVT::v2f64, MMO);
16432
16433 DCI.AddToWorklist(Load.getNode());
16434 Chain = Load.getValue(1);
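// LXVD2X loads the two doublewords in big-endian element order; the XXSWAPD
// restores the order a native little-endian vector load would produce.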
16435 SDValue Swap = DAG.getNode(
16436 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16437 DCI.AddToWorklist(Swap.getNode());
16438
16439 // Add a bitcast if the resulting load type doesn't match v2f64.
16440 if (VecTy != MVT::v2f64) {
16441 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16442 DCI.AddToWorklist(N.getNode());
16443 // Package {bitcast value, swap's chain} to match Load's shape.
16444 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16445 N, Swap.getValue(1));
16446 }
16447
16448 return Swap;
16449}
16450
16451// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16452// builtins) into stores with swaps.
16453 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16454 DAGCombinerInfo &DCI) const {
16455 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16456 // store combines.
16457 if (DCI.isBeforeLegalizeOps())
16458 return SDValue();
16459
16460 SelectionDAG &DAG = DCI.DAG;
16461 SDLoc dl(N);
16462 SDValue Chain;
16463 SDValue Base;
16464 unsigned SrcOpnd;
16465 MachineMemOperand *MMO;
16466
16467 switch (N->getOpcode()) {
16468 default:
16469 llvm_unreachable("Unexpected opcode for little endian VSX store");
16470 case ISD::STORE: {
16471 StoreSDNode *ST = cast<StoreSDNode>(N);
16472 Chain = ST->getChain();
16473 Base = ST->getBasePtr();
16474 MMO = ST->getMemOperand();
16475 SrcOpnd = 1;
16476 // If the MMO suggests this isn't a store of a full vector, leave
16477 // things alone. For a built-in, we have to make the change for
16478 // correctness, so if there is a size problem that will be a bug.
16479 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16480 return SDValue();
16481 break;
16482 }
16483 case ISD::INTRINSIC_VOID: {
16484 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16485 Chain = Intrin->getChain();
16486 // Intrin->getBasePtr() oddly does not get what we want.
16487 Base = Intrin->getOperand(3);
16488 MMO = Intrin->getMemOperand();
16489 SrcOpnd = 2;
16490 break;
16491 }
16492 }
16493
16494 SDValue Src = N->getOperand(SrcOpnd);
16495 MVT VecTy = Src.getValueType().getSimpleVT();
16496
16497 // All stores are done as v2f64 and possible bit cast.
16498 if (VecTy != MVT::v2f64) {
16499 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16500 DCI.AddToWorklist(Src.getNode());
16501 }
16502
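// Swap the doublewords before storing so that the big-endian-ordered STXVD2X
// writes the value to memory in little-endian element order.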
16503 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16504 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16505 DCI.AddToWorklist(Swap.getNode());
16506 Chain = Swap.getValue(1);
16507 SDValue StoreOps[] = { Chain, Swap, Base };
16508 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16509 DAG.getVTList(MVT::Other),
16510 StoreOps, VecTy, MMO);
16511 DCI.AddToWorklist(Store.getNode());
16512 return Store;
16513}
16514
16515// Handle DAG combine for STORE (FP_TO_INT F).
16516SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16517 DAGCombinerInfo &DCI) const {
16518 SelectionDAG &DAG = DCI.DAG;
16519 SDLoc dl(N);
16520 unsigned Opcode = N->getOperand(1).getOpcode();
16521 (void)Opcode;
16522 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16523
16524 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16525 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16526 && "Not a FP_TO_INT Instruction!");
16527
16528 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16529 EVT Op1VT = N->getOperand(1).getValueType();
16530 EVT ResVT = Val.getValueType();
16531
16532 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16533 return SDValue();
16534
16535 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16536 bool ValidTypeForStoreFltAsInt =
16537 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16538 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16539
16540 // TODO: Lower conversion from f128 on all VSX targets
16541 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16542 return SDValue();
16543
16544 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16545 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16546 return SDValue();
16547
16548 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16549
16550 // Set number of bytes being converted.
16551 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16552 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
16553 DAG.getIntPtrConstant(ByteSize, dl, false),
16554 DAG.getValueType(Op1VT)};
16555
16556 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
16557 DAG.getVTList(MVT::Other), Ops,
16558 cast<StoreSDNode>(N)->getMemoryVT(),
16559 cast<StoreSDNode>(N)->getMemOperand());
16560
16561 return Val;
16562}
16563
16564static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16565 // Check that the source of the element keeps flipping
16566 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
16567 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16568 for (int i = 1, e = Mask.size(); i < e; i++) {
16569 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16570 return false;
16571 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16572 return false;
16573 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16574 }
16575 return true;
16576}
16577
16578static bool isSplatBV(SDValue Op) {
16579 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16580 return false;
16581 SDValue FirstOp;
16582
16583 // Find first non-undef input.
16584 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16585 FirstOp = Op.getOperand(i);
16586 if (!FirstOp.isUndef())
16587 break;
16588 }
16589
16590 // All inputs are undef or the same as the first non-undef input.
16591 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16592 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16593 return false;
16594 return true;
16595}
16596
16597 static SDValue isScalarToVec(SDValue Op) {
16598 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16599 return Op;
16600 if (Op.getOpcode() != ISD::BITCAST)
16601 return SDValue();
16602 Op = Op.getOperand(0);
16603 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16604 return Op;
16605 return SDValue();
16606}
16607
16608// Fix up the shuffle mask to account for the fact that the result of
16609// scalar_to_vector is not in lane zero. This just takes all values in
16610// the ranges specified by the min/max indices and adds the number of
16611// elements required to ensure each element comes from the respective
16612// position in the valid lane.
16613// On little endian, that's just the corresponding element in the other
16614// half of the vector. On big endian, it is in the same half but right
16615// justified rather than left justified in that half.
16616 static void fixupShuffleMaskForPermutedSToV(
16617 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16618 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16619 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16620 int LHSEltFixup =
16621 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16622 int RHSEltFixup =
16623 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16624 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16625 int Idx = ShuffV[I];
16626 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16627 ShuffV[I] += LHSEltFixup;
16628 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16629 ShuffV[I] += RHSEltFixup;
16630 }
16631}
16632
16633// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16634// the original is:
16635// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16636// In such a case, just change the shuffle mask to extract the element
16637// from the permuted index.
16638 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
16639 const PPCSubtarget &Subtarget) {
16640 SDLoc dl(OrigSToV);
16641 EVT VT = OrigSToV.getValueType();
16642 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16643 "Expecting a SCALAR_TO_VECTOR here");
16644 SDValue Input = OrigSToV.getOperand(0);
16645
16646 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16647 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16648 SDValue OrigVector = Input.getOperand(0);
16649
16650 // Can't handle non-const element indices or different vector types
16651 // for the input to the extract and the output of the scalar_to_vector.
16652 if (Idx && VT == OrigVector.getValueType()) {
16653 unsigned NumElts = VT.getVectorNumElements();
16654 assert(
16655 NumElts > 1 &&
16656 "Cannot produce a permuted scalar_to_vector for one element vector");
16657 SmallVector<int, 16> NewMask(NumElts, -1);
16658 unsigned ResultInElt = NumElts / 2;
16659 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
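// The extracted element is placed where a permuted scalar_to_vector would put
// it: element NumElts / 2 on little endian and NumElts / 2 - 1 on big endian.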
16660 NewMask[ResultInElt] = Idx->getZExtValue();
16661 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16662 }
16663 }
16664 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16665 OrigSToV.getOperand(0));
16666}
16667
16668 static bool isShuffleMaskInRange(const ArrayRef<int> &ShuffV,
16669 int HalfVec, int LHSLastElementDefined,
16670 int RHSLastElementDefined) {
16671 for (int Index : ShuffV) {
16672 if (Index < 0) // Skip explicitly undefined mask indices.
16673 continue;
16674 // Handle first input vector of the vector_shuffle.
16675 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16676 (Index > LHSLastElementDefined))
16677 return false;
16678 // Handle second input vector of the vector_shuffle.
16679 if ((RHSLastElementDefined >= 0) &&
16680 (Index > HalfVec + RHSLastElementDefined))
16681 return false;
16682 }
16683 return true;
16684}
16685
16687 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16688 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16689 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16690 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16691 // Set up the values for the shuffle vector fixup.
16692 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16693 // The last element depends on if the input comes from the LHS or RHS.
16694 //
16695 // For example:
16696 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16697 //
16698 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16699 // because elements 1 and higher of a scalar_to_vector are undefined.
16700 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16701 // because elements 1 and higher of a scalar_to_vector are undefined.
16702 // It is also not 4 because the original scalar_to_vector is wider and
16703 // actually contains two i32 elements.
16704 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16705 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16706 : FirstElt;
16707 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16708 if (SToVPermuted.getValueType() != VecShuffOperandType)
16709 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16710 return SToVPermuted;
16711}
16712
16713// On little endian subtargets, combine shuffles such as:
16714// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16715// into:
16716// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16717// because the latter can be matched to a single instruction merge.
16718// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16719// to put the value into element zero. Adjust the shuffle mask so that the
16720// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16721// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16722// nodes with elements smaller than doubleword because all the ways
16723// of getting scalar data into a vector register put the value in the
16724// rightmost element of the left half of the vector.
16725SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16726 SelectionDAG &DAG) const {
16727 SDValue LHS = SVN->getOperand(0);
16728 SDValue RHS = SVN->getOperand(1);
16729 auto Mask = SVN->getMask();
16730 int NumElts = LHS.getValueType().getVectorNumElements();
16731 SDValue Res(SVN, 0);
16732 SDLoc dl(SVN);
16733 bool IsLittleEndian = Subtarget.isLittleEndian();
16734
16735 // On big endian targets this is only useful for subtargets with direct moves.
16736 // On little endian targets it would be useful for all subtargets with VSX.
16737 // However adding special handling for LE subtargets without direct moves
16738 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16739 // which includes direct moves.
16740 if (!Subtarget.hasDirectMove())
16741 return Res;
16742
16743 // If this is not a shuffle of a shuffle and the first element comes from
16744 // the second vector, canonicalize to the commuted form. This will make it
16745 // more likely to match one of the single instruction patterns.
16746 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16747 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16748 std::swap(LHS, RHS);
16749 Res = DAG.getCommutedVectorShuffle(*SVN);
16750 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16751 }
16752
16753 // Adjust the shuffle mask if either input vector comes from a
16754 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16755 // form (to prevent the need for a swap).
16756 SmallVector<int, 16> ShuffV(Mask);
16757 SDValue SToVLHS = isScalarToVec(LHS);
16758 SDValue SToVRHS = isScalarToVec(RHS);
16759 if (SToVLHS || SToVRHS) {
16760 EVT VT = SVN->getValueType(0);
16761 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16762 int ShuffleNumElts = ShuffV.size();
16763 int HalfVec = ShuffleNumElts / 2;
16764 // The width of the "valid lane" (i.e. the lane that contains the value that
16765 // is vectorized) needs to be expressed in terms of the number of elements
16766 // of the shuffle. It is thereby the ratio of the values before and after
16767 // any bitcast, which will be set later on if the LHS or RHS are
16768 // SCALAR_TO_VECTOR nodes.
16769 unsigned LHSNumValidElts = HalfVec;
16770 unsigned RHSNumValidElts = HalfVec;
16771
16772 // Initially assume that neither input is permuted. These will be adjusted
16773 // accordingly if either input is. Note, that -1 means that all elements
16774 // are undefined.
16775 int LHSFirstElt = 0;
16776 int RHSFirstElt = ShuffleNumElts;
16777 int LHSLastElt = -1;
16778 int RHSLastElt = -1;
16779
16780 // Get the permuted scalar to vector nodes for the source(s) that come from
16781 // ISD::SCALAR_TO_VECTOR.
16782 // On big endian systems, this only makes sense for element sizes smaller
16783 // than 64 bits since for 64-bit elements, all instructions already put
16784 // the value into element zero. Since scalar size of LHS and RHS may differ
16785 // after isScalarToVec, this should be checked using their own sizes.
16786 int LHSScalarSize = 0;
16787 int RHSScalarSize = 0;
16788 if (SToVLHS) {
16789 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16790 if (!IsLittleEndian && LHSScalarSize >= 64)
16791 return Res;
16792 }
16793 if (SToVRHS) {
16794 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16795 if (!IsLittleEndian && RHSScalarSize >= 64)
16796 return Res;
16797 }
16798 if (LHSScalarSize != 0)
16800 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
16801 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
16802 if (RHSScalarSize != 0)
16804 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
16805 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
16806
16807 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
16808 return Res;
16809
16810 // Fix up the shuffle mask to reflect where the desired element actually is.
16811 // The minimum and maximum indices that correspond to element zero for both
16812 // the LHS and RHS are computed and will control which shuffle mask entries
16813 // are to be changed. For example, if the RHS is permuted, any shuffle mask
16814 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
16816 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
16817 LHSNumValidElts, RHSNumValidElts, Subtarget);
16818 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16819
16820 // We may have simplified away the shuffle. We won't be able to do anything
16821 // further with it here.
16822 if (!isa<ShuffleVectorSDNode>(Res))
16823 return Res;
16824 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16825 }
16826
16827 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
16828 // The common case after we commuted the shuffle is that the RHS is a splat
16829 // and we have elements coming in from the splat at indices that are not
16830 // conducive to using a merge.
16831 // Example:
16832 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
16833 if (!isSplatBV(TheSplat))
16834 return Res;
16835
16836 // We are looking for a mask such that all even elements are from
16837 // one vector and all odd elements from the other.
16838 if (!isAlternatingShuffMask(Mask, NumElts))
16839 return Res;
16840
16841 // Adjust the mask so we are pulling in the same index from the splat
16842 // as the index from the interesting vector in consecutive elements.
16843 if (IsLittleEndian) {
16844 // Example (even elements from first vector):
16845 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
16846 if (Mask[0] < NumElts)
16847 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16848 if (ShuffV[i] < 0)
16849 continue;
16850 // If element from non-splat is undef, pick first element from splat.
16851 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16852 }
16853 // Example (odd elements from first vector):
16854 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16855 else
16856 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16857 if (ShuffV[i] < 0)
16858 continue;
16859 // If element from non-splat is undef, pick first element from splat.
16860 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16861 }
16862 } else {
16863 // Example (even elements from first vector):
16864 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16865 if (Mask[0] < NumElts)
16866 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16867 if (ShuffV[i] < 0)
16868 continue;
16869 // If element from non-splat is undef, pick first element from splat.
16870 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16871 }
16872 // Example (odd elements from first vector):
16873 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16874 else
16875 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16876 if (ShuffV[i] < 0)
16877 continue;
16878 // If element from non-splat is undef, pick first element from splat.
16879 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16880 }
16881 }
16882
16883 // If the RHS has undefs, we need to remove them since we may have created
16884 // a shuffle that adds those instead of the splat value.
16885 SDValue SplatVal =
16886 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16887 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
16888
16889 if (IsLittleEndian)
16890 RHS = TheSplat;
16891 else
16892 LHS = TheSplat;
16893 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16894}
16895
16896SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16897 LSBaseSDNode *LSBase,
16898 DAGCombinerInfo &DCI) const {
16899 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16900 "Not a reverse memop pattern!");
16901
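 // Helper: returns true if the shuffle mask is an exact element reversal,
 // i.e. <NumElts-1, NumElts-2, ..., 1, 0>.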
16902 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16903 auto Mask = SVN->getMask();
16904 int i = 0;
16905 auto I = Mask.rbegin();
16906 auto E = Mask.rend();
16907
16908 for (; I != E; ++I) {
16909 if (*I != i)
16910 return false;
16911 i++;
16912 }
16913 return true;
16914 };
16915
16916 SelectionDAG &DAG = DCI.DAG;
16917 EVT VT = SVN->getValueType(0);
16918
16919 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16920 return SDValue();
16921
16922 // Before Power9, the PPCVSXSwapRemoval pass rewrites the element order;
16923 // see the comment in PPCVSXSwapRemoval.cpp. This combine conflicts with
16924 // that optimization, so we don't do it here.
16925 if (!Subtarget.hasP9Vector())
16926 return SDValue();
16927
16928 if (!IsElementReverse(SVN))
16929 return SDValue();
16930
16931 if (LSBase->getOpcode() == ISD::LOAD) {
16932 // If result 0 of the load has any user other than the shufflevector
16933 // instruction, it is not profitable to replace the shufflevector with
16934 // a reverse load.
16935 for (SDUse &Use : LSBase->uses())
16936 if (Use.getResNo() == 0 &&
16937 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16938 return SDValue();
16939
16940 SDLoc dl(LSBase);
16941 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16942 return DAG.getMemIntrinsicNode(
16943 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
16944 LSBase->getMemoryVT(), LSBase->getMemOperand());
16945 }
16946
16947 if (LSBase->getOpcode() == ISD::STORE) {
16948 // If there are other uses of the shuffle, the swap cannot be avoided.
16949 // Forcing the use of an X-Form (since swapped stores only have
16950 // X-Forms) without removing the swap is unprofitable.
16951 if (!SVN->hasOneUse())
16952 return SDValue();
16953
16954 SDLoc dl(LSBase);
16955 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
16956 LSBase->getBasePtr()};
16957 return DAG.getMemIntrinsicNode(
16958 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
16959 LSBase->getMemoryVT(), LSBase->getMemOperand());
16960 }
16961
16962 llvm_unreachable("Expected a load or store node here");
16963}
16964
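// Match the PowerPC store-conditional intrinsics (stbcx., sthcx., stwcx.,
// stdcx.) and report the width of the conditional store in bytes.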
16965static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16966 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
16967 if (IntrinsicID == Intrinsic::ppc_stdcx)
16968 StoreWidth = 8;
16969 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16970 StoreWidth = 4;
16971 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16972 StoreWidth = 2;
16973 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16974 StoreWidth = 1;
16975 else
16976 return false;
16977 return true;
16978}
16979
16980static SDValue DAGCombineAddc(SDNode *N,
16981 PPCTargetLowering::DAGCombinerInfo &DCI) {
16982 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
16983 // (ADDC (ADDE 0, 0, C), -1) -> C
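 // The value (ADDE 0, 0, C) is just the incoming carry bit, and adding -1 to
 // it produces a carry-out exactly when that bit is set, so the carry result
 // of this ADDC is the original carry C.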
16984 SDValue LHS = N->getOperand(0);
16985 SDValue RHS = N->getOperand(1);
16986 if (LHS->getOpcode() == PPCISD::ADDE &&
16987 isNullConstant(LHS->getOperand(0)) &&
16988 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
16989 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
16990 }
16991 }
16992 return SDValue();
16993}
16994
16995SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16996 DAGCombinerInfo &DCI) const {
16997 SelectionDAG &DAG = DCI.DAG;
16998 SDLoc dl(N);
16999 switch (N->getOpcode()) {
17000 default: break;
17001 case ISD::ADD:
17002 return combineADD(N, DCI);
17003 case ISD::AND: {
17004 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17005 // original input as that will prevent us from selecting optimal rotates.
17006 // This only matters if the input to the extend is i32 widened to i64.
17007 SDValue Op1 = N->getOperand(0);
17008 SDValue Op2 = N->getOperand(1);
17009 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17010 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17011 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17012 Op1.getOperand(0).getValueType() != MVT::i32)
17013 break;
17014 SDValue NarrowOp = Op1.getOperand(0);
17015 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17016 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17017 break;
17018
17019 uint64_t Imm = Op2->getAsZExtVal();
17020 // Make sure that the constant is narrow enough to fit in the narrow type.
17021 if (!isUInt<32>(Imm))
17022 break;
17023 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17024 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17025 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17026 }
17027 case ISD::SHL:
17028 return combineSHL(N, DCI);
17029 case ISD::SRA:
17030 return combineSRA(N, DCI);
17031 case ISD::SRL:
17032 return combineSRL(N, DCI);
17033 case ISD::MUL:
17034 return combineMUL(N, DCI);
17035 case ISD::FMA:
17036 case PPCISD::FNMSUB:
17037 return combineFMALike(N, DCI);
17038 case PPCISD::SHL:
17039 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17040 return N->getOperand(0);
17041 break;
17042 case PPCISD::SRL:
17043 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17044 return N->getOperand(0);
17045 break;
17046 case PPCISD::SRA:
17047 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17048 if (C->isZero() || // 0 >>s V -> 0.
17049 C->isAllOnes()) // -1 >>s V -> -1.
17050 return N->getOperand(0);
17051 }
17052 break;
17053 case ISD::SIGN_EXTEND:
17054 case ISD::ZERO_EXTEND:
17055 case ISD::ANY_EXTEND:
17056 return DAGCombineExtBoolTrunc(N, DCI);
17057 case ISD::TRUNCATE:
17058 return combineTRUNCATE(N, DCI);
17059 case ISD::SETCC:
17060 if (SDValue CSCC = combineSetCC(N, DCI))
17061 return CSCC;
17062 [[fallthrough]];
17063 case ISD::SELECT_CC:
17064 return DAGCombineTruncBoolExt(N, DCI);
17065 case ISD::SINT_TO_FP:
17066 case ISD::UINT_TO_FP:
17067 return combineFPToIntToFP(N, DCI);
17068 case ISD::VECTOR_SHUFFLE:
17069 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17070 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17071 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17072 }
17073 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17074 case ISD::STORE: {
17075
17076 EVT Op1VT = N->getOperand(1).getValueType();
17077 unsigned Opcode = N->getOperand(1).getOpcode();
17078
17079 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17080 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17081 SDValue Val = combineStoreFPToInt(N, DCI);
17082 if (Val)
17083 return Val;
17084 }
17085
17086 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17087 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17088 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17089 if (Val)
17090 return Val;
17091 }
17092
17093 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17094 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17095 N->getOperand(1).getNode()->hasOneUse() &&
17096 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17097 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17098
17099 // STBRX can only handle simple types and it makes no sense to store fewer
17100 // than two bytes in byte-reversed order.
17101 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17102 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17103 break;
17104
17105 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17106 // Do an any-extend to 32-bits if this is a half-word input.
17107 if (BSwapOp.getValueType() == MVT::i16)
17108 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17109
17110 // If the type of the BSWAP operand is wider than the stored memory width,
17111 // it needs to be shifted right before the STBRX.
17112 if (Op1VT.bitsGT(mVT)) {
17113 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17114 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17115 DAG.getConstant(Shift, dl, MVT::i32));
17116 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17117 if (Op1VT == MVT::i64)
17118 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17119 }
17120
17121 SDValue Ops[] = {
17122 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17123 };
17124 return
17125 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17126 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17127 cast<StoreSDNode>(N)->getMemOperand());
17128 }
17129
17130 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17131 // So it can increase the chance of CSE constant construction.
17132 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17133 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17134 // Need to sign-extend to 64 bits to handle negative values.
17135 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17136 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17137 MemVT.getSizeInBits());
17138 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17139
17140 auto *ST = cast<StoreSDNode>(N);
17141 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17142 ST->getBasePtr(), ST->getOffset(), MemVT,
17143 ST->getMemOperand(), ST->getAddressingMode(),
17144 /*IsTruncating=*/true);
17145 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17146 // new store which will change the constant by removing non-demanded bits.
17147 return ST->isUnindexed()
17148 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17149 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17150 }
17151
17152 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17153 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17154 if (Op1VT.isSimple()) {
17155 MVT StoreVT = Op1VT.getSimpleVT();
17156 if (Subtarget.needsSwapsForVSXMemOps() &&
17157 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17158 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17159 return expandVSXStoreForLE(N, DCI);
17160 }
17161 break;
17162 }
17163 case ISD::LOAD: {
17164 LoadSDNode *LD = cast<LoadSDNode>(N);
17165 EVT VT = LD->getValueType(0);
17166
17167 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17168 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17169 if (VT.isSimple()) {
17170 MVT LoadVT = VT.getSimpleVT();
17171 if (Subtarget.needsSwapsForVSXMemOps() &&
17172 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17173 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17174 return expandVSXLoadForLE(N, DCI);
17175 }
17176
17177 // We sometimes end up with a 64-bit integer load, from which we extract
17178 // two single-precision floating-point numbers. This happens with
17179 // std::complex<float>, and other similar structures, because of the way we
17180 // canonicalize structure copies. However, if we lack direct moves,
17181 // then the final bitcasts from the extracted integer values to the
17182 // floating-point numbers turn into store/load pairs. Even with direct moves,
17183 // just loading the two floating-point numbers is likely better.
17184 auto ReplaceTwoFloatLoad = [&]() {
17185 if (VT != MVT::i64)
17186 return false;
17187
17188 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17189 LD->isVolatile())
17190 return false;
17191
17192 // We're looking for a sequence like this:
17193 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17194 // t16: i64 = srl t13, Constant:i32<32>
17195 // t17: i32 = truncate t16
17196 // t18: f32 = bitcast t17
17197 // t19: i32 = truncate t13
17198 // t20: f32 = bitcast t19
17199
17200 if (!LD->hasNUsesOfValue(2, 0))
17201 return false;
17202
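 // Walk the use list to pick out the two users of the loaded value
 // (result 0), skipping any uses of the chain result.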
17203 auto UI = LD->user_begin();
17204 while (UI.getUse().getResNo() != 0) ++UI;
17205 SDNode *Trunc = *UI++;
17206 while (UI.getUse().getResNo() != 0) ++UI;
17207 SDNode *RightShift = *UI;
17208 if (Trunc->getOpcode() != ISD::TRUNCATE)
17209 std::swap(Trunc, RightShift);
17210
17211 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17212 Trunc->getValueType(0) != MVT::i32 ||
17213 !Trunc->hasOneUse())
17214 return false;
17215 if (RightShift->getOpcode() != ISD::SRL ||
17216 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17217 RightShift->getConstantOperandVal(1) != 32 ||
17218 !RightShift->hasOneUse())
17219 return false;
17220
17221 SDNode *Trunc2 = *RightShift->user_begin();
17222 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17223 Trunc2->getValueType(0) != MVT::i32 ||
17224 !Trunc2->hasOneUse())
17225 return false;
17226
17227 SDNode *Bitcast = *Trunc->user_begin();
17228 SDNode *Bitcast2 = *Trunc2->user_begin();
17229
17230 if (Bitcast->getOpcode() != ISD::BITCAST ||
17231 Bitcast->getValueType(0) != MVT::f32)
17232 return false;
17233 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17234 Bitcast2->getValueType(0) != MVT::f32)
17235 return false;
17236
17237 if (Subtarget.isLittleEndian())
17238 std::swap(Bitcast, Bitcast2);
17239
17240 // Bitcast has the second float (in memory-layout order) and Bitcast2
17241 // has the first one.
17242
17243 SDValue BasePtr = LD->getBasePtr();
17244 if (LD->isIndexed()) {
17245 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17246 "Non-pre-inc AM on PPC?");
17247 BasePtr =
17248 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17249 LD->getOffset());
17250 }
17251
17252 auto MMOFlags =
17253 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17254 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17255 LD->getPointerInfo(), LD->getAlign(),
17256 MMOFlags, LD->getAAInfo());
17257 SDValue AddPtr =
17258 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17259 BasePtr, DAG.getIntPtrConstant(4, dl));
17260 SDValue FloatLoad2 = DAG.getLoad(
17261 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17262 LD->getPointerInfo().getWithOffset(4),
17263 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17264
17265 if (LD->isIndexed()) {
17266 // Note that DAGCombine should re-form any pre-increment load(s) from
17267 // what is produced here if that makes sense.
17268 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17269 }
17270
17271 DCI.CombineTo(Bitcast2, FloatLoad);
17272 DCI.CombineTo(Bitcast, FloatLoad2);
17273
17274 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17275 SDValue(FloatLoad2.getNode(), 1));
17276 return true;
17277 };
17278
17279 if (ReplaceTwoFloatLoad())
17280 return SDValue(N, 0);
17281
17282 EVT MemVT = LD->getMemoryVT();
17283 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17284 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17285 if (LD->isUnindexed() && VT.isVector() &&
17286 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17287 // P8 and later hardware should just use LOAD.
17288 !Subtarget.hasP8Vector() &&
17289 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17290 VT == MVT::v4f32))) &&
17291 LD->getAlign() < ABIAlignment) {
17292 // This is a type-legal unaligned Altivec load.
17293 SDValue Chain = LD->getChain();
17294 SDValue Ptr = LD->getBasePtr();
17295 bool isLittleEndian = Subtarget.isLittleEndian();
17296
17297 // This implements the loading of unaligned vectors as described in
17298 // the venerable Apple Velocity Engine overview. Specifically:
17299 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17300 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17301 //
17302 // The general idea is to expand a sequence of one or more unaligned
17303 // loads into an alignment-based permutation-control instruction (lvsl
17304 // or lvsr), a series of regular vector loads (which always truncate
17305 // their input address to an aligned address), and a series of
17306 // permutations. The results of these permutations are the requested
17307 // loaded values. The trick is that the last "extra" load is not taken
17308 // from the address you might suspect (sizeof(vector) bytes after the
17309 // last requested load), but rather sizeof(vector) - 1 bytes after the
17310 // last requested vector. The point of this is to avoid a page fault if
17311 // the base address happened to be aligned. This works because if the
17312 // base address is aligned, then adding less than a full vector length
17313 // will cause the last vector in the sequence to be (re)loaded.
17314 // Otherwise, the next vector will be fetched as you might suspect was
17315 // necessary.
17316
17317 // We might be able to reuse the permutation generation from
17318 // a different base address offset from this one by an aligned amount.
17319 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17320 // optimization later.
17321 Intrinsic::ID Intr, IntrLD, IntrPerm;
17322 MVT PermCntlTy, PermTy, LDTy;
17323 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17324 : Intrinsic::ppc_altivec_lvsl;
17325 IntrLD = Intrinsic::ppc_altivec_lvx;
17326 IntrPerm = Intrinsic::ppc_altivec_vperm;
17327 PermCntlTy = MVT::v16i8;
17328 PermTy = MVT::v4i32;
17329 LDTy = MVT::v4i32;
17330
17331 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17332
17333 // Create the new MMO for the new base load. It is like the original MMO,
17334 // but represents an area in memory almost twice the vector size centered
17335 // on the original address. If the address is unaligned, we might start
17336 // reading up to (sizeof(vector)-1) bytes below the address of the
17337 // original unaligned load.
17338 MachineFunction &MF = DAG.getMachineFunction();
17339 MachineMemOperand *BaseMMO =
17340 MF.getMachineMemOperand(LD->getMemOperand(),
17341 -(int64_t)MemVT.getStoreSize()+1,
17342 2*MemVT.getStoreSize()-1);
17343
17344 // Create the new base load.
17345 SDValue LDXIntID =
17346 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17347 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17348 SDValue BaseLoad =
17349 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17350 DAG.getVTList(PermTy, MVT::Other),
17351 BaseLoadOps, LDTy, BaseMMO);
17352
17353 // Note that the value of IncOffset (which is provided to the next
17354 // load's pointer info offset value, and thus used to calculate the
17355 // alignment), and the value of IncValue (which is actually used to
17356 // increment the pointer value) are different! This is because we
17357 // require the next load to appear to be aligned, even though it
17358 // is actually offset from the base pointer by a lesser amount.
17359 int IncOffset = VT.getSizeInBits() / 8;
17360 int IncValue = IncOffset;
17361
17362 // Walk (both up and down) the chain looking for another load at the real
17363 // (aligned) offset (the alignment of the other load does not matter in
17364 // this case). If found, then do not use the offset reduction trick, as
17365 // that will prevent the loads from being later combined (as they would
17366 // otherwise be duplicates).
17367 if (!findConsecutiveLoad(LD, DAG))
17368 --IncValue;
17369
17370 SDValue Increment =
17371 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17372 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17373
17374 MachineMemOperand *ExtraMMO =
17375 MF.getMachineMemOperand(LD->getMemOperand(),
17376 1, 2*MemVT.getStoreSize()-1);
17377 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17378 SDValue ExtraLoad =
17379 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17380 DAG.getVTList(PermTy, MVT::Other),
17381 ExtraLoadOps, LDTy, ExtraMMO);
17382
17383 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17384 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17385
17386 // Because vperm has a big-endian bias, we must reverse the order
17387 // of the input vectors and complement the permute control vector
17388 // when generating little endian code. We have already handled the
17389 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17390 // and ExtraLoad here.
17391 SDValue Perm;
17392 if (isLittleEndian)
17393 Perm = BuildIntrinsicOp(IntrPerm,
17394 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17395 else
17396 Perm = BuildIntrinsicOp(IntrPerm,
17397 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17398
17399 if (VT != PermTy)
17400 Perm = Subtarget.hasAltivec()
17401 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17402 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17403 DAG.getTargetConstant(1, dl, MVT::i64));
17404 // second argument is 1 because this rounding
17405 // is always exact.
17406
17407 // The output of the permutation is our loaded result, the TokenFactor is
17408 // our new chain.
17409 DCI.CombineTo(N, Perm, TF);
17410 return SDValue(N, 0);
17411 }
17412 }
17413 break;
17414 case ISD::INTRINSIC_WO_CHAIN: {
17415 bool isLittleEndian = Subtarget.isLittleEndian();
17416 unsigned IID = N->getConstantOperandVal(0);
17417 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17418 : Intrinsic::ppc_altivec_lvsl);
17419 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17420 SDValue Add = N->getOperand(1);
17421
17422 int Bits = 4 /* 16 byte alignment */;
17423
17424 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17425 APInt::getAllOnes(Bits /* alignment */)
17426 .zext(Add.getScalarValueSizeInBits()))) {
17427 SDNode *BasePtr = Add->getOperand(0).getNode();
17428 for (SDNode *U : BasePtr->users()) {
17429 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17430 U->getConstantOperandVal(0) == IID) {
17431 // We've found another LVSL/LVSR, and this address is an aligned
17432 // multiple of that one. The results will be the same, so use the
17433 // one we've just found instead.
17434
17435 return SDValue(U, 0);
17436 }
17437 }
17438 }
17439
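 // Likewise, if another add of the same base pointer differs from this one
 // by a multiple of 16 bytes, an lvsl/lvsr computed from it produces the
 // same permute control vector, so reuse that result as well.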
17440 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17441 SDNode *BasePtr = Add->getOperand(0).getNode();
17442 for (SDNode *U : BasePtr->users()) {
17443 if (U->getOpcode() == ISD::ADD &&
17444 isa<ConstantSDNode>(U->getOperand(1)) &&
17445 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17446 (1ULL << Bits) ==
17447 0) {
17448 SDNode *OtherAdd = U;
17449 for (SDNode *V : OtherAdd->users()) {
17450 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17451 V->getConstantOperandVal(0) == IID) {
17452 return SDValue(V, 0);
17453 }
17454 }
17455 }
17456 }
17457 }
17458 }
17459
17460 // Combine vmaxsw/h/b(a, a's negation) to abs(a).
17461 // This exposes the vabsduw/h/b opportunity for downstream combines.
17462 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17463 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17464 IID == Intrinsic::ppc_altivec_vmaxsh ||
17465 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17466 SDValue V1 = N->getOperand(1);
17467 SDValue V2 = N->getOperand(2);
17468 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17469 V1.getSimpleValueType() == MVT::v8i16 ||
17470 V1.getSimpleValueType() == MVT::v16i8) &&
17471 V1.getSimpleValueType() == V2.getSimpleValueType()) {
17472 // (0-a, a)
17473 if (V1.getOpcode() == ISD::SUB &&
17474 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
17475 V1.getOperand(1) == V2) {
17476 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17477 }
17478 // (a, 0-a)
17479 if (V2.getOpcode() == ISD::SUB &&
17480 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
17481 V2.getOperand(1) == V1) {
17482 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17483 }
17484 // (x-y, y-x)
17485 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17486 V1.getOperand(0) == V2.getOperand(1) &&
17487 V1.getOperand(1) == V2.getOperand(0)) {
17488 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17489 }
17490 }
17491 }
17492 }
17493
17494 break;
17495 case ISD::INTRINSIC_W_CHAIN:
17496 switch (N->getConstantOperandVal(1)) {
17497 default:
17498 break;
17499 case Intrinsic::ppc_altivec_vsum4sbs:
17500 case Intrinsic::ppc_altivec_vsum4shs:
17501 case Intrinsic::ppc_altivec_vsum4ubs: {
17502 // These sum-across intrinsics only have a chain due to the side effect
17503 // that they may set the SAT bit. If we know the SAT bit will not be set
17504 // for some inputs, we can replace any uses of their chain with the
17505 // input chain.
17506 if (BuildVectorSDNode *BVN =
17507 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17508 APInt APSplatBits, APSplatUndef;
17509 unsigned SplatBitSize;
17510 bool HasAnyUndefs;
17511 bool BVNIsConstantSplat = BVN->isConstantSplat(
17512 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17513 !Subtarget.isLittleEndian());
17514 // If the constant splat vector is 0, the SAT bit will not be set.
17515 if (BVNIsConstantSplat && APSplatBits == 0)
17516 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17517 }
17518 return SDValue();
17519 }
17520 case Intrinsic::ppc_vsx_lxvw4x:
17521 case Intrinsic::ppc_vsx_lxvd2x:
17522 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17523 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17524 if (Subtarget.needsSwapsForVSXMemOps())
17525 return expandVSXLoadForLE(N, DCI);
17526 break;
17527 }
17528 break;
17529 case ISD::INTRINSIC_VOID:
17530 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17531 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17532 if (Subtarget.needsSwapsForVSXMemOps()) {
17533 switch (N->getConstantOperandVal(1)) {
17534 default:
17535 break;
17536 case Intrinsic::ppc_vsx_stxvw4x:
17537 case Intrinsic::ppc_vsx_stxvd2x:
17538 return expandVSXStoreForLE(N, DCI);
17539 }
17540 }
17541 break;
17542 case ISD::BSWAP: {
17543 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17544 // For subtargets without LDBRX, we can still do better than the default
17545 // expansion even for 64-bit BSWAP (LOAD).
17546 bool Is64BitBswapOn64BitTgt =
17547 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17548 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17549 N->getOperand(0).hasOneUse();
17550 if (IsSingleUseNormalLd &&
17551 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17552 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17553 SDValue Load = N->getOperand(0);
17554 LoadSDNode *LD = cast<LoadSDNode>(Load);
17555 // Create the byte-swapping load.
17556 SDValue Ops[] = {
17557 LD->getChain(), // Chain
17558 LD->getBasePtr(), // Ptr
17559 DAG.getValueType(N->getValueType(0)) // VT
17560 };
17561 SDValue BSLoad =
17562 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
17563 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
17564 MVT::i64 : MVT::i32, MVT::Other),
17565 Ops, LD->getMemoryVT(), LD->getMemOperand());
17566
17567 // If this is an i16 load, insert the truncate.
17568 SDValue ResVal = BSLoad;
17569 if (N->getValueType(0) == MVT::i16)
17570 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
17571
17572 // First, combine the bswap away. This makes the value produced by the
17573 // load dead.
17574 DCI.CombineTo(N, ResVal);
17575
17576 // Next, combine the load away; we give it a bogus result value but a real
17577 // chain result. The result value is dead because the bswap is dead.
17578 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
17579
17580 // Return N so it doesn't get rechecked!
17581 return SDValue(N, 0);
17582 }
17583 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17584 // before legalization so that the BUILD_PAIR is handled correctly.
17585 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17586 !IsSingleUseNormalLd)
17587 return SDValue();
17588 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
17589
17590 // Can't split volatile or atomic loads.
17591 if (!LD->isSimple())
17592 return SDValue();
17593 SDValue BasePtr = LD->getBasePtr();
17594 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
17595 LD->getPointerInfo(), LD->getAlign());
17596 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
17597 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17598 DAG.getIntPtrConstant(4, dl));
17599 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
17600 LD->getMemOperand(), 4, 4);
17601 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
17602 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
17603 SDValue Res;
17604 if (Subtarget.isLittleEndian())
17605 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
17606 else
17607 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
17608 SDValue TF =
17609 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17610 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
17611 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
17612 return Res;
17613 }
17614 case PPCISD::VCMP:
17615 // If a VCMP_rec node already exists with exactly the same operands as this
17616 // node, use its result instead of this node (VCMP_rec computes both a CR6
17617 // and a normal output).
17618 //
17619 if (!N->getOperand(0).hasOneUse() &&
17620 !N->getOperand(1).hasOneUse() &&
17621 !N->getOperand(2).hasOneUse()) {
17622
17623 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17624 SDNode *VCMPrecNode = nullptr;
17625
17626 SDNode *LHSN = N->getOperand(0).getNode();
17627 for (SDNode *User : LHSN->users())
17628 if (User->getOpcode() == PPCISD::VCMP_rec &&
17629 User->getOperand(1) == N->getOperand(1) &&
17630 User->getOperand(2) == N->getOperand(2) &&
17631 User->getOperand(0) == N->getOperand(0)) {
17632 VCMPrecNode = User;
17633 break;
17634 }
17635
17636 // If there is no VCMP_rec node, or if the flag value has a single use,
17637 // don't transform this.
17638 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
17639 break;
17640
17641 // Look at the (necessarily single) use of the flag value. If it has a
17642 // chain, this transformation is more complex. Note that multiple things
17643 // could use the value result, which we should ignore.
17644 SDNode *FlagUser = nullptr;
17645 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17646 FlagUser == nullptr; ++UI) {
17647 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17648 SDNode *User = UI->getUser();
17649 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17650 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
17651 FlagUser = User;
17652 break;
17653 }
17654 }
17655 }
17656
17657 // If the user is a MFOCRF instruction, we know this is safe.
17658 // Otherwise we give up for right now.
17659 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17660 return SDValue(VCMPrecNode, 0);
17661 }
17662 break;
17663 case ISD::BR_CC: {
17664 // If this is a branch on an altivec predicate comparison, lower this so
17665 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17666 // lowering is done pre-legalize, because the legalizer lowers the predicate
17667 // compare down to code that is difficult to reassemble.
17668 // This code also handles branches that depend on the result of a store
17669 // conditional.
17670 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
17671 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
17672
17673 int CompareOpc;
17674 bool isDot;
17675
17676 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17677 break;
17678
17679 // Since we are doing this pre-legalize, the RHS can be a constant of
17680 // arbitrary bitwidth which may cause issues when trying to get the value
17681 // from the underlying APInt.
17682 auto RHSAPInt = RHS->getAsAPIntVal();
17683 if (!RHSAPInt.isIntN(64))
17684 break;
17685
17686 unsigned Val = RHSAPInt.getZExtValue();
17687 auto isImpossibleCompare = [&]() {
17688 // If this is a comparison against something other than 0/1, then we know
17689 // that the condition is never/always true.
17690 if (Val != 0 && Val != 1) {
17691 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17692 return N->getOperand(0);
17693 // Always !=, turn it into an unconditional branch.
17694 return DAG.getNode(ISD::BR, dl, MVT::Other,
17695 N->getOperand(0), N->getOperand(4));
17696 }
17697 return SDValue();
17698 };
17699 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17700 unsigned StoreWidth = 0;
17701 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17702 isStoreConditional(LHS, StoreWidth)) {
17703 if (SDValue Impossible = isImpossibleCompare())
17704 return Impossible;
17705 PPC::Predicate CompOpc;
17706 // eq 0 => ne
17707 // ne 0 => eq
17708 // eq 1 => eq
17709 // ne 1 => ne
17710 if (Val == 0)
17711 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17712 else
17713 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17714
17715 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
17716 DAG.getConstant(StoreWidth, dl, MVT::i32)};
17717 auto *MemNode = cast<MemSDNode>(LHS);
17718 SDValue ConstSt = DAG.getMemIntrinsicNode(
17719 PPCISD::STORE_COND, dl,
17720 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
17721 MemNode->getMemoryVT(), MemNode->getMemOperand());
17722
17723 SDValue InChain;
17724 // Unchain the branch from the original store conditional.
17725 if (N->getOperand(0) == LHS.getValue(1))
17726 InChain = LHS.getOperand(0);
17727 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
17728 SmallVector<SDValue, 4> InChains;
17729 SDValue InTF = N->getOperand(0);
17730 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17731 if (InTF.getOperand(i) != LHS.getValue(1))
17732 InChains.push_back(InTF.getOperand(i));
17733 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
17734 }
17735
17736 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
17737 DAG.getConstant(CompOpc, dl, MVT::i32),
17738 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
17739 ConstSt.getValue(2));
17740 }
17741
17742 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17743 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
17744 assert(isDot && "Can't compare against a vector result!");
17745
17746 if (SDValue Impossible = isImpossibleCompare())
17747 return Impossible;
17748
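 // We branch when the predicate result is true exactly when (CC, Val) is
 // (eq, 1) or (ne, 0); the XOR below encodes all four combinations.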
17749 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17750 // Create the PPCISD altivec 'dot' comparison node.
17751 SDValue Ops[] = {
17752 LHS.getOperand(2), // LHS of compare
17753 LHS.getOperand(3), // RHS of compare
17754 DAG.getConstant(CompareOpc, dl, MVT::i32)
17755 };
17756 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
17757 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
17758
17759 // Unpack the result based on how the target uses it.
17760 PPC::Predicate CompOpc;
17761 switch (LHS.getConstantOperandVal(1)) {
17762 default: // Can't happen, don't crash on invalid number though.
17763 case 0: // Branch on the value of the EQ bit of CR6.
17764 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17765 break;
17766 case 1: // Branch on the inverted value of the EQ bit of CR6.
17767 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17768 break;
17769 case 2: // Branch on the value of the LT bit of CR6.
17770 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17771 break;
17772 case 3: // Branch on the inverted value of the LT bit of CR6.
17773 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17774 break;
17775 }
17776
17777 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
17778 DAG.getConstant(CompOpc, dl, MVT::i32),
17779 DAG.getRegister(PPC::CR6, MVT::i32),
17780 N->getOperand(4), CompNode.getValue(1));
17781 }
17782 break;
17783 }
17784 case ISD::BUILD_VECTOR:
17785 return DAGCombineBuildVector(N, DCI);
17786 case PPCISD::ADDC:
17787 return DAGCombineAddc(N, DCI);
17788 }
17789
17790 return SDValue();
17791}
17792
17793SDValue
17794PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
17795 SelectionDAG &DAG,
17796 SmallVectorImpl<SDNode *> &Created) const {
17797 // fold (sdiv X, pow2)
17798 EVT VT = N->getValueType(0);
17799 if (VT == MVT::i64 && !Subtarget.isPPC64())
17800 return SDValue();
17801 if ((VT != MVT::i32 && VT != MVT::i64) ||
17802 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
17803 return SDValue();
17804
17805 SDLoc DL(N);
17806 SDValue N0 = N->getOperand(0);
17807
17808 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
17809 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
17810 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
17811
17812 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
17813 Created.push_back(Op.getNode());
17814
17815 if (IsNegPow2) {
17816 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
17817 Created.push_back(Op.getNode());
17818 }
17819
17820 return Op;
17821}
17822
17823//===----------------------------------------------------------------------===//
17824// Inline Assembly Support
17825//===----------------------------------------------------------------------===//
17826
17827void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17828 KnownBits &Known,
17829 const APInt &DemandedElts,
17830 const SelectionDAG &DAG,
17831 unsigned Depth) const {
17832 Known.resetAll();
17833 switch (Op.getOpcode()) {
17834 default: break;
17835 case PPCISD::LBRX: {
17836 // lhbrx is known to have the top bits cleared out.
17837 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
17838 Known.Zero = 0xFFFF0000;
17839 break;
17840 }
17841 case PPCISD::ADDE: {
17842 if (Op.getResNo() == 0) {
17843 // (0|1), _ = ADDE 0, 0, CARRY
17844 SDValue LHS = Op.getOperand(0);
17845 SDValue RHS = Op.getOperand(1);
17846 if (isNullConstant(LHS) && isNullConstant(RHS))
17847 Known.Zero = ~1ULL;
17848 }
17849 break;
17850 }
17851 case ISD::INTRINSIC_WO_CHAIN: {
17852 switch (Op.getConstantOperandVal(0)) {
17853 default: break;
17854 case Intrinsic::ppc_altivec_vcmpbfp_p:
17855 case Intrinsic::ppc_altivec_vcmpeqfp_p:
17856 case Intrinsic::ppc_altivec_vcmpequb_p:
17857 case Intrinsic::ppc_altivec_vcmpequh_p:
17858 case Intrinsic::ppc_altivec_vcmpequw_p:
17859 case Intrinsic::ppc_altivec_vcmpequd_p:
17860 case Intrinsic::ppc_altivec_vcmpequq_p:
17861 case Intrinsic::ppc_altivec_vcmpgefp_p:
17862 case Intrinsic::ppc_altivec_vcmpgtfp_p:
17863 case Intrinsic::ppc_altivec_vcmpgtsb_p:
17864 case Intrinsic::ppc_altivec_vcmpgtsh_p:
17865 case Intrinsic::ppc_altivec_vcmpgtsw_p:
17866 case Intrinsic::ppc_altivec_vcmpgtsd_p:
17867 case Intrinsic::ppc_altivec_vcmpgtsq_p:
17868 case Intrinsic::ppc_altivec_vcmpgtub_p:
17869 case Intrinsic::ppc_altivec_vcmpgtuh_p:
17870 case Intrinsic::ppc_altivec_vcmpgtuw_p:
17871 case Intrinsic::ppc_altivec_vcmpgtud_p:
17872 case Intrinsic::ppc_altivec_vcmpgtuq_p:
17873 Known.Zero = ~1U; // All bits but the low one are known to be zero.
17874 break;
17875 }
17876 break;
17877 }
17878 case ISD::INTRINSIC_W_CHAIN: {
17879 switch (Op.getConstantOperandVal(1)) {
17880 default:
17881 break;
17882 case Intrinsic::ppc_load2r:
17883 // Top bits are cleared for load2r (which is the same as lhbrx).
17884 Known.Zero = 0xFFFF0000;
17885 break;
17886 }
17887 break;
17888 }
17889 }
17890}
17891
17892Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
17893 switch (Subtarget.getCPUDirective()) {
17894 default: break;
17895 case PPC::DIR_970:
17896 case PPC::DIR_PWR4:
17897 case PPC::DIR_PWR5:
17898 case PPC::DIR_PWR5X:
17899 case PPC::DIR_PWR6:
17900 case PPC::DIR_PWR6X:
17901 case PPC::DIR_PWR7:
17902 case PPC::DIR_PWR8:
17903 case PPC::DIR_PWR9:
17904 case PPC::DIR_PWR10:
17905 case PPC::DIR_PWR11:
17906 case PPC::DIR_PWR_FUTURE: {
17907 if (!ML)
17908 break;
17909
17910 if (!DisableInnermostLoopAlign32) {
17911 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17912 // so that we can decrease cache misses and branch-prediction misses.
17913 // Actual alignment of the loop will depend on the hotness check and other
17914 // logic in alignBlocks.
17915 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17916 return Align(32);
17917 }
17918
17919 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17920
17921 // For small loops (between 5 and 8 instructions), align to a 32-byte
17922 // boundary so that the entire loop fits in one instruction-cache line.
17923 uint64_t LoopSize = 0;
17924 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17925 for (const MachineInstr &J : **I) {
17926 LoopSize += TII->getInstSizeInBytes(J);
17927 if (LoopSize > 32)
17928 break;
17929 }
17930
17931 if (LoopSize > 16 && LoopSize <= 32)
17932 return Align(32);
17933
17934 break;
17935 }
17936 }
17937
17938 return TargetLowering::getPrefLoopAlignment(ML);
17939}
17940
17941/// getConstraintType - Given a constraint, return the type of
17942/// constraint it is for this target.
17943PPCTargetLowering::ConstraintType
17944PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17945 if (Constraint.size() == 1) {
17946 switch (Constraint[0]) {
17947 default: break;
17948 case 'b':
17949 case 'r':
17950 case 'f':
17951 case 'd':
17952 case 'v':
17953 case 'y':
17954 return C_RegisterClass;
17955 case 'Z':
17956 // FIXME: While Z does indicate a memory constraint, it specifically
17957 // indicates an r+r address (used in conjunction with the 'y' modifier
17958 // in the replacement string). Currently, we're forcing the base
17959 // register to be r0 in the asm printer (which is interpreted as zero)
17960 // and forming the complete address in the second register. This is
17961 // suboptimal.
17962 return C_Memory;
17963 }
17964 } else if (Constraint == "wc") { // individual CR bits.
17965 return C_RegisterClass;
17966 } else if (Constraint == "wa" || Constraint == "wd" ||
17967 Constraint == "wf" || Constraint == "ws" ||
17968 Constraint == "wi" || Constraint == "ww") {
17969 return C_RegisterClass; // VSX registers.
17970 }
17971 return TargetLowering::getConstraintType(Constraint);
17972}
17973
17974/// Examine constraint type and operand type and determine a weight value.
17975/// This object must already have been set up with the operand type
17976/// and the current alternative constraint selected.
17977TargetLowering::ConstraintWeight
17978PPCTargetLowering::getSingleConstraintMatchWeight(
17979 AsmOperandInfo &info, const char *constraint) const {
17980 ConstraintWeight weight = CW_Invalid;
17981 Value *CallOperandVal = info.CallOperandVal;
17982 // If we don't have a value, we can't do a match,
17983 // but allow it at the lowest weight.
17984 if (!CallOperandVal)
17985 return CW_Default;
17986 Type *type = CallOperandVal->getType();
17987
17988 // Look at the constraint type.
17989 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
17990 return CW_Register; // an individual CR bit.
17991 else if ((StringRef(constraint) == "wa" ||
17992 StringRef(constraint) == "wd" ||
17993 StringRef(constraint) == "wf") &&
17994 type->isVectorTy())
17995 return CW_Register;
17996 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
17997 return CW_Register; // just holds 64-bit integer data.
17998 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17999 return CW_Register;
18000 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18001 return CW_Register;
18002
18003 switch (*constraint) {
18004 default:
18005 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
18006 break;
18007 case 'b':
18008 if (type->isIntegerTy())
18009 weight = CW_Register;
18010 break;
18011 case 'f':
18012 if (type->isFloatTy())
18013 weight = CW_Register;
18014 break;
18015 case 'd':
18016 if (type->isDoubleTy())
18017 weight = CW_Register;
18018 break;
18019 case 'v':
18020 if (type->isVectorTy())
18021 weight = CW_Register;
18022 break;
18023 case 'y':
18024 weight = CW_Register;
18025 break;
18026 case 'Z':
18027 weight = CW_Memory;
18028 break;
18029 }
18030 return weight;
18031}
18032
18033std::pair<unsigned, const TargetRegisterClass *>
18034PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
18035 StringRef Constraint,
18036 MVT VT) const {
18037 if (Constraint.size() == 1) {
18038 // GCC RS6000 Constraint Letters
18039 switch (Constraint[0]) {
18040 case 'b': // R1-R31
18041 if (VT == MVT::i64 && Subtarget.isPPC64())
18042 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18043 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18044 case 'r': // R0-R31
18045 if (VT == MVT::i64 && Subtarget.isPPC64())
18046 return std::make_pair(0U, &PPC::G8RCRegClass);
18047 return std::make_pair(0U, &PPC::GPRCRegClass);
18048 // 'd' and 'f' constraints are both defined to be "the floating point
18049 // registers", where one is for 32-bit and the other for 64-bit. We don't
18050 // really care overly much here so just give them all the same reg classes.
18051 case 'd':
18052 case 'f':
18053 if (Subtarget.hasSPE()) {
18054 if (VT == MVT::f32 || VT == MVT::i32)
18055 return std::make_pair(0U, &PPC::GPRCRegClass);
18056 if (VT == MVT::f64 || VT == MVT::i64)
18057 return std::make_pair(0U, &PPC::SPERCRegClass);
18058 } else {
18059 if (VT == MVT::f32 || VT == MVT::i32)
18060 return std::make_pair(0U, &PPC::F4RCRegClass);
18061 if (VT == MVT::f64 || VT == MVT::i64)
18062 return std::make_pair(0U, &PPC::F8RCRegClass);
18063 }
18064 break;
18065 case 'v':
18066 if (Subtarget.hasAltivec() && VT.isVector())
18067 return std::make_pair(0U, &PPC::VRRCRegClass);
18068 else if (Subtarget.hasVSX())
18069 // Scalars in Altivec registers only make sense with VSX.
18070 return std::make_pair(0U, &PPC::VFRCRegClass);
18071 break;
18072 case 'y': // crrc
18073 return std::make_pair(0U, &PPC::CRRCRegClass);
18074 }
18075 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18076 // An individual CR bit.
18077 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18078 } else if ((Constraint == "wa" || Constraint == "wd" ||
18079 Constraint == "wf" || Constraint == "wi") &&
18080 Subtarget.hasVSX()) {
18081 // A VSX register for either a scalar (FP) or vector. There is no
18082 // support for single precision scalars on subtargets prior to Power8.
18083 if (VT.isVector())
18084 return std::make_pair(0U, &PPC::VSRCRegClass);
18085 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18086 return std::make_pair(0U, &PPC::VSSRCRegClass);
18087 return std::make_pair(0U, &PPC::VSFRCRegClass);
18088 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18089 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18090 return std::make_pair(0U, &PPC::VSSRCRegClass);
18091 else
18092 return std::make_pair(0U, &PPC::VSFRCRegClass);
18093 } else if (Constraint == "lr") {
18094 if (VT == MVT::i64)
18095 return std::make_pair(0U, &PPC::LR8RCRegClass);
18096 else
18097 return std::make_pair(0U, &PPC::LRRCRegClass);
18098 }
18099
18100 // Handle special cases of physical registers that are not properly handled
18101 // by the base class.
18102 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18103 // If we name a VSX register, we can't defer to the base class because it
18104 // will not recognize the correct register (their names will be VSL{0-31}
18105 // and V{0-31} so they won't match). So we match them here.
18106 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
18107 int VSNum = atoi(Constraint.data() + 3);
18108 assert(VSNum >= 0 && VSNum <= 63 &&
18109 "Attempted to access a vsr out of range");
18110 if (VSNum < 32)
18111 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18112 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18113 }
18114
18115 // For float registers, we can't defer to the base class as it will match
18116 // the SPILLTOVSRRC class.
18117 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18118 int RegNum = atoi(Constraint.data() + 2);
18119 if (RegNum > 31 || RegNum < 0)
18120 report_fatal_error("Invalid floating point register number");
18121 if (VT == MVT::f32 || VT == MVT::i32)
18122 return Subtarget.hasSPE()
18123 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18124 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18125 if (VT == MVT::f64 || VT == MVT::i64)
18126 return Subtarget.hasSPE()
18127 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18128 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18129 }
18130 }
18131
18132 std::pair<unsigned, const TargetRegisterClass *> R =
18133 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18134
18135 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18136 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18137 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18138 // register.
18139 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18140 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18141 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18142 PPC::GPRCRegClass.contains(R.first))
18143 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18144 PPC::sub_32, &PPC::G8RCRegClass),
18145 &PPC::G8RCRegClass);
18146
18147 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18148 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18149 R.first = PPC::CR0;
18150 R.second = &PPC::CRRCRegClass;
18151 }
18152 // FIXME: This warning should ideally be emitted in the front end.
18153 const auto &TM = getTargetMachine();
18154 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18155 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18156 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18157 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18158 errs() << "warning: vector registers 20 to 32 are reserved in the "
18159 "default AIX AltiVec ABI and cannot be used\n";
18160 }
18161
18162 return R;
18163}
18164
18165/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18166/// vector. If it is invalid, don't add anything to Ops.
18167void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
18168 StringRef Constraint,
18169 std::vector<SDValue> &Ops,
18170 SelectionDAG &DAG) const {
18171 SDValue Result;
18172
18173 // Only support length 1 constraints.
18174 if (Constraint.size() > 1)
18175 return;
18176
18177 char Letter = Constraint[0];
18178 switch (Letter) {
18179 default: break;
18180 case 'I':
18181 case 'J':
18182 case 'K':
18183 case 'L':
18184 case 'M':
18185 case 'N':
18186 case 'O':
18187 case 'P': {
18188 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
18189 if (!CST) return; // Must be an immediate to match.
18190 SDLoc dl(Op);
18191 int64_t Value = CST->getSExtValue();
18192 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18193 // numbers are printed as such.
18194 switch (Letter) {
18195 default: llvm_unreachable("Unknown constraint letter!");
18196 case 'I': // "I" is a signed 16-bit constant.
18197 if (isInt<16>(Value))
18198 Result = DAG.getTargetConstant(Value, dl, TCVT);
18199 break;
18200 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
18201 if (isShiftedUInt<16, 16>(Value))
18202 Result = DAG.getTargetConstant(Value, dl, TCVT);
18203 break;
18204 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
18205 if (isShiftedInt<16, 16>(Value))
18206 Result = DAG.getTargetConstant(Value, dl, TCVT);
18207 break;
18208 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18209 if (isUInt<16>(Value))
18210 Result = DAG.getTargetConstant(Value, dl, TCVT);
18211 break;
18212 case 'M': // "M" is a constant that is greater than 31.
18213 if (Value > 31)
18214 Result = DAG.getTargetConstant(Value, dl, TCVT);
18215 break;
18216 case 'N': // "N" is a positive constant that is an exact power of two.
18217 if (Value > 0 && isPowerOf2_64(Value))
18218 Result = DAG.getTargetConstant(Value, dl, TCVT);
18219 break;
18220 case 'O': // "O" is the constant zero.
18221 if (Value == 0)
18222 Result = DAG.getTargetConstant(Value, dl, TCVT);
18223 break;
18224 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
18225 if (isInt<16>(-Value))
18226 Result = DAG.getTargetConstant(Value, dl, TCVT);
18227 break;
18228 }
18229 break;
18230 }
18231 }
18232
18233 if (Result.getNode()) {
18234 Ops.push_back(Result);
18235 return;
18236 }
18237
18238 // Handle standard constraint letters.
18239 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
18240}
18241
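// For the PowerPC trap intrinsics (tw, tdw, trap, trapd), forward any
// !annotation metadata attached to the call onto the intrinsic node's
// operand list.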
18242void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
18243 SmallVectorImpl<SDValue> &Ops,
18244 SelectionDAG &DAG) const {
18245 if (I.getNumOperands() <= 1)
18246 return;
18247 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18248 return;
18249 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18250 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18251 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18252 return;
18253
18254 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18255 Ops.push_back(DAG.getMDNode(MDN));
18256}
18257
18258// isLegalAddressingMode - Return true if the addressing mode represented
18259// by AM is legal for this target, for a load/store of the specified type.
18260bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18261 const AddrMode &AM, Type *Ty,
18262 unsigned AS,
18263 Instruction *I) const {
18264 // The vector r+i (DQ) form is supported starting with Power9. We don't check
18265 // that the offset meets the DQ-form requirement (off % 16 == 0) because, on
18266 // PowerPC, the immediate form is preferred and the offset can be adjusted to
18267 // it later by the PPCLoopInstrFormPrep pass. Also, LSR uses the min and max
18268 // offsets of an LSRUse to check addressing-mode legality, so we should be a
18269 // little aggressive and accept the other offsets for that LSRUse.
18270 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18271 return false;
18272
18273 // PPC allows a sign-extended 16-bit immediate field.
18274 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18275 return false;
18276
18277 // No global is ever allowed as a base.
18278 if (AM.BaseGV)
18279 return false;
18280
18281 // PPC only supports r+r:
18282 switch (AM.Scale) {
18283 case 0: // "r+i" or just "i", depending on HasBaseReg.
18284 break;
18285 case 1:
18286 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18287 return false;
18288 // Otherwise we have r+r or r+i.
18289 break;
18290 case 2:
18291 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18292 return false;
18293 // Allow 2*r as r+r.
18294 break;
18295 default:
18296 // No other scales are supported.
18297 return false;
18298 }
18299
18300 return true;
18301}
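// Rough summary of what the checks above accept for a scalar load/store: "r+i"
// with a signed 16-bit displacement (e.g. a field access like p->f), plain
// "r+r" (e.g. an indexed access like p[i]), and "2*r" folded as r+r. "r+r+i",
// larger scales, and global-variable bases are rejected so LSR does not form
// such addressing modes.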
18302
18303SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18304 SelectionDAG &DAG) const {
18305   MachineFunction &MF = DAG.getMachineFunction();
18306   MachineFrameInfo &MFI = MF.getFrameInfo();
18307 MFI.setReturnAddressIsTaken(true);
18308
18309 SDLoc dl(Op);
18310 unsigned Depth = Op.getConstantOperandVal(0);
18311
18312 // Make sure the function does not optimize away the store of the RA to
18313 // the stack.
18314 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18315 FuncInfo->setLRStoreRequired();
18316 auto PtrVT = getPointerTy(MF.getDataLayout());
18317
18318 if (Depth > 0) {
18319 // The link register (return address) is saved in the caller's frame
18320 // not the callee's stack frame. So we must get the caller's frame
18321 // address and load the return address at the LR offset from there.
18322 SDValue FrameAddr =
18323 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18324                     LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
18325     SDValue Offset =
18326 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18327 Subtarget.getScalarIntVT());
18328 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18329 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18330                        MachinePointerInfo());
18331   }
18332
18333 // Just load the return address off the stack.
18334 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18335 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
18336                      MachinePointerInfo());
18337 }
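// Sketch of what the Depth > 0 path above produces for
// __builtin_return_address(1) on 64-bit ELF (illustrative; the actual LR save
// offset comes from getReturnSaveOffset()):
//   ld 3, 0(1)     ; caller's frame address via LowerFRAMEADDR
//   ld 3, 16(3)    ; saved LR slot in the caller's frame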
18338
18339SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18340 SelectionDAG &DAG) const {
18341 SDLoc dl(Op);
18342 unsigned Depth = Op.getConstantOperandVal(0);
18343
18344 MachineFunction &MF = DAG.getMachineFunction();
18345 MachineFrameInfo &MFI = MF.getFrameInfo();
18346 MFI.setFrameAddressIsTaken(true);
18347
18348 EVT PtrVT = getPointerTy(MF.getDataLayout());
18349 bool isPPC64 = PtrVT == MVT::i64;
18350
18351 // Naked functions never have a frame pointer, and so we use r1. For all
18352 // other functions, this decision must be delayed until during PEI.
18353 unsigned FrameReg;
18354 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18355 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18356 else
18357 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18358
18359 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18360 PtrVT);
18361 while (Depth--)
18362 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18363 FrameAddr, MachinePointerInfo());
18364 return FrameAddr;
18365}
18366
18367#define GET_REGISTER_MATCHER
18368#include "PPCGenAsmMatcher.inc"
18369
18370 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
18371                                               const MachineFunction &MF) const {
18372 bool IsPPC64 = Subtarget.isPPC64();
18373
18374 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18375 if (!Is64Bit && VT != LLT::scalar(32))
18376 report_fatal_error("Invalid register global variable type");
18377
18378   Register Reg = MatchRegisterName(RegName);
18379   if (!Reg)
18380 return Reg;
18381
18382 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18383 // Need followup investigation as to why.
18384 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18385 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18386 StringRef(RegName) + "\"."));
18387
18388 // Convert GPR to GP8R register for 64bit.
18389 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
18390 Reg = Reg.id() - PPC::R0 + PPC::X0;
18391
18392 return Reg;
18393}
18394
18395 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
18396   // The 32-bit SVR4 ABI accesses everything as got-indirect.
18397 if (Subtarget.is32BitELFABI())
18398 return true;
18399
18400 // AIX accesses everything indirectly through the TOC, which is similar to
18401 // the GOT.
18402 if (Subtarget.isAIXABI())
18403 return true;
18404
18405   CodeModel::Model CModel = getTargetMachine().getCodeModel();
18406   // If it is small or large code model, module locals are accessed
18407 // indirectly by loading their address from .toc/.got.
18408 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18409 return true;
18410
18411 // JumpTable and BlockAddress are accessed as got-indirect.
18412   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
18413     return true;
18414
18415   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
18416     return Subtarget.isGVIndirectSymbol(G->getGlobal());
18417
18418 return false;
18419}
18420
18421bool
18422 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18423   // The PowerPC target isn't yet aware of offsets.
18424 return false;
18425}
18426
18427 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
18428                                            const CallInst &I,
18429 MachineFunction &MF,
18430 unsigned Intrinsic) const {
18431 switch (Intrinsic) {
18432 case Intrinsic::ppc_atomicrmw_xchg_i128:
18433 case Intrinsic::ppc_atomicrmw_add_i128:
18434 case Intrinsic::ppc_atomicrmw_sub_i128:
18435 case Intrinsic::ppc_atomicrmw_nand_i128:
18436 case Intrinsic::ppc_atomicrmw_and_i128:
18437 case Intrinsic::ppc_atomicrmw_or_i128:
18438 case Intrinsic::ppc_atomicrmw_xor_i128:
18439 case Intrinsic::ppc_cmpxchg_i128:
18440 Info.opc = ISD::INTRINSIC_W_CHAIN;
18441 Info.memVT = MVT::i128;
18442 Info.ptrVal = I.getArgOperand(0);
18443 Info.offset = 0;
18444 Info.align = Align(16);
18445     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
18446                  MachineMemOperand::MOVolatile;
18447     return true;
18448 case Intrinsic::ppc_atomic_load_i128:
18449 Info.opc = ISD::INTRINSIC_W_CHAIN;
18450 Info.memVT = MVT::i128;
18451 Info.ptrVal = I.getArgOperand(0);
18452 Info.offset = 0;
18453 Info.align = Align(16);
18454     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
18455     return true;
18456 case Intrinsic::ppc_atomic_store_i128:
18457 Info.opc = ISD::INTRINSIC_VOID;
18458 Info.memVT = MVT::i128;
18459 Info.ptrVal = I.getArgOperand(2);
18460 Info.offset = 0;
18461 Info.align = Align(16);
18462     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18463     return true;
18464 case Intrinsic::ppc_altivec_lvx:
18465 case Intrinsic::ppc_altivec_lvxl:
18466 case Intrinsic::ppc_altivec_lvebx:
18467 case Intrinsic::ppc_altivec_lvehx:
18468 case Intrinsic::ppc_altivec_lvewx:
18469 case Intrinsic::ppc_vsx_lxvd2x:
18470 case Intrinsic::ppc_vsx_lxvw4x:
18471 case Intrinsic::ppc_vsx_lxvd2x_be:
18472 case Intrinsic::ppc_vsx_lxvw4x_be:
18473 case Intrinsic::ppc_vsx_lxvl:
18474 case Intrinsic::ppc_vsx_lxvll: {
18475 EVT VT;
18476 switch (Intrinsic) {
18477 case Intrinsic::ppc_altivec_lvebx:
18478 VT = MVT::i8;
18479 break;
18480 case Intrinsic::ppc_altivec_lvehx:
18481 VT = MVT::i16;
18482 break;
18483 case Intrinsic::ppc_altivec_lvewx:
18484 VT = MVT::i32;
18485 break;
18486 case Intrinsic::ppc_vsx_lxvd2x:
18487 case Intrinsic::ppc_vsx_lxvd2x_be:
18488 VT = MVT::v2f64;
18489 break;
18490 default:
18491 VT = MVT::v4i32;
18492 break;
18493 }
18494
18495 Info.opc = ISD::INTRINSIC_W_CHAIN;
18496 Info.memVT = VT;
18497 Info.ptrVal = I.getArgOperand(0);
18498 Info.offset = -VT.getStoreSize()+1;
18499 Info.size = 2*VT.getStoreSize()-1;
18500 Info.align = Align(1);
18501 Info.flags = MachineMemOperand::MOLoad;
18502 return true;
18503 }
18504 case Intrinsic::ppc_altivec_stvx:
18505 case Intrinsic::ppc_altivec_stvxl:
18506 case Intrinsic::ppc_altivec_stvebx:
18507 case Intrinsic::ppc_altivec_stvehx:
18508 case Intrinsic::ppc_altivec_stvewx:
18509 case Intrinsic::ppc_vsx_stxvd2x:
18510 case Intrinsic::ppc_vsx_stxvw4x:
18511 case Intrinsic::ppc_vsx_stxvd2x_be:
18512 case Intrinsic::ppc_vsx_stxvw4x_be:
18513 case Intrinsic::ppc_vsx_stxvl:
18514 case Intrinsic::ppc_vsx_stxvll: {
18515 EVT VT;
18516 switch (Intrinsic) {
18517 case Intrinsic::ppc_altivec_stvebx:
18518 VT = MVT::i8;
18519 break;
18520 case Intrinsic::ppc_altivec_stvehx:
18521 VT = MVT::i16;
18522 break;
18523 case Intrinsic::ppc_altivec_stvewx:
18524 VT = MVT::i32;
18525 break;
18526 case Intrinsic::ppc_vsx_stxvd2x:
18527 case Intrinsic::ppc_vsx_stxvd2x_be:
18528 VT = MVT::v2f64;
18529 break;
18530 default:
18531 VT = MVT::v4i32;
18532 break;
18533 }
18534
18535 Info.opc = ISD::INTRINSIC_VOID;
18536 Info.memVT = VT;
18537 Info.ptrVal = I.getArgOperand(1);
18538 Info.offset = -VT.getStoreSize()+1;
18539 Info.size = 2*VT.getStoreSize()-1;
18540 Info.align = Align(1);
18541 Info.flags = MachineMemOperand::MOStore;
18542 return true;
18543 }
18544 case Intrinsic::ppc_stdcx:
18545 case Intrinsic::ppc_stwcx:
18546 case Intrinsic::ppc_sthcx:
18547 case Intrinsic::ppc_stbcx: {
18548 EVT VT;
18549 auto Alignment = Align(8);
18550 switch (Intrinsic) {
18551 case Intrinsic::ppc_stdcx:
18552 VT = MVT::i64;
18553 break;
18554 case Intrinsic::ppc_stwcx:
18555 VT = MVT::i32;
18556 Alignment = Align(4);
18557 break;
18558 case Intrinsic::ppc_sthcx:
18559 VT = MVT::i16;
18560 Alignment = Align(2);
18561 break;
18562 case Intrinsic::ppc_stbcx:
18563 VT = MVT::i8;
18564 Alignment = Align(1);
18565 break;
18566 }
18567 Info.opc = ISD::INTRINSIC_W_CHAIN;
18568 Info.memVT = VT;
18569 Info.ptrVal = I.getArgOperand(0);
18570 Info.offset = 0;
18571 Info.align = Alignment;
18572     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18573     return true;
18574 }
18575 default:
18576 break;
18577 }
18578
18579 return false;
18580}
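// Note on the memory bounds used for the Altivec/VSX load/store intrinsics
// above: lvx/stvx-style accesses ignore the low address bits, so the bytes
// actually touched may begin anywhere in [ptr - (S - 1), ptr + (S - 1)], where
// S is the store size of the element or vector type. Using offset = -(S - 1)
// and size = 2*S - 1 conservatively covers that whole window for alias
// analysis rather than claiming a precise region at ptr.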
18581
18582/// It returns EVT::Other if the type should be determined using generic
18583/// target-independent logic.
18584 EVT PPCTargetLowering::getOptimalMemOpType(
18585     LLVMContext &Context, const MemOp &Op,
18586 const AttributeList &FuncAttributes) const {
18587 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18588 // We should use Altivec/VSX loads and stores when available. For unaligned
18589 // addresses, unaligned VSX loads are only fast starting with the P8.
18590 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18591 if (Op.isMemset() && Subtarget.hasVSX()) {
18592 uint64_t TailSize = Op.size() % 16;
18593         // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
18594         // element if the vector element type matches the tail store. For a tail
18595         // size of 3/4 the tail store is i32, so v4i32 cannot be used; use v8i16.
18596 if (TailSize > 2 && TailSize <= 4) {
18597 return MVT::v8i16;
18598 }
18599 return MVT::v4i32;
18600 }
18601 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
18602 return MVT::v4i32;
18603 }
18604 }
18605
18606 if (Subtarget.isPPC64()) {
18607 return MVT::i64;
18608 }
18609
18610 return MVT::i32;
18611}
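// Example of the preference above (a sketch, not an exhaustive rule): a
// 32-byte memcpy on a subtarget with Altivec and 16-byte-aligned operands (or
// any alignment from P8 on) is emitted as v4i32 chunks, while the same copy on
// an older subtarget with unaligned pointers falls back to i64 chunks on
// 64-bit targets and i32 chunks on 32-bit targets.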
18612
18613/// Returns true if it is beneficial to convert a load of a constant
18614/// to just the constant itself.
18615 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
18616                                                           Type *Ty) const {
18617 assert(Ty->isIntegerTy());
18618
18619 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18620 return !(BitSize == 0 || BitSize > 64);
18621}
18622
18623 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
18624   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18625 return false;
18626 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18627 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18628 return NumBits1 == 64 && NumBits2 == 32;
18629}
18630
18631 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
18632   if (!VT1.isInteger() || !VT2.isInteger())
18633 return false;
18634 unsigned NumBits1 = VT1.getSizeInBits();
18635 unsigned NumBits2 = VT2.getSizeInBits();
18636 return NumBits1 == 64 && NumBits2 == 32;
18637}
18638
18639 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
18640   // Generally speaking, zexts are not free, but they are free when they can be
18641 // folded with other operations.
18642 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18643 EVT MemVT = LD->getMemoryVT();
18644 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18645 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18646 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18647 LD->getExtensionType() == ISD::ZEXTLOAD))
18648 return true;
18649 }
18650
18651 // FIXME: Add other cases...
18652 // - 32-bit shifts with a zext to i64
18653 // - zext after ctlz, bswap, etc.
18654 // - zext after and by a constant mask
18655
18656 return TargetLowering::isZExtFree(Val, VT2);
18657}
18658
18659bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18660 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18661 "invalid fpext types");
18662 // Extending to float128 is not free.
18663 if (DestVT == MVT::f128)
18664 return false;
18665 return true;
18666}
18667
18668 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
18669   return isInt<16>(Imm) || isUInt<16>(Imm);
18670}
18671
18672 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
18673   return isInt<16>(Imm) || isUInt<16>(Imm);
18674}
18675
18676 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
18677                                                        MachineMemOperand::Flags,
18678                                                        unsigned *Fast) const {
18679   if (DisablePPCUnaligned)
18680     return false;
18681
18682 // PowerPC supports unaligned memory access for simple non-vector types.
18683 // Although accessing unaligned addresses is not as efficient as accessing
18684 // aligned addresses, it is generally more efficient than manual expansion,
18685 // and generally only traps for software emulation when crossing page
18686 // boundaries.
18687
18688 if (!VT.isSimple())
18689 return false;
18690
18691 if (VT.isFloatingPoint() && !VT.isVector() &&
18692 !Subtarget.allowsUnalignedFPAccess())
18693 return false;
18694
18695 if (VT.getSimpleVT().isVector()) {
18696 if (Subtarget.hasVSX()) {
18697 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18698 VT != MVT::v4f32 && VT != MVT::v4i32)
18699 return false;
18700 } else {
18701 return false;
18702 }
18703 }
18704
18705 if (VT == MVT::ppcf128)
18706 return false;
18707
18708 if (Fast)
18709 *Fast = 1;
18710
18711 return true;
18712}
18713
18714 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
18715                                                SDValue C) const {
18716 // Check integral scalar types.
18717 if (!VT.isScalarInteger())
18718 return false;
18719 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18720 if (!ConstNode->getAPIntValue().isSignedIntN(64))
18721 return false;
18722 // This transformation will generate >= 2 operations. But the following
18723 // cases will generate <= 2 instructions during ISEL. So exclude them.
18724 // 1. If the constant multiplier fits 16 bits, it can be handled by one
18725 // HW instruction, ie. MULLI
18726     //    2. If the multiplier fits 16 bits after shifting out trailing zeros,
18727     //       only one instruction more than case 1 is needed, i.e. MULLI + RLDICR
18728 int64_t Imm = ConstNode->getSExtValue();
18729 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
18730 Imm >>= Shift;
18731 if (isInt<16>(Imm))
18732 return false;
18733 uint64_t UImm = static_cast<uint64_t>(Imm);
18734 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
18735 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
18736 return true;
18737 }
18738 return false;
18739}
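// Illustrative values for the check above: a multiply by 65537 (0x10001) has
// no trailing zero bits, does not fit in 16 bits, and 65537 - 1 is a power of
// two, so returning true lets the combiner expand it to (x << 16) + x. A
// multiply by 20 reduces to 5 after stripping trailing zeros, which fits in 16
// bits, so it stays as MULLI (plus RLDICR) and we return false.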
18740
18741 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
18742                                                    EVT VT) const {
18743   return isFMAFasterThanFMulAndFAdd(
18744       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
18745 }
18746 
18747 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
18748                                                    Type *Ty) const {
18749 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18750 return false;
18751 switch (Ty->getScalarType()->getTypeID()) {
18752 case Type::FloatTyID:
18753 case Type::DoubleTyID:
18754 return true;
18755 case Type::FP128TyID:
18756 return Subtarget.hasP9Vector();
18757 default:
18758 return false;
18759 }
18760}
18761
18762// FIXME: add more patterns which are not profitable to hoist.
18763 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
18764   if (!I->hasOneUse())
18765 return true;
18766
18767 Instruction *User = I->user_back();
18768 assert(User && "A single use instruction with no uses.");
18769
18770 switch (I->getOpcode()) {
18771 case Instruction::FMul: {
18772 // Don't break FMA, PowerPC prefers FMA.
18773 if (User->getOpcode() != Instruction::FSub &&
18774 User->getOpcode() != Instruction::FAdd)
18775 return true;
18776
18777     const TargetOptions &Options = getTargetMachine().Options;
18778     const Function *F = I->getFunction();
18779 const DataLayout &DL = F->getDataLayout();
18780 Type *Ty = User->getOperand(0)->getType();
18781 bool AllowContract = I->getFastMathFlags().allowContract() &&
18782 User->getFastMathFlags().allowContract();
18783
18784 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
18785              isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
18786              (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
18787 }
18788 case Instruction::Load: {
18789 // Don't break "store (load float*)" pattern, this pattern will be combined
18790 // to "store (load int32)" in later InstCombine pass. See function
18791     // combineLoadToOperationType. On PowerPC, loading a floating-point value takes more
18792 // cycles than loading a 32 bit integer.
18793 LoadInst *LI = cast<LoadInst>(I);
18794 // For the loads that combineLoadToOperationType does nothing, like
18795 // ordered load, it should be profitable to hoist them.
18796 // For swifterror load, it can only be used for pointer to pointer type, so
18797 // later type check should get rid of this case.
18798 if (!LI->isUnordered())
18799 return true;
18800
18801 if (User->getOpcode() != Instruction::Store)
18802 return true;
18803
18804 if (I->getType()->getTypeID() != Type::FloatTyID)
18805 return true;
18806
18807 return false;
18808 }
18809 default:
18810 return true;
18811 }
18812 return true;
18813}
18814
18815const MCPhysReg *
18816 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
18817   // LR is a callee-save register, but we must treat it as clobbered by any call
18818 // site. Hence we include LR in the scratch registers, which are in turn added
18819 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
18820 // to CTR, which is used by any indirect call.
18821 static const MCPhysReg ScratchRegs[] = {
18822 PPC::X12, PPC::LR8, PPC::CTR8, 0
18823 };
18824
18825 return ScratchRegs;
18826}
18827
18828 Register PPCTargetLowering::getExceptionPointerRegister(
18829     const Constant *PersonalityFn) const {
18830 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
18831}
18832
18833 Register PPCTargetLowering::getExceptionSelectorRegister(
18834     const Constant *PersonalityFn) const {
18835 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
18836}
18837
18838bool
18839 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
18840                      EVT VT, unsigned DefinedValues) const {
18841 if (VT == MVT::v2i64)
18842 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
18843
18844 if (Subtarget.hasVSX())
18845 return true;
18846
18847   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
18848 }
18849
18850 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
18851   if (DisableILPPref || Subtarget.enableMachineScheduler())
18852     return TargetLowering::getSchedulingPreference(N);
18853 
18854   return Sched::ILP;
18855}
18856
18857// Create a fast isel object.
18858FastISel *
18860 const TargetLibraryInfo *LibInfo) const {
18861 return PPC::createFastISel(FuncInfo, LibInfo);
18862}
18863
18864// 'Inverted' means the FMA opcode after negating one multiplicand.
18865// For example, (fma -a b c) = (fnmsub a b c)
18866static unsigned invertFMAOpcode(unsigned Opc) {
18867 switch (Opc) {
18868 default:
18869 llvm_unreachable("Invalid FMA opcode for PowerPC!");
18870 case ISD::FMA:
18871 return PPCISD::FNMSUB;
18872 case PPCISD::FNMSUB:
18873 return ISD::FMA;
18874 }
18875}
18876
18877 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
18878                                                 bool LegalOps, bool OptForSize,
18879                                                 NegatibleCost &Cost,
18880                                                 unsigned Depth) const {
18881   if (Depth > SelectionDAG::MaxRecursionDepth)
18882     return SDValue();
18883 
18884 unsigned Opc = Op.getOpcode();
18885 EVT VT = Op.getValueType();
18886 SDNodeFlags Flags = Op.getNode()->getFlags();
18887
18888 switch (Opc) {
18889 case PPCISD::FNMSUB:
18890 if (!Op.hasOneUse() || !isTypeLegal(VT))
18891 break;
18892
18893     const TargetOptions &Options = getTargetMachine().Options;
18894     SDValue N0 = Op.getOperand(0);
18895 SDValue N1 = Op.getOperand(1);
18896 SDValue N2 = Op.getOperand(2);
18897 SDLoc Loc(Op);
18898
18899     NegatibleCost N2Cost = NegatibleCost::Expensive;
18900     SDValue NegN2 =
18901 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
18902
18903 if (!NegN2)
18904 return SDValue();
18905
18906 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18907 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18908 // These transformations may change sign of zeroes. For example,
18909 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18910 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18911 // Try and choose the cheaper one to negate.
18912       NegatibleCost N0Cost = NegatibleCost::Expensive;
18913       SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
18914 N0Cost, Depth + 1);
18915
18916       NegatibleCost N1Cost = NegatibleCost::Expensive;
18917       SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
18918 N1Cost, Depth + 1);
18919
18920 if (NegN0 && N0Cost <= N1Cost) {
18921 Cost = std::min(N0Cost, N2Cost);
18922 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
18923 } else if (NegN1) {
18924 Cost = std::min(N1Cost, N2Cost);
18925 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
18926 }
18927 }
18928
18929 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18930 if (isOperationLegal(ISD::FMA, VT)) {
18931 Cost = N2Cost;
18932 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
18933 }
18934
18935 break;
18936 }
18937
18938 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18939 Cost, Depth);
18940}
18941
18942// Override to enable LOAD_STACK_GUARD lowering on Linux.
18943 bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
18944   if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18945     return true;
18946   return TargetLowering::useLoadStackGuardNode(M);
18947 }
18948
18949 bool PPCTargetLowering::isFPImmLegal(EVT VT, const APFloat &Imm,
18950                                      bool ForCodeSize) const {
18951 if (!VT.isSimple() || !Subtarget.hasVSX())
18952 return false;
18953
18954 switch(VT.getSimpleVT().SimpleTy) {
18955 default:
18956 // For FP types that are currently not supported by PPC backend, return
18957 // false. Examples: f16, f80.
18958 return false;
18959 case MVT::f32:
18960 case MVT::f64: {
18961 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
18962       // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18963 return true;
18964 }
18965 bool IsExact;
18966 APSInt IntResult(16, false);
18967 // The rounding mode doesn't really matter because we only care about floats
18968 // that can be converted to integers exactly.
18969 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
18970 // For exact values in the range [-16, 15] we can materialize the float.
18971 if (IsExact && IntResult <= 15 && IntResult >= -16)
18972 return true;
18973 return Imm.isZero();
18974 }
18975 case MVT::ppcf128:
18976 return Imm.isPosZero();
18977 }
18978}
18979
18980// For vector shift operation op, fold
18981// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
18982 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
18983                                   SelectionDAG &DAG) {
18984 SDValue N0 = N->getOperand(0);
18985 SDValue N1 = N->getOperand(1);
18986 EVT VT = N0.getValueType();
18987 unsigned OpSizeInBits = VT.getScalarSizeInBits();
18988 unsigned Opcode = N->getOpcode();
18989 unsigned TargetOpcode;
18990
18991 switch (Opcode) {
18992 default:
18993 llvm_unreachable("Unexpected shift operation");
18994 case ISD::SHL:
18995 TargetOpcode = PPCISD::SHL;
18996 break;
18997 case ISD::SRL:
18998 TargetOpcode = PPCISD::SRL;
18999 break;
19000 case ISD::SRA:
19001 TargetOpcode = PPCISD::SRA;
19002 break;
19003 }
19004
19005 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19006 N1->getOpcode() == ISD::AND)
19007 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19008 if (Mask->getZExtValue() == OpSizeInBits - 1)
19009 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19010
19011 return SDValue();
19012}
19013
19014SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
19015 DAGCombinerInfo &DCI) const {
19016 EVT VT = N->getValueType(0);
19017 assert(VT.isVector() && "Vector type expected.");
19018
19019 unsigned Opc = N->getOpcode();
19020 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
19021 "Unexpected opcode.");
19022
19023 if (!isOperationLegal(Opc, VT))
19024 return SDValue();
19025
19026 EVT EltTy = VT.getScalarType();
19027 unsigned EltBits = EltTy.getSizeInBits();
19028 if (EltTy != MVT::i64 && EltTy != MVT::i32)
19029 return SDValue();
19030
19031 SDValue N1 = N->getOperand(1);
19032 uint64_t SplatBits = 0;
19033 bool AddSplatCase = false;
19034 unsigned OpcN1 = N1.getOpcode();
19035 if (OpcN1 == PPCISD::VADD_SPLAT &&
19037 AddSplatCase = true;
19038 SplatBits = N1.getConstantOperandVal(0);
19039 }
19040
19041 if (!AddSplatCase) {
19042 if (OpcN1 != ISD::BUILD_VECTOR)
19043 return SDValue();
19044
19045 unsigned SplatBitSize;
19046 bool HasAnyUndefs;
19047 APInt APSplatBits, APSplatUndef;
19048 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
19049 bool BVNIsConstantSplat =
19050 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
19051 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
19052 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
19053 return SDValue();
19054 SplatBits = APSplatBits.getZExtValue();
19055 }
19056
19057 SDLoc DL(N);
19058 SDValue N0 = N->getOperand(0);
19059 // PPC vector shifts by word/double look at only the low 5/6 bits of the
19060 // shift vector, which means the max value is 31/63. A shift vector of all
19061 // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
19062 // -16 to 15 range.
19063 if (SplatBits == (EltBits - 1)) {
19064 unsigned NewOpc;
19065 switch (Opc) {
19066 case ISD::SHL:
19067 NewOpc = PPCISD::SHL;
19068 break;
19069 case ISD::SRL:
19070 NewOpc = PPCISD::SRL;
19071 break;
19072 case ISD::SRA:
19073 NewOpc = PPCISD::SRA;
19074 break;
19075 }
19076 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
19077 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
19078 }
19079
19080 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
19081 return SDValue();
19082
19083 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
19084 // before the BUILD_VECTOR is replaced by a load.
19085 if (EltTy != MVT::i64 || SplatBits != 1)
19086 return SDValue();
19087
19088 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
19089}
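// Example of the all-ones-splat rewrite above (a sketch): for
//   (shl v4i32 %x, <31, 31, 31, 31>)
// the splat equals EltBits - 1, so the node becomes
//   (PPCISD::SHL %x, splat(255))
// and the 255 splat truncates to 31 in the 5-bit per-element shift field,
// avoiding a constant-pool load for the shift amounts.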
19090
19091SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19092 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19093 return Value;
19094
19095 if (N->getValueType(0).isVector())
19096 return combineVectorShift(N, DCI);
19097
19098 SDValue N0 = N->getOperand(0);
19099 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19100 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19101 N0.getOpcode() != ISD::SIGN_EXTEND ||
19102 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19103 N->getValueType(0) != MVT::i64)
19104 return SDValue();
19105
19106 // We can't save an operation here if the value is already extended, and
19107 // the existing shift is easier to combine.
19108 SDValue ExtsSrc = N0.getOperand(0);
19109 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19110 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19111 return SDValue();
19112
19113 SDLoc DL(N0);
19114 SDValue ShiftBy = SDValue(CN1, 0);
19115 // We want the shift amount to be i32 on the extswli, but the shift could
19116 // have an i64.
19117 if (ShiftBy.getValueType() == MVT::i64)
19118 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19119
19120 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19121 ShiftBy);
19122}
19123
19124SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19125 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19126 return Value;
19127
19128 if (N->getValueType(0).isVector())
19129 return combineVectorShift(N, DCI);
19130
19131 return SDValue();
19132}
19133
19134SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19135 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19136 return Value;
19137
19138 if (N->getValueType(0).isVector())
19139 return combineVectorShift(N, DCI);
19140
19141 return SDValue();
19142}
19143
19144// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19145// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19146// When C is zero, the equation (addi Z, -C) can be simplified to Z
19147// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
19148 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
19149                                  const PPCSubtarget &Subtarget) {
19150 if (!Subtarget.isPPC64())
19151 return SDValue();
19152
19153 SDValue LHS = N->getOperand(0);
19154 SDValue RHS = N->getOperand(1);
19155
19156 auto isZextOfCompareWithConstant = [](SDValue Op) {
19157 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19158 Op.getValueType() != MVT::i64)
19159 return false;
19160
19161 SDValue Cmp = Op.getOperand(0);
19162 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19163 Cmp.getOperand(0).getValueType() != MVT::i64)
19164 return false;
19165
19166 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19167 int64_t NegConstant = 0 - Constant->getSExtValue();
19168 // Due to the limitations of the addi instruction,
19169 // -C is required to be [-32768, 32767].
19170 return isInt<16>(NegConstant);
19171 }
19172
19173 return false;
19174 };
19175
19176 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19177 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19178
19179 // If there is a pattern, canonicalize a zext operand to the RHS.
19180 if (LHSHasPattern && !RHSHasPattern)
19181 std::swap(LHS, RHS);
19182 else if (!LHSHasPattern && !RHSHasPattern)
19183 return SDValue();
19184
19185 SDLoc DL(N);
19186 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19187 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19188 SDValue Cmp = RHS.getOperand(0);
19189 SDValue Z = Cmp.getOperand(0);
19190 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19191 int64_t NegConstant = 0 - Constant->getSExtValue();
19192
19193 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19194 default: break;
19195 case ISD::SETNE: {
19196 // when C == 0
19197 // --> addze X, (addic Z, -1).carry
19198 // /
19199 // add X, (zext(setne Z, C))--
19200 // \ when -32768 <= -C <= 32767 && C != 0
19201 // --> addze X, (addic (addi Z, -C), -1).carry
19202 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19203 DAG.getConstant(NegConstant, DL, MVT::i64));
19204 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19205 SDValue Addc =
19206 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19207 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19208 DAG.getConstant(0, DL, CarryType));
19209 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19210 DAG.getConstant(0, DL, MVT::i64),
19211 SDValue(Addc.getNode(), 1));
19212 }
19213 case ISD::SETEQ: {
19214 // when C == 0
19215 // --> addze X, (subfic Z, 0).carry
19216 // /
19217 // add X, (zext(sete Z, C))--
19218 // \ when -32768 <= -C <= 32767 && C != 0
19219 // --> addze X, (subfic (addi Z, -C), 0).carry
19220 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19221 DAG.getConstant(NegConstant, DL, MVT::i64));
19222 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19223 SDValue Subc =
19224 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19225 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19226 DAG.getConstant(0, DL, CarryType));
19227 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19228 DAG.getConstant(1UL, DL, CarryType));
19229 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19230 DAG.getConstant(0, DL, MVT::i64), Invert);
19231 }
19232 }
19233
19234 return SDValue();
19235}
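// Illustrative result of the combine above for the C == 0 case on a 64-bit
// target (a sketch in pseudo-assembly):
//   add X, (zext (setne Z, 0))   -->   addic  t, Z, -1
//                                      addze  result, X
// so the compare-and-zero-extend pair becomes a carry-producing add followed
// by add-with-carry instead of materializing the i1 compare result.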
19236
19237// Transform
19238// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19239// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19240// In this case both C1 and C2 must be known constants.
19241// C1+C2 must fit into a 34 bit signed integer.
19242 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
19243                                           const PPCSubtarget &Subtarget) {
19244 if (!Subtarget.isUsingPCRelativeCalls())
19245 return SDValue();
19246
19247 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19248 // If we find that node try to cast the Global Address and the Constant.
19249 SDValue LHS = N->getOperand(0);
19250 SDValue RHS = N->getOperand(1);
19251
19252 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19253 std::swap(LHS, RHS);
19254
19255 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19256 return SDValue();
19257
19258 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19259   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
19260   ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);
19261 
19262 // Check that both casts succeeded.
19263 if (!GSDN || !ConstNode)
19264 return SDValue();
19265
19266 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19267 SDLoc DL(GSDN);
19268
19269 // The signed int offset needs to fit in 34 bits.
19270 if (!isInt<34>(NewOffset))
19271 return SDValue();
19272
19273 // The new global address is a copy of the old global address except
19274 // that it has the updated Offset.
19275 SDValue GA =
19276 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19277 NewOffset, GSDN->getTargetFlags());
19278 SDValue MatPCRel =
19279 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19280 return MatPCRel;
19281}
19282
19283// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19284// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19285// Mathematical identity: X + 1 = X - (-1)
19286// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19287// Requirement: VSX feature for efficient xxleqv generation
19288 static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG,
19289                                const PPCSubtarget &Subtarget) {
19290
19291 EVT VT = N->getValueType(0);
19292 if (!Subtarget.hasVSX())
19293 return SDValue();
19294
19295 // Handle v2i64, v4i32, v8i16 and v16i8 types
19296 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
19297 VT == MVT::v2i64))
19298 return SDValue();
19299
19300 SDValue LHS = N->getOperand(0);
19301 SDValue RHS = N->getOperand(1);
19302
19303 // Check if RHS is BUILD_VECTOR
19304 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19305 return SDValue();
19306
19307 // Check if all the elements are 1
19308 unsigned NumOfEles = RHS.getNumOperands();
19309 for (unsigned i = 0; i < NumOfEles; ++i) {
19310 auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
19311 if (!CN || CN->getSExtValue() != 1)
19312 return SDValue();
19313 }
19314 SDLoc DL(N);
19315
19316 SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
19317 SmallVector<SDValue, 4> Ops(4, MinusOne);
19318 SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
19319
19320 // Bitcast to the target vector type
19321 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
19322
19323 return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
19324}
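// Example of the rewrite above for v4i32 (a sketch, assuming VSX):
//   (add %x, <1, 1, 1, 1>)  -->  (sub %x, <-1, -1, -1, -1>)
// where the all-ones operand matches the XXLEQVOnes pattern and is produced by
// a single xxleqv, so no constant-pool load is needed for the splat of 1.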
19325
19326SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19327 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19328 return Value;
19329
19330 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19331 return Value;
19332
19333 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
19334 return Value;
19335 return SDValue();
19336}
19337
19338// Detect TRUNCATE operations on bitcasts of float128 values.
19339 // What we are looking for here is the situation where we extract a subset
19340// of bits from a 128 bit float.
19341// This can be of two forms:
19342// 1) BITCAST of f128 feeding TRUNCATE
19343// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19344// The reason this is required is because we do not have a legal i128 type
19345// and so we want to prevent having to store the f128 and then reload part
19346// of it.
19347SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19348 DAGCombinerInfo &DCI) const {
19349 // If we are using CRBits then try that first.
19350 if (Subtarget.useCRBits()) {
19351 // Check if CRBits did anything and return that if it did.
19352 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19353 return CRTruncValue;
19354 }
19355
19356 SDLoc dl(N);
19357 SDValue Op0 = N->getOperand(0);
19358
19359 // Looking for a truncate of i128 to i64.
19360 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19361 return SDValue();
19362
19363 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19364
19365 // SRL feeding TRUNCATE.
19366 if (Op0.getOpcode() == ISD::SRL) {
19367 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19368 // The right shift has to be by 64 bits.
19369 if (!ConstNode || ConstNode->getZExtValue() != 64)
19370 return SDValue();
19371
19372 // Switch the element number to extract.
19373 EltToExtract = EltToExtract ? 0 : 1;
19374 // Update Op0 past the SRL.
19375 Op0 = Op0.getOperand(0);
19376 }
19377
19378 // BITCAST feeding a TRUNCATE possibly via SRL.
19379 if (Op0.getOpcode() == ISD::BITCAST &&
19380 Op0.getValueType() == MVT::i128 &&
19381 Op0.getOperand(0).getValueType() == MVT::f128) {
19382 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19383 return DCI.DAG.getNode(
19384 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19385 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19386 }
19387 return SDValue();
19388}
19389
19390SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
19391 SelectionDAG &DAG = DCI.DAG;
19392
19393 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
19394 if (!ConstOpOrElement)
19395 return SDValue();
19396
19397 // An imul is usually smaller than the alternative sequence for legal type.
19398   if (DAG.getMachineFunction().getFunction().hasMinSize() &&
19399       isOperationLegal(ISD::MUL, N->getValueType(0)))
19400 return SDValue();
19401
19402 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
19403 switch (this->Subtarget.getCPUDirective()) {
19404 default:
19405 // TODO: enhance the condition for subtarget before pwr8
19406 return false;
19407 case PPC::DIR_PWR8:
19408       //  type     mul   add   shl
19409       //  scalar    4     1     1
19410       //  vector    7     2     2
19411 return true;
19412 case PPC::DIR_PWR9:
19413 case PPC::DIR_PWR10:
19414 case PPC::DIR_PWR11:
19415     case PPC::DIR_PWR_FUTURE:
19416       //  type     mul   add   shl
19417       //  scalar    5     2     2
19418       //  vector    7     2     2
19419
19420       // The cycle ratios of the related operations are shown in the table
19421       // above. Because mul costs 5 (scalar) / 7 (vector) while add/sub/shl all
19422       // cost 2 for both scalar and vector types, a two-instruction pattern
19423       // (add/sub + shl) costs 4 and is always profitable; but a three-
19424       // instruction pattern, (mul x, -(2^N + 1)) => -(add (shl x, N), x),
19425       // costs 6 (sub + add + shl), so we only do it for vector types.
19426 return IsAddOne && IsNeg ? VT.isVector() : true;
19427 }
19428 };
19429
19430 EVT VT = N->getValueType(0);
19431 SDLoc DL(N);
19432
19433 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19434 bool IsNeg = MulAmt.isNegative();
19435 APInt MulAmtAbs = MulAmt.abs();
19436
19437 if ((MulAmtAbs - 1).isPowerOf2()) {
19438 // (mul x, 2^N + 1) => (add (shl x, N), x)
19439 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
19440
19441 if (!IsProfitable(IsNeg, true, VT))
19442 return SDValue();
19443
19444 SDValue Op0 = N->getOperand(0);
19445 SDValue Op1 =
19446 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19447 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
19448 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
19449
19450 if (!IsNeg)
19451 return Res;
19452
19453 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
19454 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19455 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19456 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19457
19458 if (!IsProfitable(IsNeg, false, VT))
19459 return SDValue();
19460
19461 SDValue Op0 = N->getOperand(0);
19462 SDValue Op1 =
19463 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19464 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
19465
19466 if (!IsNeg)
19467 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
19468 else
19469 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
19470
19471 } else {
19472 return SDValue();
19473 }
19474}
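// Worked examples of the two expansions above (illustrative only; whether they
// fire is gated by IsProfitable for the current CPU directive):
//   (mul x, 5)  = (mul x, 2^2 + 1)     -->  (add (shl x, 2), x)
//   (mul x, 7)  = (mul x, 2^3 - 1)     -->  (sub (shl x, 3), x)
//   (mul x, -7) = (mul x, -(2^3 - 1))  -->  (sub x, (shl x, 3))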
19475
19476// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
19477// in combiner since we need to check SD flags and other subtarget features.
19478SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19479 DAGCombinerInfo &DCI) const {
19480 SDValue N0 = N->getOperand(0);
19481 SDValue N1 = N->getOperand(1);
19482 SDValue N2 = N->getOperand(2);
19483 SDNodeFlags Flags = N->getFlags();
19484 EVT VT = N->getValueType(0);
19485 SelectionDAG &DAG = DCI.DAG;
19486 const TargetOptions &Options = getTargetMachine().Options;
19487 unsigned Opc = N->getOpcode();
19488   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
19489   bool LegalOps = !DCI.isBeforeLegalizeOps();
19490 SDLoc Loc(N);
19491
19492 if (!isOperationLegal(ISD::FMA, VT))
19493 return SDValue();
19494
19495 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19496 // since (fnmsub a b c)=-0 while c-ab=+0.
19497 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19498 return SDValue();
19499
19500 // (fma (fneg a) b c) => (fnmsub a b c)
19501 // (fnmsub (fneg a) b c) => (fma a b c)
19502 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
19503 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
19504
19505 // (fma a (fneg b) c) => (fnmsub a b c)
19506 // (fnmsub a (fneg b) c) => (fma a b c)
19507 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
19508 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
19509
19510 return SDValue();
19511}
19512
19513bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19514 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
19515 if (!Subtarget.is64BitELFABI())
19516 return false;
19517
19518 // If not a tail call then no need to proceed.
19519 if (!CI->isTailCall())
19520 return false;
19521
19522 // If sibling calls have been disabled and tail-calls aren't guaranteed
19523 // there is no reason to duplicate.
19524 auto &TM = getTargetMachine();
19525 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19526 return false;
19527
19528 // Can't tail call a function called indirectly, or if it has variadic args.
19529 const Function *Callee = CI->getCalledFunction();
19530 if (!Callee || Callee->isVarArg())
19531 return false;
19532
19533 // Make sure the callee and caller calling conventions are eligible for tco.
19534 const Function *Caller = CI->getParent()->getParent();
19535 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19536 CI->getCallingConv()))
19537 return false;
19538
19539 // If the function is local then we have a good chance at tail-calling it
19540 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19541}
19542
19543bool PPCTargetLowering::
19544isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19545 const Value *Mask = AndI.getOperand(1);
19546 // If the mask is suitable for andi. or andis. we should sink the and.
19547 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19548 // Can't handle constants wider than 64-bits.
19549 if (CI->getBitWidth() > 64)
19550 return false;
19551 int64_t ConstVal = CI->getZExtValue();
19552 return isUInt<16>(ConstVal) ||
19553 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
19554 }
19555
19556 // For non-constant masks, we can always use the record-form and.
19557 return true;
19558}
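// Examples for the constant-mask check above (illustrative): 0x00FF and 0xFFFF
// are accepted as andi. candidates, 0xFFFF0000 as an andis. candidate, while
// 0x00FF00FF spans both halfwords and is rejected, so the 'and' is not sunk
// for it.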
19559
19560/// getAddrModeForFlags - Based on the set of address flags, select the most
19561/// optimal instruction format to match by.
19562PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19563 // This is not a node we should be handling here.
19564 if (Flags == PPC::MOF_None)
19565 return PPC::AM_None;
19566 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19567 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
19568 if ((Flags & FlagSet) == FlagSet)
19569 return PPC::AM_DForm;
19570 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
19571 if ((Flags & FlagSet) == FlagSet)
19572 return PPC::AM_DSForm;
19573 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
19574 if ((Flags & FlagSet) == FlagSet)
19575 return PPC::AM_DQForm;
19576 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
19577 if ((Flags & FlagSet) == FlagSet)
19578 return PPC::AM_PrefixDForm;
19579 // If no other forms are selected, return an X-Form as it is the most
19580 // general addressing mode.
19581 return PPC::AM_XForm;
19582}
19583
19584/// Set alignment flags based on whether or not the Frame Index is aligned.
19585/// Utilized when computing flags for address computation when selecting
19586/// load and store instructions.
19587static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19588 SelectionDAG &DAG) {
19589 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19590 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
19591 if (!FI)
19592 return;
19593   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
19594   unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
19595 // If this is (add $FI, $S16Imm), the alignment flags are already set
19596 // based on the immediate. We just need to clear the alignment flags
19597 // if the FI alignment is weaker.
19598 if ((FrameIndexAlign % 4) != 0)
19599 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19600 if ((FrameIndexAlign % 16) != 0)
19601 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19602 // If the address is a plain FrameIndex, set alignment flags based on
19603 // FI alignment.
19604 if (!IsAdd) {
19605 if ((FrameIndexAlign % 4) == 0)
19606 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19607 if ((FrameIndexAlign % 16) == 0)
19608 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19609 }
19610}
19611
19612/// Given a node, compute flags that are used for address computation when
19613/// selecting load and store instructions. The flags computed are stored in
19614 /// FlagSet. This function takes into account whether the node is a constant,
19615 /// an ADD or an OR, or none of these, and computes the address flags accordingly.
19616static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19617 SelectionDAG &DAG) {
19618 // Set the alignment flags for the node depending on if the node is
19619 // 4-byte or 16-byte aligned.
19620 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19621 if ((Imm & 0x3) == 0)
19622 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19623 if ((Imm & 0xf) == 0)
19624 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19625 };
19626
19627   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
19628     // All 32-bit constants can be computed as LIS + Disp.
19629 const APInt &ConstImm = CN->getAPIntValue();
19630 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
19631 FlagSet |= PPC::MOF_AddrIsSImm32;
19632 SetAlignFlagsForImm(ConstImm.getZExtValue());
19633 setAlignFlagsForFI(N, FlagSet, DAG);
19634 }
19635 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
19636 FlagSet |= PPC::MOF_RPlusSImm34;
19637 else // Let constant materialization handle large constants.
19638 FlagSet |= PPC::MOF_NotAddNorCst;
19639 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19640 // This address can be represented as an addition of:
19641 // - Register + Imm16 (possibly a multiple of 4/16)
19642 // - Register + Imm34
19643 // - Register + PPCISD::Lo
19644 // - Register + Register
19645 // In any case, we won't have to match this as Base + Zero.
19646 SDValue RHS = N.getOperand(1);
19647     if (auto *CN = dyn_cast<ConstantSDNode>(RHS)) {
19648       const APInt &ConstImm = CN->getAPIntValue();
19649 if (ConstImm.isSignedIntN(16)) {
19650 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19651 SetAlignFlagsForImm(ConstImm.getZExtValue());
19652 setAlignFlagsForFI(N, FlagSet, DAG);
19653 }
19654 if (ConstImm.isSignedIntN(34))
19655 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19656 else
19657 FlagSet |= PPC::MOF_RPlusR; // Register.
19658 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
19659 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19660 else
19661 FlagSet |= PPC::MOF_RPlusR;
19662 } else { // The address computation is not a constant or an addition.
19663 setAlignFlagsForFI(N, FlagSet, DAG);
19664 FlagSet |= PPC::MOF_NotAddNorCst;
19665 }
19666}
19667
19668static bool isPCRelNode(SDValue N) {
19669   return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
19670           isValidPCRelNode<ConstantPoolSDNode>(N) ||
19671           isValidPCRelNode<GlobalAddressSDNode>(N) ||
19672           isValidPCRelNode<JumpTableSDNode>(N) ||
19673           isValidPCRelNode<BlockAddressSDNode>(N));
19674 }
19675
19676 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
19677/// the address flags of the load/store instruction that is to be matched.
19678unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19679 SelectionDAG &DAG) const {
19680 unsigned FlagSet = PPC::MOF_None;
19681
19682 // Compute subtarget flags.
19683 if (!Subtarget.hasP9Vector())
19684 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19685 else
19686 FlagSet |= PPC::MOF_SubtargetP9;
19687
19688 if (Subtarget.hasPrefixInstrs())
19689 FlagSet |= PPC::MOF_SubtargetP10;
19690
19691 if (Subtarget.hasSPE())
19692 FlagSet |= PPC::MOF_SubtargetSPE;
19693
19694 // Check if we have a PCRel node and return early.
19695 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19696 return FlagSet;
19697
19698 // If the node is the paired load/store intrinsics, compute flags for
19699 // address computation and return early.
19700 unsigned ParentOp = Parent->getOpcode();
19701 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19702 (ParentOp == ISD::INTRINSIC_VOID))) {
19703 unsigned ID = Parent->getConstantOperandVal(1);
19704 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19705 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19706 ? Parent->getOperand(2)
19707 : Parent->getOperand(3);
19708 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
19709 FlagSet |= PPC::MOF_Vector;
19710 return FlagSet;
19711 }
19712 }
19713
19714 // Mark this as something we don't want to handle here if it is atomic
19715 // or pre-increment instruction.
19716 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
19717 if (LSB->isIndexed())
19718 return PPC::MOF_None;
19719
19720 // Compute in-memory type flags. This is based on if there are scalars,
19721 // floats or vectors.
19722 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
19723 assert(MN && "Parent should be a MemSDNode!");
19724 EVT MemVT = MN->getMemoryVT();
19725 unsigned Size = MemVT.getSizeInBits();
19726 if (MemVT.isScalarInteger()) {
19727 assert(Size <= 128 &&
19728 "Not expecting scalar integers larger than 16 bytes!");
19729 if (Size < 32)
19730 FlagSet |= PPC::MOF_SubWordInt;
19731 else if (Size == 32)
19732 FlagSet |= PPC::MOF_WordInt;
19733 else
19734 FlagSet |= PPC::MOF_DoubleWordInt;
19735 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19736 if (Size == 128)
19737 FlagSet |= PPC::MOF_Vector;
19738 else if (Size == 256) {
19739 assert(Subtarget.pairedVectorMemops() &&
19740 "256-bit vectors are only available when paired vector memops is "
19741 "enabled!");
19742 FlagSet |= PPC::MOF_Vector;
19743 } else
19744 llvm_unreachable("Not expecting illegal vectors!");
19745 } else { // Floating point type: can be scalar, f128 or vector types.
19746 if (Size == 32 || Size == 64)
19747 FlagSet |= PPC::MOF_ScalarFloat;
19748 else if (MemVT == MVT::f128 || MemVT.isVector())
19749 FlagSet |= PPC::MOF_Vector;
19750 else
19751 llvm_unreachable("Not expecting illegal scalar floats!");
19752 }
19753
19754 // Compute flags for address computation.
19755 computeFlagsForAddressComputation(N, FlagSet, DAG);
19756
19757 // Compute type extension flags.
19758 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
19759 switch (LN->getExtensionType()) {
19760 case ISD::SEXTLOAD:
19761 FlagSet |= PPC::MOF_SExt;
19762 break;
19763 case ISD::EXTLOAD:
19764 case ISD::ZEXTLOAD:
19765 FlagSet |= PPC::MOF_ZExt;
19766 break;
19767 case ISD::NON_EXTLOAD:
19768 FlagSet |= PPC::MOF_NoExt;
19769 break;
19770 }
19771 } else
19772 FlagSet |= PPC::MOF_NoExt;
19773
19774 // For integers, no extension is the same as zero extension.
19775 // We set the extension mode to zero extension so we don't have
19776 // to add separate entries in AddrModesMap for loads and stores.
19777 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19778 FlagSet |= PPC::MOF_ZExt;
19779 FlagSet &= ~PPC::MOF_NoExt;
19780 }
19781
19782 // If we don't have prefixed instructions, 34-bit constants should be
19783 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19784 bool IsNonP1034BitConst =
19785       ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
19786        FlagSet) == PPC::MOF_RPlusSImm34;
19787 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19788 IsNonP1034BitConst)
19789 FlagSet |= PPC::MOF_NotAddNorCst;
19790
19791 return FlagSet;
19792}
19793
19794/// SelectForceXFormMode - Given the specified address, force it to be
19795/// represented as an indexed [r+r] operation (an XForm instruction).
19796 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
19797                                                       SDValue &Base,
19798 SelectionDAG &DAG) const {
19799
19800   PPC::AddrMode Mode = PPC::AM_XForm;
19801   int16_t ForceXFormImm = 0;
19802 if (provablyDisjointOr(DAG, N) &&
19803 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
19804 Disp = N.getOperand(0);
19805 Base = N.getOperand(1);
19806 return Mode;
19807 }
19808
19809 // If the address is the result of an add, we will utilize the fact that the
19810 // address calculation includes an implicit add. However, we can reduce
19811 // register pressure if we do not materialize a constant just for use as the
19812 // index register. We only get rid of the add if it is not an add of a
19813 // value and a 16-bit signed constant and both have a single use.
19814 if (N.getOpcode() == ISD::ADD &&
19815 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
19816 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
19817 Disp = N.getOperand(0);
19818 Base = N.getOperand(1);
19819 return Mode;
19820 }
19821
19822 // Otherwise, use R0 as the base register.
19823 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19824 N.getValueType());
19825 Base = N;
19826
19827 return Mode;
19828}
19829
19830 bool PPCTargetLowering::splitValueIntoRegisterParts(
19831     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19832 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19833 EVT ValVT = Val.getValueType();
19834 // If we are splitting a scalar integer into f64 parts (i.e. so they
19835 // can be placed into VFRC registers), we need to zero extend and
19836 // bitcast the values. This will ensure the value is placed into a
19837 // VSR using direct moves or stack operations as needed.
19838 if (PartVT == MVT::f64 &&
19839 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
19840 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
19841 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
19842 Parts[0] = Val;
19843 return true;
19844 }
19845 return false;
19846}
19847
19848SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
19849 SelectionDAG &DAG) const {
19850 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19851   TargetLowering::CallLoweringInfo CLI(DAG);
19852   EVT RetVT = Op.getValueType();
19853 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
19854 SDValue Callee =
19855 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
19856 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
19857   TargetLowering::ArgListTy Args;
19858   for (const SDValue &N : Op->op_values()) {
19859 EVT ArgVT = N.getValueType();
19860 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
19861 TargetLowering::ArgListEntry Entry(N, ArgTy);
19862 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
19863 Entry.IsZExt = !Entry.IsSExt;
19864 Args.push_back(Entry);
19865 }
19866
19867 SDValue InChain = DAG.getEntryNode();
19868 SDValue TCChain = InChain;
19869 const Function &F = DAG.getMachineFunction().getFunction();
19870 bool isTailCall =
19871 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
19872 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
19873 if (isTailCall)
19874 InChain = TCChain;
19875 CLI.setDebugLoc(SDLoc(Op))
19876 .setChain(InChain)
19877 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
19878 .setTailCall(isTailCall)
19879 .setSExtResult(SignExtend)
19880 .setZExtResult(!SignExtend)
19881       .setIsPostTypeLegalization(true);
19882   return TLI.LowerCallTo(CLI).first;
19883}
19884
19885SDValue PPCTargetLowering::lowerLibCallBasedOnType(
19886 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
19887 SelectionDAG &DAG) const {
19888 if (Op.getValueType() == MVT::f32)
19889 return lowerToLibCall(LibCallFloatName, Op, DAG);
19890
19891 if (Op.getValueType() == MVT::f64)
19892 return lowerToLibCall(LibCallDoubleName, Op, DAG);
19893
19894 return SDValue();
19895}
19896
19897bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
19898 SDNodeFlags Flags = Op.getNode()->getFlags();
19899 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
19900 Flags.hasNoNaNs() && Flags.hasNoInfs();
19901}
19902
19903bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
19904 return Op.getNode()->getFlags().hasApproximateFuncs();
19905}
19906
19907bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
19908   return getTargetMachine().Options.PPCGenScalarMASSEntries;
19909 }
19910
19911SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
19912 const char *LibCallFloatName,
19913 const char *LibCallDoubleNameFinite,
19914 const char *LibCallFloatNameFinite,
19915 SDValue Op,
19916 SelectionDAG &DAG) const {
19917 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
19918 return SDValue();
19919
19920 if (!isLowringToMASSFiniteSafe(Op))
19921 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
19922 DAG);
19923
19924 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
19925 LibCallDoubleNameFinite, Op, DAG);
19926}
19927
19928SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
19929 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
19930 "__xl_powf_finite", Op, DAG);
19931}
19932
19933SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
19934 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
19935 "__xl_sinf_finite", Op, DAG);
19936}
19937
19938SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
19939 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
19940 "__xl_cosf_finite", Op, DAG);
19941}
19942
19943SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
19944 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
19945 "__xl_logf_finite", Op, DAG);
19946}
19947
19948SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
19949 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
19950 "__xl_log10f_finite", Op, DAG);
19951}
19952
19953SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
19954 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
19955 "__xl_expf_finite", Op, DAG);
19956}
19957
19958// If we happen to match to an aligned D-Form, check if the Frame Index is
19959// adequately aligned. If it is not, reset the mode to match to X-Form.
19960static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
19961 PPC::AddrMode &Mode) {
19962 if (!isa<FrameIndexSDNode>(N))
19963 return;
19964 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
19965 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
19966 Mode = PPC::AM_XForm;
19967}
19968
19969/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
19970/// compute the address flags of the node, get the optimal address mode based
19971/// on the flags, and set the Base and Disp based on the address mode.
19972PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
19973 SDValue N, SDValue &Disp,
19974 SDValue &Base,
19975 SelectionDAG &DAG,
19976 MaybeAlign Align) const {
19977 SDLoc DL(Parent);
19978
19979 // Compute the address flags.
19980 unsigned Flags = computeMOFlags(Parent, N, DAG);
19981
19982 // Get the optimal address mode based on the Flags.
19983 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
19984
19985 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
19986 // Select an X-Form load if it is not.
19987 setXFormForUnalignedFI(N, Flags, Mode);
19988
19989 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
19990 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
19991 assert(Subtarget.isUsingPCRelativeCalls() &&
19992 "Must be using PC-Relative calls when a valid PC-Relative node is "
19993 "present!");
19994 Mode = PPC::AM_PCRel;
19995 }
19996
19997 // Set Base and Disp accordingly depending on the address mode.
19998 switch (Mode) {
19999 case PPC::AM_DForm:
20000 case PPC::AM_DSForm:
20001 case PPC::AM_DQForm: {
20002 // This is a register plus a 16-bit immediate. The base will be the
20003 // register and the displacement will be the immediate unless it
20004 // isn't sufficiently aligned.
20005 if (Flags & PPC::MOF_RPlusSImm16) {
20006 SDValue Op0 = N.getOperand(0);
20007 SDValue Op1 = N.getOperand(1);
20008 int16_t Imm = Op1->getAsZExtVal();
20009 if (!Align || isAligned(*Align, Imm)) {
20010 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
20011 Base = Op0;
20012 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
20013 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20014 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20015 }
20016 break;
20017 }
20018 }
20019 // This is a register plus the @lo relocation. The base is the register
20020 // and the displacement is the global address.
20021 else if (Flags & PPC::MOF_RPlusLo) {
20022 Disp = N.getOperand(1).getOperand(0); // The global address.
20023 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
20024 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
20025 Disp.getOpcode() == ISD::TargetConstantPool ||
20026 Disp.getOpcode() == ISD::TargetJumpTable);
20027 Base = N.getOperand(0);
20028 break;
20029 }
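// For instance, an address computed as roughly (add X, (PPCISD::Lo @g)) takes
// this path: X becomes the base register and the wrapped target global
// address becomes the displacement, which selection emits as the @l (lo16)
// part of the D-Form displacement field.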
20030 // This is a constant address at most 32 bits. The base will be
20031 // zero or load-immediate-shifted and the displacement will be
20032 // the low 16 bits of the address.
20033 else if (Flags & PPC::MOF_AddrIsSImm32) {
20034 auto *CN = cast<ConstantSDNode>(N);
20035 EVT CNType = CN->getValueType(0);
20036 uint64_t CNImm = CN->getZExtValue();
20037 // If this address fits entirely in a 16-bit sext immediate field, codegen
20038 // this as "d, 0".
20039 int16_t Imm;
20040 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20041 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20042 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20043 CNType);
20044 break;
20045 }
20046 // Handle 32-bit sext immediate with LIS + Addr mode.
20047 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20048 (!Align || isAligned(*Align, CNImm))) {
20049 int32_t Addr = (int32_t)CNImm;
20050 // Otherwise, break this down into LIS + Disp.
20051 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20052 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20053 MVT::i32);
20054 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20055 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
20056 break;
20057 }
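// Worked example for the LIS + Disp split above: CNImm = 0x00018000 yields
// Disp = (int16_t)0x8000 = -32768 and Base = LIS of ((0x18000 - (-32768)) >> 16)
// = LIS 2 = 0x00020000, and 0x00020000 + (-32768) reproduces 0x00018000.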
20058 }
20059 // Otherwise, the PPC::MOF_NotAdd flag is set. Load/Store is non-foldable.
20060 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
20061 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
20062 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20063 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20064 } else
20065 Base = N;
20066 break;
20067 }
20068 case PPC::AM_PrefixDForm: {
20069 int64_t Imm34 = 0;
20070 unsigned Opcode = N.getOpcode();
20071 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20072 (isIntS34Immediate(N.getOperand(1), Imm34))) {
20073 // N is an Add/OR node and its second operand is a 34-bit signed immediate.
20074 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20075 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20076 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20077 else
20078 Base = N.getOperand(0);
20079 } else if (isIntS34Immediate(N, Imm34)) {
20080 // The address is a 34-bit signed immediate.
20081 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20082 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20083 }
20084 break;
20085 }
20086 case PPC::AM_PCRel: {
20087 // When selecting PC-Relative instructions, "Base" is not utilized as
20088 // we select the address as [PC+imm].
20089 Disp = N;
20090 break;
20091 }
20092 case PPC::AM_None:
20093 break;
20094 default: { // By default, X-Form is always available to be selected.
20095 // When a frame index is not aligned, we also match by XForm.
20096 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
20097 Base = FI ? N : N.getOperand(1);
20098 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20099 N.getValueType())
20100 : N.getOperand(0);
20101 break;
20102 }
20103 }
20104 return Mode;
20105}
20106
20107CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
20108 bool Return,
20109 bool IsVarArg) const {
20110 switch (CC) {
20111 case CallingConv::Cold:
20112 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20113 default:
20114 return CC_PPC64_ELF;
20115 }
20116}
20117
20118bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
20119 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20120}
20121
20122TargetLowering::AtomicExpansionKind
20123PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20124 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20125 if (shouldInlineQuadwordAtomics() && Size == 128)
20126 return AtomicExpansionKind::MaskedIntrinsic;
20127
20128 switch (AI->getOperation()) {
20129 case AtomicRMWInst::UIncWrap:
20130 case AtomicRMWInst::UDecWrap:
20131 case AtomicRMWInst::USubCond:
20132 case AtomicRMWInst::USubSat:
20133 return AtomicExpansionKind::CmpXChg;
20134 default:
20135 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
20136 }
20137
20138 llvm_unreachable("unreachable atomicrmw operation");
20139}
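// In short: when quadword atomics are available, 128-bit atomicrmw operations
// are inlined through the masked-intrinsic path (ultimately a lqarx/stqcx.
// loop); the remaining operations defer to the expansion chosen by the
// default TargetLowering hook.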
20140
20141TargetLowering::AtomicExpansionKind
20142PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
20143 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
20144 if (shouldInlineQuadwordAtomics() && Size == 128)
20145 return AtomicExpansionKind::MaskedIntrinsic;
20146 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
20147}
20148
20149static Intrinsic::ID
20150getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
20151 switch (BinOp) {
20152 default:
20153 llvm_unreachable("Unexpected AtomicRMW BinOp");
20154 case AtomicRMWInst::Xchg:
20155 return Intrinsic::ppc_atomicrmw_xchg_i128;
20156 case AtomicRMWInst::Add:
20157 return Intrinsic::ppc_atomicrmw_add_i128;
20158 case AtomicRMWInst::Sub:
20159 return Intrinsic::ppc_atomicrmw_sub_i128;
20160 case AtomicRMWInst::And:
20161 return Intrinsic::ppc_atomicrmw_and_i128;
20162 case AtomicRMWInst::Or:
20163 return Intrinsic::ppc_atomicrmw_or_i128;
20164 case AtomicRMWInst::Xor:
20165 return Intrinsic::ppc_atomicrmw_xor_i128;
20166 case AtomicRMWInst::Nand:
20167 return Intrinsic::ppc_atomicrmw_nand_i128;
20168 }
20169}
20170
20171Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
20172 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20173 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20174 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20175 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20176 Type *ValTy = Incr->getType();
20177 assert(ValTy->getPrimitiveSizeInBits() == 128);
20178 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20179 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20180 Value *IncrHi =
20181 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20182 Value *LoHi = Builder.CreateIntrinsic(
20183 getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
20184 {AlignedAddr, IncrLo, IncrHi});
20185 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20186 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20187 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20188 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20189 return Builder.CreateOr(
20190 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20191}
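// A rough sketch of the IR built above for a 128-bit atomicrmw add, using
// %addr and %incr as placeholder names for the aligned pointer and the i128
// increment:
//   %incr_lo = trunc i128 %incr to i64
//   %shr     = lshr i128 %incr, 64
//   %incr_hi = trunc i128 %shr to i64
//   %lohi    = call { i64, i64 } @llvm.ppc.atomicrmw.add.i128(
//                  ptr %addr, i64 %incr_lo, i64 %incr_hi)
//   %lo      = extractvalue { i64, i64 } %lohi, 0
//   %hi      = extractvalue { i64, i64 } %lohi, 1
// followed by zero-extending %lo/%hi to i128 and combining them as
// %lo | (%hi << 64).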
20192
20193Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20194 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20195 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20196 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20197 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20198 Type *ValTy = CmpVal->getType();
20199 assert(ValTy->getPrimitiveSizeInBits() == 128);
20200 Function *IntCmpXchg =
20201 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20202 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20203 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20204 Value *CmpHi =
20205 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20206 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20207 Value *NewHi =
20208 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20209 emitLeadingFence(Builder, CI, Ord);
20210 Value *LoHi =
20211 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20212 emitTrailingFence(Builder, CI, Ord);
20213 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20214 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20215 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20216 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20217 return Builder.CreateOr(
20218 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20219}
20220
20221bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
20222 return Subtarget.useCRBits();
20223}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS)
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
bool isDenormal() const
Definition APFloat.h:1432
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
APInt abs() const
Get the absolute value.
Definition APInt.h:1796
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1397
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1723
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:207
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:363
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:777
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
arg_iterator arg_begin()
Definition Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
size_t arg_size() const
Definition Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:191
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:132
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1078
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as PC-relative, i.e. as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is a legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is a legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
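A minimal sketch of how a lowering hook might use this helper, assuming a SelectionDAG &DAG, an SDLoc dl, and two i32 operands LHS and RHS are already in scope (illustrative names, not from this file):
  // Build an i32 SETCC node comparing LHS and RHS for equality.
  SDValue IsEQ = DAG.getSetCC(dl, MVT::i32, LHS, RHS, ISD::SETEQ);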
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
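For illustration only (assuming DAG, dl, and an i64 SDValue Val are in scope), the helper builds the canonical XOR-with-all-ones form:
  // Builds (XOR Val, -1) on the i64 value Val.
  SDValue Not = DAG.getNOT(dl, Val, MVT::i64);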
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
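A small sketch (hypothetical context: inside a custom lowering routine with DAG and dl in scope):
  SDValue Zero    = DAG.getConstant(0, dl, MVT::i32);      // ConstantSDNode with value 0
  SDValue AllOnes = DAG.getAllOnesConstant(dl, MVT::i32);   // constant with all bits set (-1)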
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
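Sketch of the convenience over building SETCC and SELECT by hand (assumed in-scope SDValues LHS and RHS of the same integer type):
  // Selects LHS when LHS > RHS (signed), otherwise RHS, i.e. a signed max.
  SDValue Max = DAG.getSelectCC(dl, LHS, RHS, LHS, RHS, ISD::SETGT);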
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
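Illustrative use (assuming an i32 scalar SDValue Elt is in scope):
  // BUILD_VECTOR with Elt repeated in all four v4i32 lanes.
  SDValue Splat = DAG.getSplatBuildVector(MVT::v4i32, dl, Elt);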
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
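Sketch under the assumption that Ptr addresses some in-bounds object (the helper attaches the no-wrap pointer-arithmetic flags appropriate for that case):
  // Ptr + 8 bytes, with flags suitable for addressing within the same object.
  SDValue PtrPlus8 = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(8));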
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum for the largest number of comparisons needed to generate a BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:180
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:258
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ TargetConstantPool
Definition ISDOpcodes.h:184
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ TargetExternalSymbol
Definition ISDOpcodes.h:185
@ TargetJumpTable
Definition ISDOpcodes.h:183
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:180
@ GET_ROUNDING
Returns the current rounding mode: -1 = Undefined, 0 = Round to 0, 1 = Round to nearest (ties to even), 2 = Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:181
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:133
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:200
@ MO_TPREL_HA
Definition PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:113
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:140
@ MO_TPREL_LO
Definition PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:160
@ MO_HA
Definition PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:117
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
@ Define
Register definition.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
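A sketch of the typical check in the PPC addressing-mode selectors (Offset is an assumed in-scope SDValue, not from this listing):
  int16_t Imm;
  if (isIntS16Immediate(Offset.getNode(), Imm)) {
    // Offset is a constant that fits the signed 16-bit displacement of a D-form access.
  }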
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
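Since the predicate is constexpr, its behaviour can be pinned down at compile time; a couple of illustrative cases:
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::isPowerOf2_64(1ULL << 40), "any single set bit qualifies");
  static_assert(!llvm::isPowerOf2_64(0), "zero is not a power of two");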
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
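Together with Hi_32 above, this splits a 64-bit value into its word halves; illustrative constexpr checks:
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::Hi_32(0x1234567890ABCDEFULL) == 0x12345678U, "upper word");
  static_assert(llvm::Lo_32(0x1234567890ABCDEFULL) == 0x90ABCDEFU, "lower word");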
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
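A short sketch combining this with isAligned above (the values are illustrative):
  #include "llvm/Support/Alignment.h"
  uint64_t Padded = llvm::alignTo(13, llvm::Align(8));   // rounds 13 up to 16
  bool Ok = llvm::isAligned(llvm::Align(8), Padded);     // true: 16 is a multiple of 8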
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N-bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
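Illustrative constexpr checks for the 16-bit case (the template parameter selects how many low bits carry the value):
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::SignExtend32<16>(0xFFFFu) == -1, "all-ones low half is -1");
  static_assert(llvm::SignExtend32<16>(0x7FFFu) == 0x7FFF, "positive values pass through");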
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
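A tiny sketch of commonAlignment computing the alignment still guaranteed at Base + Offset; the numbers in the comment are examples.
  #include "llvm/Support/Alignment.h"
  #include <cstdint>
  // Alignment guaranteed at an offset from an aligned base address.
  llvm::Align alignAtOffset(llvm::Align Base, uint64_t Offset) {
    return llvm::commonAlignment(Base, Offset);
    // e.g. commonAlignment(Align(16), 8) == Align(8)
  }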
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
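Because isRunOfOnes is a helper local to this file, the snippet below is only a simplified standalone analogue built from the generic isShiftedMask_32, countl_zero, and countr_zero helpers; it covers just the non-wrapping case, and the MSB-first (rlwinm-style) numbering of MB/ME is an assumption, not a quotation of the file's implementation.
  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"
  // Simplified analogue: report the [MB, ME] span of a contiguous 1-mask,
  // numbering bits from the MSB as rotate-and-mask instructions do.
  static bool runOfOnesSketch(unsigned Val, unsigned &MB, unsigned &ME) {
    if (Val == 0 || !llvm::isShiftedMask_32(Val))
      return false;
    MB = llvm::countl_zero(Val);      // first set bit, counted from the MSB
    ME = 31 - llvm::countr_zero(Val); // last set bit, counted from the MSB
    return true;
  }
  // e.g. Val = 0x0FF00000 -> MB = 4, ME = 11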
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
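A minimal sketch of bit_floor; clamping a count down to a power of two is just an illustrative use.
  #include "llvm/ADT/bit.h"
  #include <cstdint>
  // Largest power of two not exceeding N (0 stays 0).
  uint32_t floorPow2(uint32_t N) {
    return llvm::bit_floor(N); // e.g. 20 -> 16, 16 -> 16, 0 -> 0
  }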
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
LLVM_ABI std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
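A hedged sketch exercising a few of the EVT factory and query methods listed above; the surrounding function and the choice of v4i32 and i40 are illustrative.
  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  void evtSketch(llvm::LLVMContext &Ctx) {
    // Build a v4i32 vector type and inspect it.
    llvm::EVT V4I32 = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);
    (void)V4I32.isVector();             // true
    (void)V4I32.getVectorNumElements(); // 4
    (void)V4I32.getVectorElementType(); // i32
    (void)V4I32.getSizeInBits();        // 128 bits
    // An integer type with no matching MVT is "extended", not simple.
    llvm::EVT I40 = llvm::EVT::getIntegerVT(Ctx, 40);
    (void)I40.isSimple();               // false
  }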
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
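A small sketch of the KnownBits queries listed above; the 8-bit width and the particular bit pattern are examples only.
  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/KnownBits.h"
  void knownBitsSketch() {
    llvm::KnownBits Known(8);
    Known.One = llvm::APInt(8, 0x0F);  // low nibble known to be 1
    Known.Zero = llvm::APInt(8, 0xF0); // high nibble known to be 0
    if (Known.isConstant())            // every bit is known
      (void)Known.getConstant();       // yields the APInt value 0x0F
    Known.resetAll();                  // back to "nothing known"
  }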
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
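A hedged sketch of the chained CallLoweringInfo setters listed above, in the shape of a typical libcall-lowering helper; the function name, the "__example_helper" symbol, and the empty argument list are illustrative assumptions, not code from this file.
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLowering.h"
  #include "llvm/IR/CallingConv.h"
  #include "llvm/IR/Type.h"
  // Sketch: call a hypothetical runtime routine that takes no arguments
  // and returns i64, using the chained CallLoweringInfo setters.
  static llvm::SDValue emitExampleLibCall(llvm::SelectionDAG &DAG,
                                          const llvm::TargetLowering &TLI,
                                          llvm::SDValue Chain,
                                          const llvm::SDLoc &dl) {
    llvm::Type *RetTy = llvm::Type::getInt64Ty(*DAG.getContext());
    llvm::SDValue Callee = DAG.getExternalSymbol(
        "__example_helper", TLI.getPointerTy(DAG.getDataLayout()));
    llvm::TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(dl)
        .setChain(Chain)
        .setLibCallee(llvm::CallingConv::C, RetTy, Callee,
                      llvm::TargetLowering::ArgListTy());
    std::pair<llvm::SDValue, llvm::SDValue> Result = TLI.LowerCallTo(CLI);
    return Result.first; // first = return value, second = output chain
  }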
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.