LLVM 22.0.0git
PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/APSInt.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/ADT/StringRef.h"
57#include "llvm/IR/CallingConv.h"
58#include "llvm/IR/Constant.h"
59#include "llvm/IR/Constants.h"
60#include "llvm/IR/DataLayout.h"
61#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/Function.h"
64#include "llvm/IR/GlobalValue.h"
65#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Intrinsics.h"
68#include "llvm/IR/IntrinsicsPowerPC.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/Value.h"
73#include "llvm/MC/MCContext.h"
74#include "llvm/MC/MCExpr.h"
83#include "llvm/Support/Debug.h"
85#include "llvm/Support/Format.h"
91#include <algorithm>
92#include <cassert>
93#include <cstdint>
94#include <iterator>
95#include <list>
96#include <optional>
97#include <utility>
98#include <vector>
99
100using namespace llvm;
101
102#define DEBUG_TYPE "ppc-lowering"
103
104static cl::opt<bool> DisableP10StoreForward(
105    "disable-p10-store-forward",
106 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
107 cl::init(false));
108
109static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
110cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
113cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
116cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
117
118static cl::opt<bool> DisableSCO("disable-ppc-sco",
119cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
120
121static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
122cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
123
124static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
125cl::desc("use absolute jump tables on ppc"), cl::Hidden);
126
127static cl::opt<bool>
128 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
129 cl::desc("disable vector permute decomposition"),
130 cl::init(true), cl::Hidden);
131
132static cl::opt<bool> DisableAutoPairedVecSt(
133    "disable-auto-paired-vec-st",
134 cl::desc("disable automatically generated 32byte paired vector stores"),
135 cl::init(true), cl::Hidden);
136
138 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
139 cl::desc("Set minimum number of entries to use a jump table on PPC"));
140
142 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
143 cl::desc("Set minimum of largest number of comparisons to use bit test for "
144 "switch on PPC."));
145
147 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
148 cl::desc("max depth when checking alias info in GatherAllAliases()"));
149
151 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
152 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
153 "function to use initial-exec"));
154
155STATISTIC(NumTailCalls, "Number of tail calls");
156STATISTIC(NumSiblingCalls, "Number of sibling calls");
157STATISTIC(ShufflesHandledWithVPERM,
158 "Number of shuffles lowered to a VPERM or XXPERM");
159STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
160
161static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
162
163static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
164
165// A faster local-[exec|dynamic] TLS access sequence (enabled with the
166// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
167// variables; consistent with the IBM XL compiler, we apply a max size of
168// slightly under 32KB.
170
171// FIXME: Remove this once the bug has been fixed!
173
174PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
175                                     const PPCSubtarget &STI)
176 : TargetLowering(TM), Subtarget(STI) {
177 // Initialize map that relates the PPC addressing modes to the computed flags
178 // of a load/store instruction. The map is used to determine the optimal
179  // addressing mode when selecting loads and stores.
180 initializeAddrModeMap();
181 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
182 // arguments are at least 4/8 bytes aligned.
183 bool isPPC64 = Subtarget.isPPC64();
184 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
185 const MVT RegVT = Subtarget.getScalarIntVT();
186
187 // Set up the register classes.
188 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
189 if (!useSoftFloat()) {
190 if (hasSPE()) {
191 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
192 // EFPU2 APU only supports f32
193 if (!Subtarget.hasEFPU2())
194 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
195 } else {
196 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
197 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
198 }
199 }
200
203
204  // PowerPC uses addo_carry and subo_carry to propagate carry.
207
208 // On P10, the default lowering generates better code using the
209 // setbc instruction.
210 if (!Subtarget.hasP10Vector()) {
212 if (isPPC64)
214 }
215
216 // Match BITREVERSE to customized fast code sequence in the td file.
219
220  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
221 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
222
223 // Custom lower inline assembly to check for special registers.
224 setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
225 setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
226
227 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
228 for (MVT VT : MVT::integer_valuetypes()) {
231 }
232
233 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
234 setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
235
236 if (Subtarget.isISA3_0()) {
237 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
238 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
239 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
240 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
241 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
242 } else {
243 // No extending loads from f16 or HW conversions back and forth.
244 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
245 setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
246 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
247 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
248 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
249 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
250 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
251 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
252 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
253 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
254 }
255
256 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
257
258  // PowerPC has pre-increment loads and stores.
269 if (!Subtarget.hasSPE()) {
274 }
275
276 if (Subtarget.useCRBits()) {
278
279 if (isPPC64 || Subtarget.hasFPCVT()) {
284
286 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
288 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
289
294
296 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
298 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
299 } else {
304 }
305
306 // PowerPC does not support direct load/store of condition registers.
307 setOperationAction(ISD::LOAD, MVT::i1, Custom);
308 setOperationAction(ISD::STORE, MVT::i1, Custom);
309
310 // FIXME: Remove this once the ANDI glue bug is fixed:
311 if (ANDIGlueBug)
313
314 for (MVT VT : MVT::integer_valuetypes()) {
317 setTruncStoreAction(VT, MVT::i1, Expand);
318 }
319
320 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
321 }
322
323 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
324 // PPC (the libcall is not available).
329
330 // We do not currently implement these libm ops for PowerPC.
331 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
332 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
333 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
334 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
335 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
336 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
337
338 // PowerPC has no SREM/UREM instructions unless we are on P9
339 // On P9 we may use a hardware instruction to compute the remainder.
340 // When the result of both the remainder and the division is required it is
341 // more efficient to compute the remainder from the result of the division
342 // rather than use the remainder instruction. The instructions are legalized
343 // directly because the DivRemPairsPass performs the transformation at the IR
344 // level.
345 if (Subtarget.isISA3_0()) {
350 } else {
355 }
356
357 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
366
367  // Handle constrained floating-point operations on scalars.
368  // TODO: Handle SPE-specific operations.
374
379
380 if (!Subtarget.hasSPE()) {
383 }
384
385 if (Subtarget.hasVSX()) {
388 }
389
390 if (Subtarget.hasFSQRT()) {
393 }
394
395 if (Subtarget.hasFPRND()) {
400
405 }
406
407 // We don't support sin/cos/sqrt/fmod/pow
408 setOperationAction(ISD::FSIN , MVT::f64, Expand);
409 setOperationAction(ISD::FCOS , MVT::f64, Expand);
410 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
412 setOperationAction(ISD::FPOW , MVT::f64, Expand);
413 setOperationAction(ISD::FSIN , MVT::f32, Expand);
414 setOperationAction(ISD::FCOS , MVT::f32, Expand);
415 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
417 setOperationAction(ISD::FPOW , MVT::f32, Expand);
418
419  // MASS transformation for LLVM intrinsics with replicating fast-math flags,
420  // to be consistent with the PPCGenScalarMASSEntries pass.
421 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
422 setOperationAction(ISD::FSIN , MVT::f64, Custom);
423 setOperationAction(ISD::FCOS , MVT::f64, Custom);
424 setOperationAction(ISD::FPOW , MVT::f64, Custom);
425 setOperationAction(ISD::FLOG, MVT::f64, Custom);
426 setOperationAction(ISD::FLOG10, MVT::f64, Custom);
427 setOperationAction(ISD::FEXP, MVT::f64, Custom);
428 setOperationAction(ISD::FSIN , MVT::f32, Custom);
429 setOperationAction(ISD::FCOS , MVT::f32, Custom);
430 setOperationAction(ISD::FPOW , MVT::f32, Custom);
431 setOperationAction(ISD::FLOG, MVT::f32, Custom);
432 setOperationAction(ISD::FLOG10, MVT::f32, Custom);
433 setOperationAction(ISD::FEXP, MVT::f32, Custom);
434 }
435
436 if (Subtarget.hasSPE()) {
439 } else {
440 setOperationAction(ISD::FMA , MVT::f64, Legal);
441 setOperationAction(ISD::FMA , MVT::f32, Legal);
443 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
444 }
445
446 if (Subtarget.hasSPE())
447 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
448
449 // If we're enabling GP optimizations, use hardware square root
450 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
451 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
452
453 if (!Subtarget.hasFSQRT() &&
454 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
455 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
456
457 if (Subtarget.hasFCPSGN()) {
460 } else {
463 }
464
465 if (Subtarget.hasFPRND()) {
466 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
467 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
468 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
469 setOperationAction(ISD::FROUND, MVT::f64, Legal);
470
471 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
472 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
473 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
474 setOperationAction(ISD::FROUND, MVT::f32, Legal);
475 }
476
477  // Prior to P10, PowerPC does not have BSWAP, but we can use the vector BSWAP
478  // instruction xxbrd to speed up scalar BSWAP64.
479 if (Subtarget.isISA3_1()) {
482 } else {
485 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
486 }
487
488  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
489 if (Subtarget.isISA3_0()) {
490 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
491 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
492 } else {
493 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
494 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
495 }
496
497 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
500 } else {
503 }
504
505 // PowerPC does not have ROTR
508
509 if (!Subtarget.useCRBits()) {
510 // PowerPC does not have Select
515 }
516
517 // PowerPC wants to turn select_cc of FP into fsel when possible.
520
521 // PowerPC wants to optimize integer setcc a bit
522 if (!Subtarget.useCRBits())
524
525 if (Subtarget.hasFPU()) {
529
533 }
534
535 // PowerPC does not have BRCOND which requires SetCC
536 if (!Subtarget.useCRBits())
537 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
538
539 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
540
541 if (Subtarget.hasSPE()) {
542 // SPE has built-in conversions
549
550 // SPE supports signaling compare of f32/f64.
553 } else {
554 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
557
558 // PowerPC does not have [U|S]INT_TO_FP
563 }
564
565 if (Subtarget.hasDirectMove() && isPPC64) {
566 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
567 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
568 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
569 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
570
579 } else {
580 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
581 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
582 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
583 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
584 }
585
586 // We cannot sextinreg(i1). Expand to shifts.
588
589 // Custom handling for PowerPC ucmp instruction
591 setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
592
593 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
594 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
595  // support continuations, user-level threading, etc. As a result, no
596  // other SjLj exception interfaces are implemented, so please don't build
597 // your own exception handling based on them.
598 // LLVM/Clang supports zero-cost DWARF exception handling.
601
602 // We want to legalize GlobalAddress and ConstantPool nodes into the
603 // appropriate instructions to materialize the address.
614
615 // TRAP is legal.
616 setOperationAction(ISD::TRAP, MVT::Other, Legal);
617
618 // TRAMPOLINE is custom lowered.
619 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
620 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
621
622 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
623 setOperationAction(ISD::VASTART , MVT::Other, Custom);
624
625 if (Subtarget.is64BitELFABI()) {
626 // VAARG always uses double-word chunks, so promote anything smaller.
627 setOperationAction(ISD::VAARG, MVT::i1, Promote);
628 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
629 setOperationAction(ISD::VAARG, MVT::i8, Promote);
630 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
631 setOperationAction(ISD::VAARG, MVT::i16, Promote);
632 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
633 setOperationAction(ISD::VAARG, MVT::i32, Promote);
634 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
635 setOperationAction(ISD::VAARG, MVT::Other, Expand);
636 } else if (Subtarget.is32BitELFABI()) {
637 // VAARG is custom lowered with the 32-bit SVR4 ABI.
638 setOperationAction(ISD::VAARG, MVT::Other, Custom);
639 setOperationAction(ISD::VAARG, MVT::i64, Custom);
640 } else
641 setOperationAction(ISD::VAARG, MVT::Other, Expand);
642
643 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
644 if (Subtarget.is32BitELFABI())
645 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
646 else
647 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
648
649 // Use the default implementation.
650 setOperationAction(ISD::VAEND , MVT::Other, Expand);
651 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
652 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
653 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
654 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
655 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
656 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
659
660 if (Subtarget.isISA3_0() && isPPC64) {
661 setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
662 setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
663 setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
664 setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
665 setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
666 setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
667 setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
668 setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
669 }
670
671 // We want to custom lower some of our intrinsics.
677
678 // To handle counter-based loop conditions.
680
685
686 // Comparisons that require checking two conditions.
687 if (Subtarget.hasSPE()) {
692 }
705
708
709 if (Subtarget.has64BitSupport()) {
710 // They also have instructions for converting between i64 and fp.
719 // This is just the low 32 bits of a (signed) fp->i64 conversion.
720 // We cannot do this with Promote because i64 is not a legal type.
723
724 if (Subtarget.hasLFIWAX() || isPPC64) {
727 }
728 } else {
729 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
730 if (Subtarget.hasSPE()) {
733 } else {
736 }
737 }
738
739 // With the instructions enabled under FPCVT, we can do everything.
740 if (Subtarget.hasFPCVT()) {
741 if (Subtarget.has64BitSupport()) {
750 }
751
760 }
761
762 if (Subtarget.use64BitRegs()) {
763 // 64-bit PowerPC implementations can support i64 types directly
764 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
765 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
767 // 64-bit PowerPC wants to expand i128 shifts itself.
771 } else {
772 // 32-bit PowerPC wants to expand i64 shifts itself.
776 }
777
778 // PowerPC has better expansions for funnel shifts than the generic
779 // TargetLowering::expandFunnelShift.
780 if (Subtarget.has64BitSupport()) {
783 }
786
787 if (Subtarget.hasVSX()) {
788 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
789 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
790 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
791 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
794 }
795
796 if (Subtarget.hasAltivec()) {
797 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
802 }
803 // First set operation action for all vector types to expand. Then we
804 // will selectively turn on ones that can be effectively codegen'd.
806 // add/sub are legal for all supported vector VT's.
809
810 // For v2i64, these are only valid with P8Vector. This is corrected after
811 // the loop.
812 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
817 }
818 else {
823 }
824
825 if (Subtarget.hasVSX()) {
826 setOperationAction(ISD::FMAXNUM, VT, Legal);
827 setOperationAction(ISD::FMINNUM, VT, Legal);
828 }
829
830 // Vector instructions introduced in P8
831 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
834 }
835 else {
838 }
839
840 // Vector instructions introduced in P9
841 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
843 else
845
846 // We promote all shuffles to v16i8.
848 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
849
850 // We promote all non-typed operations to v4i32.
852 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
854 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
856 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
857 setOperationAction(ISD::LOAD , VT, Promote);
858 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
860 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
863 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
864 setOperationAction(ISD::STORE, VT, Promote);
865 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
866
867 // No other operations are legal.
875 setOperationAction(ISD::FNEG, VT, Expand);
876 setOperationAction(ISD::FSQRT, VT, Expand);
877 setOperationAction(ISD::FLOG, VT, Expand);
878 setOperationAction(ISD::FLOG10, VT, Expand);
879 setOperationAction(ISD::FLOG2, VT, Expand);
880 setOperationAction(ISD::FEXP, VT, Expand);
881 setOperationAction(ISD::FEXP2, VT, Expand);
882 setOperationAction(ISD::FSIN, VT, Expand);
883 setOperationAction(ISD::FCOS, VT, Expand);
884 setOperationAction(ISD::FABS, VT, Expand);
885 setOperationAction(ISD::FFLOOR, VT, Expand);
886 setOperationAction(ISD::FCEIL, VT, Expand);
887 setOperationAction(ISD::FTRUNC, VT, Expand);
888 setOperationAction(ISD::FRINT, VT, Expand);
889 setOperationAction(ISD::FLDEXP, VT, Expand);
890 setOperationAction(ISD::FNEARBYINT, VT, Expand);
901 setOperationAction(ISD::FPOW, VT, Expand);
906
907 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
908 setTruncStoreAction(VT, InnerVT, Expand);
911 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
912 }
913 }
915 if (!Subtarget.hasP8Vector()) {
916 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
917 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
918 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
919 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
920 }
921
922    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
923 // with merges, splats, etc.
925
926    // Vector truncates to sub-word integers that fit in an Altivec/VSX register
927 // are cheap, so handle them before they get expanded to scalar.
933
934 setOperationAction(ISD::AND , MVT::v4i32, Legal);
935 setOperationAction(ISD::OR , MVT::v4i32, Legal);
936 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
937 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
939 Subtarget.useCRBits() ? Legal : Expand);
940 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
949 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
950 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
951 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
952 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
953
954    // Custom lower ROTL v1i128 to VECTOR_SHUFFLE v16i8.
955 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
956 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
957 if (Subtarget.hasAltivec())
958 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
960 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
961 if (Subtarget.hasP8Altivec())
962 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
963
964 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
965 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
966 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
967 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
968
969 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
970 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
971
972 if (Subtarget.hasVSX()) {
973 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
974 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
976 }
977
978 if (Subtarget.hasP8Altivec())
979 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
980 else
981 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
982
983 if (Subtarget.isISA3_1()) {
984 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
985 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
986 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
987 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
988 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
989 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
990 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
991 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
992 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
993 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
994 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
995 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
996 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
997 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
998 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
999 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
1000 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
1001 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
1002 }
1003
1004 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1005 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1006
1009 // LE is P8+/64-bit so direct moves are supported and these operations
1010 // are legal. The custom transformation requires 64-bit since we need a
1011 // pair of stores that will cover a 128-bit load for P10.
1012 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1016 }
1017
1022
1023 // Altivec does not contain unordered floating-point compare instructions
1024 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1025 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1026 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1027 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1028
1029 if (Subtarget.hasVSX()) {
1032 if (Subtarget.hasP8Vector()) {
1035 }
1036 if (Subtarget.hasDirectMove() && isPPC64) {
1045 }
1047
1048 // The nearbyint variants are not allowed to raise the inexact exception
1049 // so we can only code-gen them with fpexcept.ignore.
1052
1053 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1054 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1055 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1056 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1057 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1058 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1059 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1060 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1061
1062 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1063 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1064 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1065 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1066 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1067
1068 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1069 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1070
1071 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1072 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1073
1074 // Share the Altivec comparison restrictions.
1075 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1076 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1077 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1078 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1079
1080 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1081 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1082
1084
1085 if (Subtarget.hasP8Vector())
1086 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1087
1088 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1089
1090 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1091 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1092 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1093
1094 if (Subtarget.hasP8Altivec()) {
1095 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1096 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1097 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1098
1099 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1100 // SRL, but not for SRA because of the instructions available:
1101      // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1102      // doing.
1103 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1104 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1105 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1106
1107 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1108 }
1109 else {
1110 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1111 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1112 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1113
1114 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1115
1116 // VSX v2i64 only supports non-arithmetic operations.
1117 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1118 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1119 }
1120
1121 if (Subtarget.isISA3_1())
1122 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1123 else
1124 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1125
1126 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1127 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1128 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1129 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1130
1132
1141
1142 // Custom handling for partial vectors of integers converted to
1143 // floating point. We already have optimal handling for v2i32 through
1144 // the DAG combine, so those aren't necessary.
1161
1162 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1163 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1164 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1165 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1168
1171
1172      // Handle constrained floating-point operations on vectors.
1173      // The predicate is `hasVSX` because Altivec instructions raise no
1174      // exceptions but VSX vector instructions do.
1188
1202
1203 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1204 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1205
1206 for (MVT FPT : MVT::fp_valuetypes())
1207 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1208
1209 // Expand the SELECT to SELECT_CC
1211
1212 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1213 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1214
1215 // No implementation for these ops for PowerPC.
1216 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
1217 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1218 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1219 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1220 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1221 setOperationAction(ISD::FREM, MVT::f128, Expand);
1222 }
1223
1224 if (Subtarget.hasP8Altivec()) {
1225 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1226 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1227 }
1228
1229 if (Subtarget.hasP9Vector()) {
1232
1233 // Test data class instructions store results in CR bits.
1234 if (Subtarget.useCRBits()) {
1239 }
1240
1241 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1242 // SRL, but not for SRA because of the instructions available:
1243 // VS{RL} and VS{RL}O.
1244 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1245 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1246 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1247
1248 setOperationAction(ISD::FADD, MVT::f128, Legal);
1249 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1250 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1251 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1252 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1253
1254 setOperationAction(ISD::FMA, MVT::f128, Legal);
1261
1262 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1263 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1264 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1265 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1266 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1267 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1268
1271 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1272
1273 // Handle constrained floating-point operations of fp128
1289 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1290 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1291 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1292 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1293 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1294 } else if (Subtarget.hasVSX()) {
1295 setOperationAction(ISD::LOAD, MVT::f128, Promote);
1296 setOperationAction(ISD::STORE, MVT::f128, Promote);
1297
1298 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1299 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1300
1301      // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
1302      // fp_to_uint and int_to_fp.
1305
1306 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1307 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1308 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1309 setOperationAction(ISD::FABS, MVT::f128, Expand);
1310 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1311 setOperationAction(ISD::FMA, MVT::f128, Expand);
1313
1314 // Expand the fp_extend if the target type is fp128.
1315 setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1317
1318 // Expand the fp_round if the source type is fp128.
1319 for (MVT VT : {MVT::f32, MVT::f64}) {
1322 }
1323
1327 setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1328
1329 // Lower following f128 select_cc pattern:
1330 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1332
1333 // We need to handle f128 SELECT_CC with integer result type.
1335 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1336 }
1337
1338 if (Subtarget.hasP9Altivec()) {
1339 if (Subtarget.isISA3_1()) {
1344 } else {
1347 }
1355
1356 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1357 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1358 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1359 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1360 }
1361
1362 if (Subtarget.hasP10Vector()) {
1364 }
1365 }
1366
1367 if (Subtarget.pairedVectorMemops()) {
1368 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1369 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1370 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1371 }
1372 if (Subtarget.hasMMA()) {
1373 if (Subtarget.isISAFuture()) {
1374 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1375 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1376 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1377 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1378 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1379 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1380 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1381 } else {
1382 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1383 }
1384 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1385 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1387 }
1388
1389 if (Subtarget.has64BitSupport())
1390 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1391
1392 if (Subtarget.isISA3_1())
1393 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1394
1395 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1396
1397 if (!isPPC64) {
1398 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1399 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1400 }
1401
1403 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1404 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1406 }
1407
1409
1410 if (Subtarget.hasAltivec()) {
1411 // Altivec instructions set fields to all zeros or all ones.
1413 }
1414
1417 else if (isPPC64)
1419 else
1421
1422 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1423
1424 // We have target-specific dag combine patterns for the following nodes:
1427 if (Subtarget.hasFPCVT())
1429 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1430 if (Subtarget.useCRBits())
1431 setTargetDAGCombine(ISD::BRCOND);
1434
1436
1438
1439 if (Subtarget.useCRBits()) {
1441 }
1442
1443 // With 32 condition bits, we don't need to sink (and duplicate) compares
1444 // aggressively in CodeGenPrep.
1445 if (Subtarget.useCRBits()) {
1447 }
1448
1449 // TODO: The default entry number is set to 64. This stops most jump table
1450 // generation on PPC. But it is good for current PPC HWs because the indirect
1451  // branch instruction mtctr to the jump table may lead to bad branch prediction.
1452 // Re-evaluate this value on future HWs that can do better with mtctr.
1454
1455  // The default minimum for the largest number of comparisons in a BitTest cluster is 3.
1457
1459 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1460
1461 auto CPUDirective = Subtarget.getCPUDirective();
1462 switch (CPUDirective) {
1463 default: break;
1464 case PPC::DIR_970:
1465 case PPC::DIR_A2:
1466 case PPC::DIR_E500:
1467 case PPC::DIR_E500mc:
1468 case PPC::DIR_E5500:
1469 case PPC::DIR_PWR4:
1470 case PPC::DIR_PWR5:
1471 case PPC::DIR_PWR5X:
1472 case PPC::DIR_PWR6:
1473 case PPC::DIR_PWR6X:
1474 case PPC::DIR_PWR7:
1475 case PPC::DIR_PWR8:
1476 case PPC::DIR_PWR9:
1477 case PPC::DIR_PWR10:
1478 case PPC::DIR_PWR11:
1482 break;
1483 }
1484
1485 if (Subtarget.enableMachineScheduler())
1487 else
1489
1491
1492 // The Freescale cores do better with aggressive inlining of memcpy and
1493  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1494 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1495 MaxStoresPerMemset = 32;
1497 MaxStoresPerMemcpy = 32;
1501 } else if (CPUDirective == PPC::DIR_A2) {
1502 // The A2 also benefits from (very) aggressive inlining of memcpy and
1503    // friends. The overhead of the function call, even when warm, can be
1504 // over one hundred cycles.
1505 MaxStoresPerMemset = 128;
1506 MaxStoresPerMemcpy = 128;
1507 MaxStoresPerMemmove = 128;
1508 MaxLoadsPerMemcmp = 128;
1509 } else {
1512 }
1513
1514 // Enable generation of STXVP instructions by default for mcpu=future.
1515 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1516 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1517 DisableAutoPairedVecSt = false;
1518
1519 IsStrictFPEnabled = true;
1520
1521 // Let the subtarget (CPU) decide if a predictable select is more expensive
1522 // than the corresponding branch. This information is used in CGP to decide
1523 // when to convert selects into branches.
1524 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1525
1527}
1528
1529// *********************************** NOTE ************************************
1530// For selecting load and store instructions, the addressing modes are defined
1531// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1532// patterns to match the load and store instructions.
1533//
1534// The TD definitions for the addressing modes correspond to their respective
1535// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1536// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1537// address mode flags of a particular node. Afterwards, the computed address
1538// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1539// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1540// accordingly, based on the preferred addressing mode.
1541//
1542// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1543// MemOpFlags contains all the possible flags that can be used to compute the
1544// optimal addressing mode for load and store instructions.
1545// AddrMode contains all the possible load and store addressing modes available
1546// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1547//
1548// When adding new load and store instructions, it is possible that new address
1549// flags may need to be added into MemOpFlags, and a new addressing mode will
1550// need to be added to AddrMode. An entry of the new addressing mode (consisting
1551// need to be added to AddrMode. An entry for the new addressing mode (consisting
1552// instructions) will need to be added into initializeAddrModeMap() below.
1553// Finally, when adding new addressing modes, getAddrModeForFlags() will
1554// need to be updated to account for selecting the optimal addressing mode.
1555// *****************************************************************************
1556/// Initialize the map that relates the different addressing modes of the load
1557/// and store instructions to a set of flags. This ensures the load/store
1558/// instruction is correctly matched during instruction selection.
1559void PPCTargetLowering::initializeAddrModeMap() {
1560 AddrModesMap[PPC::AM_DForm] = {
1561 // LWZ, STW
1566 // LBZ, LHZ, STB, STH
1571 // LHA
1576 // LFS, LFD, STFS, STFD
1581 };
1582 AddrModesMap[PPC::AM_DSForm] = {
1583 // LWA
1587 // LD, STD
1591 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1595 };
1596 AddrModesMap[PPC::AM_DQForm] = {
1597 // LXV, STXV
1601 };
1602 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1604 // TODO: Add mapping for quadword load/store.
1605}
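// Illustrative sketch (not from the LLVM source): conceptually, the flag sets
// stored above are compared against the MemOpFlags computed for a particular
// memory access; a stored flag set is taken to match when every one of its
// bits is present in the computed flags. The real logic lives in
// computeMOFlags()/getAddrModeForFlags() and may differ in detail.
//
//   static bool flagSetMatches(unsigned ComputedFlags, unsigned StoredFlagSet) {
//     // Every flag required by the stored set must be present.
//     return (ComputedFlags & StoredFlagSet) == StoredFlagSet;
//   }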
1606
1607/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1608/// the desired ByVal argument alignment.
1609static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1610 if (MaxAlign == MaxMaxAlign)
1611 return;
1612 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1613 if (MaxMaxAlign >= 32 &&
1614 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1615 MaxAlign = Align(32);
1616 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1617 MaxAlign < 16)
1618 MaxAlign = Align(16);
1619 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1620 Align EltAlign;
1621 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1622 if (EltAlign > MaxAlign)
1623 MaxAlign = EltAlign;
1624 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1625 for (auto *EltTy : STy->elements()) {
1626 Align EltAlign;
1627 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1628 if (EltAlign > MaxAlign)
1629 MaxAlign = EltAlign;
1630 if (MaxAlign == MaxMaxAlign)
1631 break;
1632 }
1633 }
1634}
1635
1636/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1637/// function arguments in the caller parameter area.
1638Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1639                                               const DataLayout &DL) const {
1640  // 16-byte and wider vectors are passed on a 16-byte boundary.
1641  // The rest use an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1642 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1643 if (Subtarget.hasAltivec())
1644 getMaxByValAlign(Ty, Alignment, Align(16));
1645 return Alignment;
1646}
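// Illustrative example (not from the LLVM source): for a hypothetical ByVal
// aggregate such as
//   struct S { vector int V; char C; };   // V is a 128-bit Altivec vector
// getMaxByValAlign() sees the 128-bit vector member and raises MaxAlign to 16,
// so getByValTypeAlignment() returns Align(16) when Altivec is available. With
// no vector members the result stays at the base Align(8) on PPC64 or Align(4)
// on PPC32.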
1647
1648bool PPCTargetLowering::useSoftFloat() const {
1649  return Subtarget.useSoftFloat();
1650}
1651
1652bool PPCTargetLowering::hasSPE() const {
1653  return Subtarget.hasSPE();
1654}
1655
1657 return VT.isScalarInteger();
1658}
1659
1661 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1662 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1663 return false;
1664
1665 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1666 if (VTy->getScalarType()->isIntegerTy()) {
1667      // ElemSizeInBits of 8/16 fits in the immediate field, so it is not needed here.
1668 if (ElemSizeInBits == 32) {
1669 Index = Subtarget.isLittleEndian() ? 2 : 1;
1670 return true;
1671 }
1672 if (ElemSizeInBits == 64) {
1673 Index = Subtarget.isLittleEndian() ? 1 : 0;
1674 return true;
1675 }
1676 }
1677 }
1678 return false;
1679}
1680
1681const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1682 switch ((PPCISD::NodeType)Opcode) {
1683 case PPCISD::FIRST_NUMBER: break;
1684 case PPCISD::FSEL: return "PPCISD::FSEL";
1685 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1686 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1687 case PPCISD::FCFID: return "PPCISD::FCFID";
1688 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1689 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1690 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1691 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1692 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1693 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1694 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1695 case PPCISD::FRE: return "PPCISD::FRE";
1696 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1697 case PPCISD::FTSQRT:
1698 return "PPCISD::FTSQRT";
1699 case PPCISD::FSQRT:
1700 return "PPCISD::FSQRT";
1701 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1702 case PPCISD::VPERM: return "PPCISD::VPERM";
1703 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1705 return "PPCISD::XXSPLTI_SP_TO_DP";
1707 return "PPCISD::XXSPLTI32DX";
1708 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1709 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1710 case PPCISD::XXPERM:
1711 return "PPCISD::XXPERM";
1712 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1713 case PPCISD::VSRQ:
1714 return "PPCISD::VSRQ";
1715 case PPCISD::CMPB: return "PPCISD::CMPB";
1716 case PPCISD::Hi: return "PPCISD::Hi";
1717 case PPCISD::Lo: return "PPCISD::Lo";
1718 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1719 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1720 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1721 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1722 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1723 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1724 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1725 case PPCISD::SRL: return "PPCISD::SRL";
1726 case PPCISD::SRA: return "PPCISD::SRA";
1727 case PPCISD::SHL: return "PPCISD::SHL";
1728 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1729 case PPCISD::CALL: return "PPCISD::CALL";
1730 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1731 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1732 case PPCISD::CALL_RM:
1733 return "PPCISD::CALL_RM";
1735 return "PPCISD::CALL_NOP_RM";
1737 return "PPCISD::CALL_NOTOC_RM";
1738 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1739 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1740 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1741 case PPCISD::BCTRL_RM:
1742 return "PPCISD::BCTRL_RM";
1744 return "PPCISD::BCTRL_LOAD_TOC_RM";
1745 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1746 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1747 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1748 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1749 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1750 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1751 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1752 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1753 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1754 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1756 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1758 return "PPCISD::ANDI_rec_1_EQ_BIT";
1760 return "PPCISD::ANDI_rec_1_GT_BIT";
1761 case PPCISD::VCMP: return "PPCISD::VCMP";
1762 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1763 case PPCISD::LBRX: return "PPCISD::LBRX";
1764 case PPCISD::STBRX: return "PPCISD::STBRX";
1765 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1766 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1767 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1768 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1769 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1770 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1771 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1772 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1773 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1775 return "PPCISD::ST_VSR_SCAL_INT";
1776 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1777 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1778 case PPCISD::BDZ: return "PPCISD::BDZ";
1779 case PPCISD::MFFS: return "PPCISD::MFFS";
1780 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1781 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1782 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1783 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1784 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1785 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1786 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1787 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1788 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1789 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1790 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1791 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1792 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1793 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1794 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1795 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1796 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1797 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1798 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1799 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1800 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1801 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1802 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1804 return "PPCISD::PADDI_DTPREL";
1805 case PPCISD::VADD_SPLAT:
1806 return "PPCISD::VADD_SPLAT";
1807 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1808 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1809 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1810 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1811 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1812 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1813 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1814 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1815 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1817 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1819 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1820 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1821 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1822 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1823 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1824 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1825 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1826 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1827 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1829 return "PPCISD::STRICT_FADDRTZ";
1831 return "PPCISD::STRICT_FCTIDZ";
1833 return "PPCISD::STRICT_FCTIWZ";
1835 return "PPCISD::STRICT_FCTIDUZ";
1837 return "PPCISD::STRICT_FCTIWUZ";
1839 return "PPCISD::STRICT_FCFID";
1841 return "PPCISD::STRICT_FCFIDU";
1843 return "PPCISD::STRICT_FCFIDS";
1845 return "PPCISD::STRICT_FCFIDUS";
1846 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1847 case PPCISD::STORE_COND:
1848 return "PPCISD::STORE_COND";
1849 case PPCISD::SETBC:
1850 return "PPCISD::SETBC";
1851 case PPCISD::SETBCR:
1852 return "PPCISD::SETBCR";
1853 case PPCISD::ADDC:
1854 return "PPCISD::ADDC";
1855 case PPCISD::ADDE:
1856 return "PPCISD::ADDE";
1857 case PPCISD::SUBC:
1858 return "PPCISD::SUBC";
1859 case PPCISD::SUBE:
1860 return "PPCISD::SUBE";
1861 }
1862 return nullptr;
1863}
1864
1866 EVT VT) const {
1867 if (!VT.isVector())
1868 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1869
1871}
1872
1874 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1875 return true;
1876}
1877
1878//===----------------------------------------------------------------------===//
1879// Node matching predicates, for use by the tblgen matching code.
1880//===----------------------------------------------------------------------===//
1881
1882/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1883static bool isFloatingPointZero(SDValue Op) {
1884  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1885    return CFP->getValueAPF().isZero();
1886 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1887 // Maybe this has already been legalized into the constant pool?
1888 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1889 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1890 return CFP->getValueAPF().isZero();
1891 }
1892 return false;
1893}
1894
1895/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1896/// true if Op is undef or if it matches the specified value.
1897static bool isConstantOrUndef(int Op, int Val) {
1898 return Op < 0 || Op == Val;
1899}
1900
1901/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1902/// VPKUHUM instruction.
1903/// The ShuffleKind distinguishes between big-endian operations with
1904/// two different inputs (0), either-endian operations with two identical
1905/// inputs (1), and little-endian operations with two different inputs (2).
1906/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1907bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1908                               SelectionDAG &DAG) {
1909 bool IsLE = DAG.getDataLayout().isLittleEndian();
1910 if (ShuffleKind == 0) {
1911 if (IsLE)
1912 return false;
1913 for (unsigned i = 0; i != 16; ++i)
1914 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1915 return false;
1916 } else if (ShuffleKind == 2) {
1917 if (!IsLE)
1918 return false;
1919 for (unsigned i = 0; i != 16; ++i)
1920 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1921 return false;
1922 } else if (ShuffleKind == 1) {
1923 unsigned j = IsLE ? 0 : 1;
1924 for (unsigned i = 0; i != 8; ++i)
1925 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1926 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1927 return false;
1928 }
1929 return true;
1930}
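// Illustrative example (not from the LLVM source): for the big-endian
// two-input case (ShuffleKind == 0), the accepted mask is <1,3,5,...,31>,
// i.e. byte i of the result is the low-order byte (offset i*2+1) of halfword
// i across the two concatenated inputs. For the little-endian swapped case
// (ShuffleKind == 2) the expected offsets are the even bytes <0,2,4,...,30>.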
1931
1932/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1933/// VPKUWUM instruction.
1934/// The ShuffleKind distinguishes between big-endian operations with
1935/// two different inputs (0), either-endian operations with two identical
1936/// inputs (1), and little-endian operations with two different inputs (2).
1937/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1938bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1939                               SelectionDAG &DAG) {
1940 bool IsLE = DAG.getDataLayout().isLittleEndian();
1941 if (ShuffleKind == 0) {
1942 if (IsLE)
1943 return false;
1944 for (unsigned i = 0; i != 16; i += 2)
1945 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1946 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1947 return false;
1948 } else if (ShuffleKind == 2) {
1949 if (!IsLE)
1950 return false;
1951 for (unsigned i = 0; i != 16; i += 2)
1952 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1953 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1954 return false;
1955 } else if (ShuffleKind == 1) {
1956 unsigned j = IsLE ? 0 : 2;
1957 for (unsigned i = 0; i != 8; i += 2)
1958 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1959 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1960 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1961 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1962 return false;
1963 }
1964 return true;
1965}
1966
1967/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1968/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1969/// current subtarget.
1970///
1971/// The ShuffleKind distinguishes between big-endian operations with
1972/// two different inputs (0), either-endian operations with two identical
1973/// inputs (1), and little-endian operations with two different inputs (2).
1974/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1975bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1976                               SelectionDAG &DAG) {
1977 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1978 if (!Subtarget.hasP8Vector())
1979 return false;
1980
1981 bool IsLE = DAG.getDataLayout().isLittleEndian();
1982 if (ShuffleKind == 0) {
1983 if (IsLE)
1984 return false;
1985 for (unsigned i = 0; i != 16; i += 4)
1986 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1987 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1988 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1989 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1990 return false;
1991 } else if (ShuffleKind == 2) {
1992 if (!IsLE)
1993 return false;
1994 for (unsigned i = 0; i != 16; i += 4)
1995 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1996 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1997 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1998 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1999 return false;
2000 } else if (ShuffleKind == 1) {
2001 unsigned j = IsLE ? 0 : 4;
2002 for (unsigned i = 0; i != 8; i += 4)
2003 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
2004 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
2005 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
2006 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
2007 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
2008 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
2009 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
2010 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
2011 return false;
2012 }
2013 return true;
2014}
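// Illustrative note (added, not from the original source): the big-endian
// two-input form (ShuffleKind == 0) matches
// <4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31>, i.e. the low-order word of
// each doubleword of both inputs, which is what vpkudum produces.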
2015
2016/// isVMerge - Common function, used to match vmrg* shuffles.
2017///
2018static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2019 unsigned LHSStart, unsigned RHSStart) {
2020 if (N->getValueType(0) != MVT::v16i8)
2021 return false;
2022 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2023 "Unsupported merge size!");
2024
2025 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2026 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2027 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2028 LHSStart+j+i*UnitSize) ||
2029 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2030 RHSStart+j+i*UnitSize))
2031 return false;
2032 }
2033 return true;
2034}
2035
2036/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2037/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2038/// The ShuffleKind distinguishes between big-endian merges with two
2039/// different inputs (0), either-endian merges with two identical inputs (1),
2040/// and little-endian merges with two different inputs (2). For the latter,
2041/// the input operands are swapped (see PPCInstrAltivec.td).
2042bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2043                             unsigned ShuffleKind, SelectionDAG &DAG) {
2044 if (DAG.getDataLayout().isLittleEndian()) {
2045 if (ShuffleKind == 1) // unary
2046 return isVMerge(N, UnitSize, 0, 0);
2047 else if (ShuffleKind == 2) // swapped
2048 return isVMerge(N, UnitSize, 0, 16);
2049 else
2050 return false;
2051 } else {
2052 if (ShuffleKind == 1) // unary
2053 return isVMerge(N, UnitSize, 8, 8);
2054 else if (ShuffleKind == 0) // normal
2055 return isVMerge(N, UnitSize, 8, 24);
2056 else
2057 return false;
2058 }
2059}
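// Illustrative note (added, not from the original source): for UnitSize == 1
// on a big-endian target with two inputs (ShuffleKind == 0),
// isVMerge(N, 1, 8, 24) matches <8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31>,
// the byte interleaving produced by vmrglb.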
2060
2061/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2062/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2063/// The ShuffleKind distinguishes between big-endian merges with two
2064/// different inputs (0), either-endian merges with two identical inputs (1),
2065/// and little-endian merges with two different inputs (2). For the latter,
2066/// the input operands are swapped (see PPCInstrAltivec.td).
2067bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2068                             unsigned ShuffleKind, SelectionDAG &DAG) {
2069 if (DAG.getDataLayout().isLittleEndian()) {
2070 if (ShuffleKind == 1) // unary
2071 return isVMerge(N, UnitSize, 8, 8);
2072 else if (ShuffleKind == 2) // swapped
2073 return isVMerge(N, UnitSize, 8, 24);
2074 else
2075 return false;
2076 } else {
2077 if (ShuffleKind == 1) // unary
2078 return isVMerge(N, UnitSize, 0, 0);
2079 else if (ShuffleKind == 0) // normal
2080 return isVMerge(N, UnitSize, 0, 16);
2081 else
2082 return false;
2083 }
2084}
2085
2086/**
2087 * Common function used to match vmrgew and vmrgow shuffles
2088 *
2089 * The indexOffset determines whether to look for even or odd words in
2090 * the shuffle mask. This is based on the endianness of the target
2091 * machine.
2092 * - Little Endian:
2093 * - Use offset of 0 to check for odd elements
2094 * - Use offset of 4 to check for even elements
2095 * - Big Endian:
2096 * - Use offset of 0 to check for even elements
2097 * - Use offset of 4 to check for odd elements
2098 * A detailed description of the vector element ordering for little endian and
2099 * big endian can be found at
2100 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2101 * Targeting your applications - what little endian and big endian IBM XL C/C++
2102 * compiler differences mean to you
2103 *
2104 * The mask to the shuffle vector instruction specifies the indices of the
2105 * elements from the two input vectors to place in the result. The elements are
2106 * numbered in array-access order, starting with the first vector. These vectors
2107 * are always of type v16i8, thus each vector will contain 16 elements, each
2108 * 8 bits wide. More info on the shuffle vector can be found in the
2109 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2110 * Language Reference.
2111 *
2112 * The RHSStartValue indicates whether the same input vectors are used (unary)
2113 * or two different input vectors are used, based on the following:
2114 * - If the instruction uses the same vector for both inputs, the range of the
2115 * indices will be 0 to 15. In this case, the RHSStart value passed should
2116 * be 0.
2117 * - If the instruction has two different vectors then the range of the
2118 * indices will be 0 to 31. In this case, the RHSStart value passed should
2119 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2120 * to 31 specify elements in the second vector).
2121 *
2122 * \param[in] N The shuffle vector SD Node to analyze
2123 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2124 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2125 * vector to the shuffle_vector instruction
2126 * \return true iff this shuffle vector represents an even or odd word merge
2127 */
2128static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2129 unsigned RHSStartValue) {
2130 if (N->getValueType(0) != MVT::v16i8)
2131 return false;
2132
2133 for (unsigned i = 0; i < 2; ++i)
2134 for (unsigned j = 0; j < 4; ++j)
2135 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2136 i*RHSStartValue+j+IndexOffset) ||
2137 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2138 i*RHSStartValue+j+IndexOffset+8))
2139 return false;
2140 return true;
2141}
2142
2143/**
2144 * Determine if the specified shuffle mask is suitable for the vmrgew or
2145 * vmrgow instructions.
2146 *
2147 * \param[in] N The shuffle vector SD Node to analyze
2148 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2149 * \param[in] ShuffleKind Identify the type of merge:
2150 * - 0 = big-endian merge with two different inputs;
2151 * - 1 = either-endian merge with two identical inputs;
2152 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2153 * little-endian merges).
2154 * \param[in] DAG The current SelectionDAG
2155 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
2156 */
2157bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2158                              unsigned ShuffleKind, SelectionDAG &DAG) {
2159 if (DAG.getDataLayout().isLittleEndian()) {
2160 unsigned indexOffset = CheckEven ? 4 : 0;
2161 if (ShuffleKind == 1) // Unary
2162 return isVMerge(N, indexOffset, 0);
2163 else if (ShuffleKind == 2) // swapped
2164 return isVMerge(N, indexOffset, 16);
2165 else
2166 return false;
2167 }
2168 else {
2169 unsigned indexOffset = CheckEven ? 0 : 4;
2170 if (ShuffleKind == 1) // Unary
2171 return isVMerge(N, indexOffset, 0);
2172 else if (ShuffleKind == 0) // Normal
2173 return isVMerge(N, indexOffset, 16);
2174 else
2175 return false;
2176 }
2177 return false;
2178}
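// Illustrative note (added, not from the original source): a big-endian
// two-input even-word merge (CheckEven, ShuffleKind == 0) corresponds to the
// mask <0,1,2,3,16,17,18,19,8,9,10,11,24,25,26,27>, i.e. result words
// {A0, B0, A2, B2}, which is what vmrgew produces.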
2179
2180/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2181/// amount, otherwise return -1.
2182/// The ShuffleKind distinguishes between big-endian operations with two
2183/// different inputs (0), either-endian operations with two identical inputs
2184/// (1), and little-endian operations with two different inputs (2). For the
2185/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2186int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2187 SelectionDAG &DAG) {
2188 if (N->getValueType(0) != MVT::v16i8)
2189 return -1;
2190
2191  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2192
2193 // Find the first non-undef value in the shuffle mask.
2194 unsigned i;
2195 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2196 /*search*/;
2197
2198 if (i == 16) return -1; // all undef.
2199
2200 // Otherwise, check to see if the rest of the elements are consecutively
2201 // numbered from this value.
2202 unsigned ShiftAmt = SVOp->getMaskElt(i);
2203 if (ShiftAmt < i) return -1;
2204
2205 ShiftAmt -= i;
2206 bool isLE = DAG.getDataLayout().isLittleEndian();
2207
2208 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2209 // Check the rest of the elements to see if they are consecutive.
2210 for (++i; i != 16; ++i)
2211 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2212 return -1;
2213 } else if (ShuffleKind == 1) {
2214 // Check the rest of the elements to see if they are consecutive.
2215 for (++i; i != 16; ++i)
2216 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2217 return -1;
2218 } else
2219 return -1;
2220
2221 if (isLE)
2222 ShiftAmt = 16 - ShiftAmt;
2223
2224 return ShiftAmt;
2225}
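// Illustrative note (added, not from the original source): with
// ShuffleKind == 0 on a big-endian target, the consecutive mask <3,4,5,...,18>
// yields a shift amount of 3; on little-endian (ShuffleKind == 2) the same
// mask is reported as 16 - 3 = 13 because the operands are swapped.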
2226
2227/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2228/// specifies a splat of a single element that is suitable for input to
2229/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2230bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2231  EVT VT = N->getValueType(0);
2232 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2233 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2234
2235 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2236 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2237
2238 // The consecutive indices need to specify an element, not part of two
2239 // different elements. So abandon ship early if this isn't the case.
2240 if (N->getMaskElt(0) % EltSize != 0)
2241 return false;
2242
2243 // This is a splat operation if each element of the permute is the same, and
2244 // if the value doesn't reference the second vector.
2245 unsigned ElementBase = N->getMaskElt(0);
2246
2247 // FIXME: Handle UNDEF elements too!
2248 if (ElementBase >= 16)
2249 return false;
2250
2251 // Check that the indices are consecutive, in the case of a multi-byte element
2252 // splatted with a v16i8 mask.
2253 for (unsigned i = 1; i != EltSize; ++i)
2254 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2255 return false;
2256
2257 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2258 // An UNDEF element is a sequence of UNDEF bytes.
2259 if (N->getMaskElt(i) < 0) {
2260 for (unsigned j = 1; j != EltSize; ++j)
2261 if (N->getMaskElt(i + j) >= 0)
2262 return false;
2263 } else
2264 for (unsigned j = 0; j != EltSize; ++j)
2265 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2266 return false;
2267 }
2268 return true;
2269}
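// Illustrative note (added, not from the original source): with EltSize == 4,
// a splat of word element 2 appears as the byte mask <8,9,10,11> repeated four
// times; trailing groups may also be entirely undef and still match.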
2270
2271/// Check that the mask is shuffling N byte elements. Within each N byte
2272/// element of the mask, the indices could be either in increasing or
2273/// decreasing order as long as they are consecutive.
2274/// \param[in] N the shuffle vector SD Node to analyze
2275/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2276/// Word/DoubleWord/QuadWord).
2277/// \param[in] StepLen the step between consecutive indices within each N-byte
2278///            element: 1 if the mask is in increasing order, -1 if decreasing.
2279/// \return true iff the mask is shuffling N byte elements.
2280static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2281 int StepLen) {
2282 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2283 "Unexpected element width.");
2284  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2285
2286 unsigned NumOfElem = 16 / Width;
2287 unsigned MaskVal[16]; // Width is never greater than 16
2288 for (unsigned i = 0; i < NumOfElem; ++i) {
2289 MaskVal[0] = N->getMaskElt(i * Width);
2290 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2291 return false;
2292 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2293 return false;
2294 }
2295
2296 for (unsigned int j = 1; j < Width; ++j) {
2297 MaskVal[j] = N->getMaskElt(i * Width + j);
2298 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2299 return false;
2300 }
2301 }
2302 }
2303
2304 return true;
2305}
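// Illustrative note (added, not from the original source): with Width == 4 and
// StepLen == -1, a mask such as <3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12>
// qualifies, since each word's bytes are consecutive in decreasing order; this
// is the pattern the XXBR (byte-reverse) checks below rely on.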
2306
2307bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2308 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2309 if (!isNByteElemShuffleMask(N, 4, 1))
2310 return false;
2311
2312 // Now we look at mask elements 0,4,8,12
2313 unsigned M0 = N->getMaskElt(0) / 4;
2314 unsigned M1 = N->getMaskElt(4) / 4;
2315 unsigned M2 = N->getMaskElt(8) / 4;
2316 unsigned M3 = N->getMaskElt(12) / 4;
2317 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2318 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2319
2320 // Below, let H and L be arbitrary elements of the shuffle mask
2321 // where H is in the range [4,7] and L is in the range [0,3].
2322 // H, 1, 2, 3 or L, 5, 6, 7
2323 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2324 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2325 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2326 InsertAtByte = IsLE ? 12 : 0;
2327 Swap = M0 < 4;
2328 return true;
2329 }
2330 // 0, H, 2, 3 or 4, L, 6, 7
2331 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2332 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2333 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2334 InsertAtByte = IsLE ? 8 : 4;
2335 Swap = M1 < 4;
2336 return true;
2337 }
2338 // 0, 1, H, 3 or 4, 5, L, 7
2339 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2340 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2341 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2342 InsertAtByte = IsLE ? 4 : 8;
2343 Swap = M2 < 4;
2344 return true;
2345 }
2346 // 0, 1, 2, H or 4, 5, 6, L
2347 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2348 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2349 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2350 InsertAtByte = IsLE ? 0 : 12;
2351 Swap = M3 < 4;
2352 return true;
2353 }
2354
2355 // If both vector operands for the shuffle are the same vector, the mask will
2356 // contain only elements from the first one and the second one will be undef.
2357 if (N->getOperand(1).isUndef()) {
2358 ShiftElts = 0;
2359 Swap = true;
2360 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2361 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2362 InsertAtByte = IsLE ? 12 : 0;
2363 return true;
2364 }
2365 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2366 InsertAtByte = IsLE ? 8 : 4;
2367 return true;
2368 }
2369 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2370 InsertAtByte = IsLE ? 4 : 8;
2371 return true;
2372 }
2373 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2374 InsertAtByte = IsLE ? 0 : 12;
2375 return true;
2376 }
2377 }
2378
2379 return false;
2380}
2381
2382bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2383                               bool &Swap, bool IsLE) {
2384 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2385 // Ensure each byte index of the word is consecutive.
2386 if (!isNByteElemShuffleMask(N, 4, 1))
2387 return false;
2388
2389 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2390 unsigned M0 = N->getMaskElt(0) / 4;
2391 unsigned M1 = N->getMaskElt(4) / 4;
2392 unsigned M2 = N->getMaskElt(8) / 4;
2393 unsigned M3 = N->getMaskElt(12) / 4;
2394
2395 // If both vector operands for the shuffle are the same vector, the mask will
2396 // contain only elements from the first one and the second one will be undef.
2397 if (N->getOperand(1).isUndef()) {
2398 assert(M0 < 4 && "Indexing into an undef vector?");
2399 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2400 return false;
2401
2402 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2403 Swap = false;
2404 return true;
2405 }
2406
2407 // Ensure each word index of the ShuffleVector Mask is consecutive.
2408 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2409 return false;
2410
2411 if (IsLE) {
2412 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2413 // Input vectors don't need to be swapped if the leading element
2414 // of the result is one of the 3 left elements of the second vector
2415 // (or if there is no shift to be done at all).
2416 Swap = false;
2417 ShiftElts = (8 - M0) % 8;
2418 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2419 // Input vectors need to be swapped if the leading element
2420 // of the result is one of the 3 left elements of the first vector
2421 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2422 Swap = true;
2423 ShiftElts = (4 - M0) % 4;
2424 }
2425
2426 return true;
2427 } else { // BE
2428 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2429 // Input vectors don't need to be swapped if the leading element
2430 // of the result is one of the 4 elements of the first vector.
2431 Swap = false;
2432 ShiftElts = M0;
2433 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2434 // Input vectors need to be swapped if the leading element
2435 // of the result is one of the 4 elements of the right vector.
2436 Swap = true;
2437 ShiftElts = M0 - 4;
2438 }
2439
2440 return true;
2441 }
2442}
2443
2444static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2445  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2446
2447 if (!isNByteElemShuffleMask(N, Width, -1))
2448 return false;
2449
2450 for (int i = 0; i < 16; i += Width)
2451 if (N->getMaskElt(i) != i + Width - 1)
2452 return false;
2453
2454 return true;
2455}
2456
2457bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2458  return isXXBRShuffleMaskHelper(N, 2);
2459}
2460
2461bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2462  return isXXBRShuffleMaskHelper(N, 4);
2463}
2464
2465bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2466  return isXXBRShuffleMaskHelper(N, 8);
2467}
2468
2469bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2470  return isXXBRShuffleMaskHelper(N, 16);
2471}
2472
2473/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2474/// if the inputs to the instruction should be swapped and set \p DM to the
2475/// value for the immediate.
2476/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2477/// AND element 0 of the result comes from the first input (LE) or second input
2478/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2479/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2480/// mask.
2481bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2482                                bool &Swap, bool IsLE) {
2483 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2484
2485 // Ensure each byte index of the double word is consecutive.
2486 if (!isNByteElemShuffleMask(N, 8, 1))
2487 return false;
2488
2489 unsigned M0 = N->getMaskElt(0) / 8;
2490 unsigned M1 = N->getMaskElt(8) / 8;
2491 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2492
2493 // If both vector operands for the shuffle are the same vector, the mask will
2494 // contain only elements from the first one and the second one will be undef.
2495 if (N->getOperand(1).isUndef()) {
2496 if ((M0 | M1) < 2) {
2497 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2498 Swap = false;
2499 return true;
2500 } else
2501 return false;
2502 }
2503
2504 if (IsLE) {
2505 if (M0 > 1 && M1 < 2) {
2506 Swap = false;
2507 } else if (M0 < 2 && M1 > 1) {
2508 M0 = (M0 + 2) % 4;
2509 M1 = (M1 + 2) % 4;
2510 Swap = true;
2511 } else
2512 return false;
2513
2514 // Note: if control flow comes here that means Swap is already set above
2515 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2516 return true;
2517 } else { // BE
2518 if (M0 < 2 && M1 > 1) {
2519 Swap = false;
2520 } else if (M0 > 1 && M1 < 2) {
2521 M0 = (M0 + 2) % 4;
2522 M1 = (M1 + 2) % 4;
2523 Swap = true;
2524 } else
2525 return false;
2526
2527 // Note: if control flow comes here that means Swap is already set above
2528 DM = (M0 << 1) + (M1 & 1);
2529 return true;
2530 }
2531}
2532
2533
2534/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2535/// appropriate for PPC mnemonics (which have a big endian bias - namely
2536/// elements are counted from the left of the vector register).
2537unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2538 SelectionDAG &DAG) {
2539  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2540  assert(isSplatShuffleMask(SVOp, EltSize));
2541 EVT VT = SVOp->getValueType(0);
2542
2543 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2544 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2545 : SVOp->getMaskElt(0);
2546
2547 if (DAG.getDataLayout().isLittleEndian())
2548 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2549 else
2550 return SVOp->getMaskElt(0) / EltSize;
2551}
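// Illustrative note (added, not from the original source): for a v16i8 splat
// of byte element 5 (EltSize == 1), this returns 5 on big-endian targets but
// 16/1 - 1 - 5 = 10 on little-endian targets, matching the left-to-right
// element numbering used by the vspltb/vsplth/vspltw mnemonics.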
2552
2553/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2554/// by using a vspltis[bhw] instruction of the specified element size, return
2555/// the constant being splatted. The ByteSize field indicates the number of
2556/// bytes of each element [124] -> [bhw].
2557SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2558  SDValue OpVal;
2559
2560 // If ByteSize of the splat is bigger than the element size of the
2561 // build_vector, then we have a case where we are checking for a splat where
2562 // multiple elements of the buildvector are folded together into a single
2563  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2564 unsigned EltSize = 16/N->getNumOperands();
2565 if (EltSize < ByteSize) {
2566 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2567 SDValue UniquedVals[4];
2568 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2569
2570    // See if all of the elements in the buildvector agree across each chunk.
2571 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2572 if (N->getOperand(i).isUndef()) continue;
2573 // If the element isn't a constant, bail fully out.
2574 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2575
2576 if (!UniquedVals[i&(Multiple-1)].getNode())
2577 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2578 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2579 return SDValue(); // no match.
2580 }
2581
2582 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2583 // either constant or undef values that are identical for each chunk. See
2584 // if these chunks can form into a larger vspltis*.
2585
2586 // Check to see if all of the leading entries are either 0 or -1. If
2587 // neither, then this won't fit into the immediate field.
2588 bool LeadingZero = true;
2589 bool LeadingOnes = true;
2590 for (unsigned i = 0; i != Multiple-1; ++i) {
2591 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2592
2593 LeadingZero &= isNullConstant(UniquedVals[i]);
2594 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2595 }
2596 // Finally, check the least significant entry.
2597 if (LeadingZero) {
2598 if (!UniquedVals[Multiple-1].getNode())
2599 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2600 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2601 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2602 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2603 }
2604 if (LeadingOnes) {
2605 if (!UniquedVals[Multiple-1].getNode())
2606 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2607      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2608 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2609 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2610 }
2611
2612 return SDValue();
2613 }
2614
2615 // Check to see if this buildvec has a single non-undef value in its elements.
2616 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2617 if (N->getOperand(i).isUndef()) continue;
2618 if (!OpVal.getNode())
2619 OpVal = N->getOperand(i);
2620 else if (OpVal != N->getOperand(i))
2621 return SDValue();
2622 }
2623
2624 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2625
2626 unsigned ValSizeInBytes = EltSize;
2627 uint64_t Value = 0;
2628 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2629 Value = CN->getZExtValue();
2630 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2631 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2632 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2633 }
2634
2635 // If the splat value is larger than the element value, then we can never do
2636 // this splat. The only case that we could fit the replicated bits into our
2637 // immediate field for would be zero, and we prefer to use vxor for it.
2638 if (ValSizeInBytes < ByteSize) return SDValue();
2639
2640 // If the element value is larger than the splat value, check if it consists
2641 // of a repeated bit pattern of size ByteSize.
2642 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2643 return SDValue();
2644
2645 // Properly sign extend the value.
2646 int MaskVal = SignExtend32(Value, ByteSize * 8);
2647
2648 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2649 if (MaskVal == 0) return SDValue();
2650
2651 // Finally, if this value fits in a 5 bit sext field, return it
2652 if (SignExtend32<5>(MaskVal) == MaskVal)
2653 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2654 return SDValue();
2655}
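// Illustrative note (added, not from the original source): a v8i16
// build_vector whose lanes are all the constant 3, queried with ByteSize == 2,
// returns the target constant 3, which can then be materialized as "vspltish 3".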
2656
2657//===----------------------------------------------------------------------===//
2658// Addressing Mode Selection
2659//===----------------------------------------------------------------------===//
2660
2661/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2662/// or 64-bit immediate, and if the value can be accurately represented as a
2663/// sign extension from a 16-bit value. If so, this returns true and the
2664/// immediate.
2665bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2666 if (!isa<ConstantSDNode>(N))
2667 return false;
2668
2669 Imm = (int16_t)N->getAsZExtVal();
2670 if (N->getValueType(0) == MVT::i32)
2671 return Imm == (int32_t)N->getAsZExtVal();
2672 else
2673 return Imm == (int64_t)N->getAsZExtVal();
2674}
2675bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2676  return isIntS16Immediate(Op.getNode(), Imm);
2677}
2678
2679/// Used when computing address flags for selecting loads and stores.
2680/// If we have an OR, check if the LHS and RHS are provably disjoint.
2681/// An OR of two provably disjoint values is equivalent to an ADD.
2682/// Most PPC load/store instructions compute the effective address as a sum,
2683/// so doing this conversion is useful.
2684static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2685 if (N.getOpcode() != ISD::OR)
2686 return false;
2687 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2688 if (!LHSKnown.Zero.getBoolValue())
2689 return false;
2690 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2691 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2692}
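// Illustrative note (added, not from the original source): for
// (or (shl %x, 8), 255) the low 8 bits of the LHS are known zero and all bits
// above bit 7 of the RHS are known zero, so every bit position is provably
// zero on at least one side and the OR can be treated as an ADD when forming
// addresses.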
2693
2694/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2695/// be represented as an indexed [r+r] operation.
2696bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2697                                               SDValue &Index,
2698 SelectionDAG &DAG) const {
2699 for (SDNode *U : N->users()) {
2700 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2701 if (Memop->getMemoryVT() == MVT::f64) {
2702 Base = N.getOperand(0);
2703 Index = N.getOperand(1);
2704 return true;
2705 }
2706 }
2707 }
2708 return false;
2709}
2710
2711/// isIntS34Immediate - This method tests whether the value of the given node
2712/// can be accurately represented as a sign extension from a 34-bit value. If so,
2713/// this returns true and the immediate.
2714bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2715 if (!isa<ConstantSDNode>(N))
2716 return false;
2717
2718 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2719 return isInt<34>(Imm);
2720}
2721bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2722  return isIntS34Immediate(Op.getNode(), Imm);
2723}
2724
2725/// SelectAddressRegReg - Given the specified address, check to see if it
2726/// can be represented as an indexed [r+r] operation. Returns false if it
2727/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2728/// non-zero and N can be represented by a base register plus a signed 16-bit
2729/// displacement, make a more precise judgement by checking (displacement % \p
2730/// EncodingAlignment).
2731bool PPCTargetLowering::SelectAddressRegReg(
2732    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2733 MaybeAlign EncodingAlignment) const {
2734 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2735 // a [pc+imm].
2736  if (SelectAddressPCRel(N, Base))
2737    return false;
2738
2739 int16_t Imm = 0;
2740 if (N.getOpcode() == ISD::ADD) {
2741    // SPE f64 loads/stores can't handle a 16-bit offset; they only support
2742    // 8-bit offsets, so try to select the EVX [r+r] form for them first.
2743 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2744 return true;
2745 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2746 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2747 return false; // r+i
2748 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2749 return false; // r+i
2750
2751 Base = N.getOperand(0);
2752 Index = N.getOperand(1);
2753 return true;
2754 } else if (N.getOpcode() == ISD::OR) {
2755 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2756 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2757 return false; // r+i can fold it if we can.
2758
2759 // If this is an or of disjoint bitfields, we can codegen this as an add
2760 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2761 // disjoint.
2762 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2763
2764 if (LHSKnown.Zero.getBoolValue()) {
2765 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2766 // If all of the bits are known zero on the LHS or RHS, the add won't
2767 // carry.
2768 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2769 Base = N.getOperand(0);
2770 Index = N.getOperand(1);
2771 return true;
2772 }
2773 }
2774 }
2775
2776 return false;
2777}
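// Illustrative note (added, not from the original source): (add %X, 32) is
// rejected here when EncodingAlignment divides 32, since it is better formed
// as the [r+imm] address "32(%X)"; an add of two non-constant registers is
// accepted with Base and Index set to the two operands.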
2778
2779// If we happen to be doing an i64 load or store into a stack slot that has
2780// less than a 4-byte alignment, then the frame-index elimination may need to
2781// use an indexed load or store instruction (because the offset may not be a
2782// multiple of 4). The extra register needed to hold the offset comes from the
2783// register scavenger, and it is possible that the scavenger will need to use
2784// an emergency spill slot. As a result, we need to make sure that a spill slot
2785// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2786// stack slot.
2787static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2788 // FIXME: This does not handle the LWA case.
2789 if (VT != MVT::i64)
2790 return;
2791
2792 // NOTE: We'll exclude negative FIs here, which come from argument
2793 // lowering, because there are no known test cases triggering this problem
2794 // using packed structures (or similar). We can remove this exclusion if
2795 // we find such a test case. The reason why this is so test-case driven is
2796 // because this entire 'fixup' is only to prevent crashes (from the
2797 // register scavenger) on not-really-valid inputs. For example, if we have:
2798 // %a = alloca i1
2799 // %b = bitcast i1* %a to i64*
2800 // store i64* a, i64 b
2801 // then the store should really be marked as 'align 1', but is not. If it
2802 // were marked as 'align 1' then the indexed form would have been
2803 // instruction-selected initially, and the problem this 'fixup' is preventing
2804 // won't happen regardless.
2805 if (FrameIdx < 0)
2806 return;
2807
2808  MachineFunction &MF = DAG.getMachineFunction();
2809  MachineFrameInfo &MFI = MF.getFrameInfo();
2810
2811 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2812 return;
2813
2814 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2815 FuncInfo->setHasNonRISpills();
2816}
2817
2818/// Returns true if the address N can be represented by a base register plus
2819/// a signed 16-bit displacement [r+imm], and if it is not better
2820/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2821/// displacements that are multiples of that value.
2822bool PPCTargetLowering::SelectAddressRegImm(
2823    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2824 MaybeAlign EncodingAlignment) const {
2825 // FIXME dl should come from parent load or store, not from address
2826 SDLoc dl(N);
2827
2828 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2829 // a [pc+imm].
2830  if (SelectAddressPCRel(N, Base))
2831    return false;
2832
2833 // If this can be more profitably realized as r+r, fail.
2834 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2835 return false;
2836
2837 if (N.getOpcode() == ISD::ADD) {
2838 int16_t imm = 0;
2839 if (isIntS16Immediate(N.getOperand(1), imm) &&
2840 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2841 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2842 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2843 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2844 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2845 } else {
2846 Base = N.getOperand(0);
2847 }
2848 return true; // [r+i]
2849 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2850 // Match LOAD (ADD (X, Lo(G))).
2851 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2852 "Cannot handle constant offsets yet!");
2853 Disp = N.getOperand(1).getOperand(0); // The global address.
2854      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2855             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2856             Disp.getOpcode() == ISD::TargetConstantPool ||
2857             Disp.getOpcode() == ISD::TargetJumpTable);
2858      Base = N.getOperand(0);
2859 return true; // [&g+r]
2860 }
2861 } else if (N.getOpcode() == ISD::OR) {
2862 int16_t imm = 0;
2863 if (isIntS16Immediate(N.getOperand(1), imm) &&
2864 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2865 // If this is an or of disjoint bitfields, we can codegen this as an add
2866 // (for better address arithmetic) if the LHS and RHS of the OR are
2867 // provably disjoint.
2868 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2869
2870 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2871 // If all of the bits are known zero on the LHS or RHS, the add won't
2872 // carry.
2873 if (FrameIndexSDNode *FI =
2874 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2875 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2876 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2877 } else {
2878 Base = N.getOperand(0);
2879 }
2880 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2881 return true;
2882 }
2883 }
2884 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2885 // Loading from a constant address.
2886
2887 // If this address fits entirely in a 16-bit sext immediate field, codegen
2888 // this as "d, 0"
2889 int16_t Imm;
2890 if (isIntS16Immediate(CN, Imm) &&
2891 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2892 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2893 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2894 CN->getValueType(0));
2895 return true;
2896 }
2897
2898 // Handle 32-bit sext immediates with LIS + addr mode.
2899 if ((CN->getValueType(0) == MVT::i32 ||
2900 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2901 (!EncodingAlignment ||
2902 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2903 int Addr = (int)CN->getZExtValue();
2904
2905 // Otherwise, break this down into an LIS + disp.
2906 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2907
2908 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2909 MVT::i32);
2910 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2911 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2912 return true;
2913 }
2914 }
2915
2916 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2917  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2918    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2919 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2920 } else
2921 Base = N;
2922 return true; // [r+0]
2923}
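// Illustrative note (added, not from the original source): for (add %X, 20000)
// this selects Disp = 20000 and Base = %X, since 20000 fits in a signed 16-bit
// displacement; a bare constant address that fits in 16 bits is selected as a
// displacement off the zero register (ZERO/ZERO8).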
2924
2925/// Similar to the 16-bit case but for instructions that take a 34-bit
2926/// displacement field (prefixed loads/stores).
2927bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2928                                              SDValue &Base,
2929 SelectionDAG &DAG) const {
2930 // Only on 64-bit targets.
2931 if (N.getValueType() != MVT::i64)
2932 return false;
2933
2934 SDLoc dl(N);
2935 int64_t Imm = 0;
2936
2937 if (N.getOpcode() == ISD::ADD) {
2938 if (!isIntS34Immediate(N.getOperand(1), Imm))
2939 return false;
2940 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2941 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2942 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2943 else
2944 Base = N.getOperand(0);
2945 return true;
2946 }
2947
2948 if (N.getOpcode() == ISD::OR) {
2949 if (!isIntS34Immediate(N.getOperand(1), Imm))
2950 return false;
2951 // If this is an or of disjoint bitfields, we can codegen this as an add
2952 // (for better address arithmetic) if the LHS and RHS of the OR are
2953 // provably disjoint.
2954 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2955 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2956 return false;
2957 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2958 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2959 else
2960 Base = N.getOperand(0);
2961 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2962 return true;
2963 }
2964
2965 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2966 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2967 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2968 return true;
2969 }
2970
2971 return false;
2972}
2973
2974/// SelectAddressRegRegOnly - Given the specified address, force it to be
2975/// represented as an indexed [r+r] operation.
2976bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2977                                                SDValue &Index,
2978 SelectionDAG &DAG) const {
2979 // Check to see if we can easily represent this as an [r+r] address. This
2980 // will fail if it thinks that the address is more profitably represented as
2981 // reg+imm, e.g. where imm = 0.
2982 if (SelectAddressRegReg(N, Base, Index, DAG))
2983 return true;
2984
2985 // If the address is the result of an add, we will utilize the fact that the
2986 // address calculation includes an implicit add. However, we can reduce
2987 // register pressure if we do not materialize a constant just for use as the
2988 // index register. We only get rid of the add if it is not an add of a
2989 // value and a 16-bit signed constant and both have a single use.
2990 int16_t imm = 0;
2991 if (N.getOpcode() == ISD::ADD &&
2992 (!isIntS16Immediate(N.getOperand(1), imm) ||
2993 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2994 Base = N.getOperand(0);
2995 Index = N.getOperand(1);
2996 return true;
2997 }
2998
2999 // Otherwise, do it the hard way, using R0 as the base register.
3000 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
3001 N.getValueType());
3002 Index = N;
3003 return true;
3004}
3005
3006template <typename Ty> static bool isValidPCRelNode(SDValue N) {
3007 Ty *PCRelCand = dyn_cast<Ty>(N);
3008 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
3009}
3010
3011/// Returns true if this address is a PC Relative address.
3012/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
3013/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
3014bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
3015  // This is a materialize PC Relative node. Always select this as PC Relative.
3016 Base = N;
3017 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3018 return true;
3019  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3020      isValidPCRelNode<GlobalAddressSDNode>(N) ||
3021      isValidPCRelNode<JumpTableSDNode>(N) ||
3022      isValidPCRelNode<BlockAddressSDNode>(N))
3023    return true;
3024 return false;
3025}
3026
3027/// Returns true if we should use a direct load into vector instruction
3028/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3029static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3030
3031 // If there are any other uses other than scalar to vector, then we should
3032 // keep it as a scalar load -> direct move pattern to prevent multiple
3033 // loads.
3034  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3035  if (!LD)
3036 return false;
3037
3038 EVT MemVT = LD->getMemoryVT();
3039 if (!MemVT.isSimple())
3040 return false;
3041 switch(MemVT.getSimpleVT().SimpleTy) {
3042 case MVT::i64:
3043 break;
3044 case MVT::i32:
3045 if (!ST.hasP8Vector())
3046 return false;
3047 break;
3048 case MVT::i16:
3049 case MVT::i8:
3050 if (!ST.hasP9Vector())
3051 return false;
3052 break;
3053 default:
3054 return false;
3055 }
3056
3057 SDValue LoadedVal(N, 0);
3058 if (!LoadedVal.hasOneUse())
3059 return false;
3060
3061 for (SDUse &Use : LD->uses())
3062 if (Use.getResNo() == 0 &&
3063 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3064        Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3065      return false;
3066
3067 return true;
3068}
3069
3070/// getPreIndexedAddressParts - returns true by value, base pointer and
3071/// offset pointer and addressing mode by reference if the node's address
3072/// can be legally represented as pre-indexed load / store address.
3073bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3074                                                  SDValue &Offset,
3075                                                  ISD::MemIndexedMode &AM,
3076                                                  SelectionDAG &DAG) const {
3077 if (DisablePPCPreinc) return false;
3078
3079 bool isLoad = true;
3080 SDValue Ptr;
3081 EVT VT;
3082 Align Alignment;
3083 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3084 Ptr = LD->getBasePtr();
3085 VT = LD->getMemoryVT();
3086 Alignment = LD->getAlign();
3087 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3088 Ptr = ST->getBasePtr();
3089 VT = ST->getMemoryVT();
3090 Alignment = ST->getAlign();
3091 isLoad = false;
3092 } else
3093 return false;
3094
3095 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3096 // instructions because we can fold these into a more efficient instruction
3097  // instead (such as LXSD).
3098 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3099 return false;
3100 }
3101
3102 // PowerPC doesn't have preinc load/store instructions for vectors
3103 if (VT.isVector())
3104 return false;
3105
3106 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3107 // Common code will reject creating a pre-inc form if the base pointer
3108 // is a frame index, or if N is a store and the base pointer is either
3109 // the same as or a predecessor of the value being stored. Check for
3110 // those situations here, and try with swapped Base/Offset instead.
3111 bool Swap = false;
3112
3113    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3114      Swap = true;
3115 else if (!isLoad) {
3116 SDValue Val = cast<StoreSDNode>(N)->getValue();
3117 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3118 Swap = true;
3119 }
3120
3121 if (Swap)
3122      std::swap(Base, Offset);
3123
3124 AM = ISD::PRE_INC;
3125 return true;
3126 }
3127
3128 // LDU/STU can only handle immediates that are a multiple of 4.
3129 if (VT != MVT::i64) {
3130 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3131 return false;
3132 } else {
3133 // LDU/STU need an address with at least 4-byte alignment.
3134 if (Alignment < Align(4))
3135 return false;
3136
3137 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3138 return false;
3139 }
3140
3141 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3142 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3143 // sext i32 to i64 when addr mode is r+i.
3144 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3145 LD->getExtensionType() == ISD::SEXTLOAD &&
3146        isa<ConstantSDNode>(Offset))
3147      return false;
3148 }
3149
3150 AM = ISD::PRE_INC;
3151 return true;
3152}
3153
3154//===----------------------------------------------------------------------===//
3155// LowerOperation implementation
3156//===----------------------------------------------------------------------===//
3157
3158/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3159/// and LoOpFlags to the target MO flags.
3160static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3161 unsigned &HiOpFlags, unsigned &LoOpFlags,
3162 const GlobalValue *GV = nullptr) {
3163 HiOpFlags = PPCII::MO_HA;
3164 LoOpFlags = PPCII::MO_LO;
3165
3166 // Don't use the pic base if not in PIC relocation model.
3167 if (IsPIC) {
3168 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3169 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3170 }
3171}
3172
3173static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3174 SelectionDAG &DAG) {
3175 SDLoc DL(HiPart);
3176 EVT PtrVT = HiPart.getValueType();
3177 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3178
3179 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3180 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3181
3182 // With PIC, the first instruction is actually "GR+hi(&G)".
3183 if (isPIC)
3184 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3185 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3186
3187 // Generate non-pic code that has direct accesses to the constant pool.
3188 // The address of the global is just (hi(&g)+lo(&g)).
3189 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3190}
3191
3192static void setUsesTOCBasePtr(MachineFunction &MF) {
3193  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3194 FuncInfo->setUsesTOCBasePtr();
3195}
3196
3197static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3198  setUsesTOCBasePtr(DAG.getMachineFunction());
3199}
3200
3201SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3202 SDValue GA) const {
3203 EVT VT = Subtarget.getScalarIntVT();
3204 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3205 : Subtarget.isAIXABI()
3206 ? DAG.getRegister(PPC::R2, VT)
3207 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3208 SDValue Ops[] = { GA, Reg };
3209 return DAG.getMemIntrinsicNode(
3210 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3211      MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3212      MachineMemOperand::MOLoad);
3213}
3214
3215SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3216 SelectionDAG &DAG) const {
3217 EVT PtrVT = Op.getValueType();
3218 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3219 const Constant *C = CP->getConstVal();
3220
3221 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3222 // The actual address of the GlobalValue is stored in the TOC.
3223 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3224 if (Subtarget.isUsingPCRelativeCalls()) {
3225 SDLoc DL(CP);
3226 EVT Ty = getPointerTy(DAG.getDataLayout());
3227 SDValue ConstPool = DAG.getTargetConstantPool(
3228 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3229 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3230 }
3231 setUsesTOCBasePtr(DAG);
3232 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3233 return getTOCEntry(DAG, SDLoc(CP), GA);
3234 }
3235
3236 unsigned MOHiFlag, MOLoFlag;
3237 bool IsPIC = isPositionIndependent();
3238 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3239
3240 if (IsPIC && Subtarget.isSVR4ABI()) {
3241 SDValue GA =
3242 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3243 return getTOCEntry(DAG, SDLoc(CP), GA);
3244 }
3245
3246 SDValue CPIHi =
3247 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3248 SDValue CPILo =
3249 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3250 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3251}
3252
3253// For 64-bit PowerPC, prefer the more compact relative encodings.
3254// This trades 32 bits per jump table entry for one or two instructions
3255// on the jump site.
3256unsigned PPCTargetLowering::getJumpTableEncoding() const {
3257  if (isJumpTableRelative())
3258    return MachineJumpTableInfo::EK_LabelDifference32;
3259
3260  return TargetLowering::getJumpTableEncoding();
3261}
3262
3263bool PPCTargetLowering::isJumpTableRelative() const {
3264  if (UseAbsoluteJumpTables)
3265    return false;
3266  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3267    return true;
3268  return TargetLowering::isJumpTableRelative();
3269}
3270
3271SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3272                                                    SelectionDAG &DAG) const {
3273 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3274    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3275
3276 switch (getTargetMachine().getCodeModel()) {
3277 case CodeModel::Small:
3278 case CodeModel::Medium:
3279    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3280  default:
3281 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3282                       getPointerTy(DAG.getDataLayout()));
3283  }
3284}
3285
3286const MCExpr *
3287PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3288                                                unsigned JTI,
3289 MCContext &Ctx) const {
3290 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3291    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3292
3293 switch (getTargetMachine().getCodeModel()) {
3294 case CodeModel::Small:
3295 case CodeModel::Medium:
3296    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3297  default:
3298 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3299 }
3300}
3301
3302SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3303 EVT PtrVT = Op.getValueType();
3304  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3305
3306 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3307 if (Subtarget.isUsingPCRelativeCalls()) {
3308 SDLoc DL(JT);
3309 EVT Ty = getPointerTy(DAG.getDataLayout());
3310 SDValue GA =
3311 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3312 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3313 return MatAddr;
3314 }
3315
3316 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3317 // The actual address of the GlobalValue is stored in the TOC.
3318 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3319 setUsesTOCBasePtr(DAG);
3320 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3321 return getTOCEntry(DAG, SDLoc(JT), GA);
3322 }
3323
3324 unsigned MOHiFlag, MOLoFlag;
3325 bool IsPIC = isPositionIndependent();
3326 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3327
3328 if (IsPIC && Subtarget.isSVR4ABI()) {
3329 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3330                                        PPCII::MO_PIC_FLAG);
3331    return getTOCEntry(DAG, SDLoc(GA), GA);
3332 }
3333
3334 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3335 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3336 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3337}
3338
3339SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3340 SelectionDAG &DAG) const {
3341 EVT PtrVT = Op.getValueType();
3342 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3343 const BlockAddress *BA = BASDN->getBlockAddress();
3344
3345 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3346 if (Subtarget.isUsingPCRelativeCalls()) {
3347 SDLoc DL(BASDN);
3348 EVT Ty = getPointerTy(DAG.getDataLayout());
3349 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3350                                           PPCII::MO_PCREL_FLAG);
3351    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3352 return MatAddr;
3353 }
3354
3355 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3356 // The actual BlockAddress is stored in the TOC.
3357 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3358 setUsesTOCBasePtr(DAG);
3359 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3360 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3361 }
3362
3363 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3364 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3365 return getTOCEntry(
3366 DAG, SDLoc(BASDN),
3367 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3368
3369 unsigned MOHiFlag, MOLoFlag;
3370 bool IsPIC = isPositionIndependent();
3371 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3372 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3373 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3374 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3375}
3376
3377SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3378 SelectionDAG &DAG) const {
3379 if (Subtarget.isAIXABI())
3380 return LowerGlobalTLSAddressAIX(Op, DAG);
3381
3382 return LowerGlobalTLSAddressLinux(Op, DAG);
3383}
3384
3385/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3386/// and then apply the update.
3387static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3388                                         SelectionDAG &DAG,
3389 const TargetMachine &TM) {
3390 // Initialize TLS model opt setting lazily:
3391 // (1) Use initial-exec for single TLS var references within current function.
3392 // (2) Use local-dynamic for multiple TLS var references within current
3393 // function.
3394 PPCFunctionInfo *FuncInfo =
3395      DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3396  if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3397    SmallPtrSet<const GlobalValue *, 4> TLSGV;
3398    // Iterate over all instructions within current function, collect all TLS
3399 // global variables (global variables taken as the first parameter to
3400 // Intrinsic::threadlocal_address).
3401 const Function &Func = DAG.getMachineFunction().getFunction();
3402 for (const BasicBlock &BB : Func)
3403 for (const Instruction &I : BB)
3404 if (I.getOpcode() == Instruction::Call)
3405 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3406 if (Function *CF = CI->getCalledFunction())
3407 if (CF->isDeclaration() &&
3408 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3409 if (const GlobalValue *GV =
3410 dyn_cast<GlobalValue>(I.getOperand(0))) {
3411 TLSModel::Model GVModel = TM.getTLSModel(GV);
3412 if (GVModel == TLSModel::LocalDynamic)
3413 TLSGV.insert(GV);
3414 }
3415
3416 unsigned TLSGVCnt = TLSGV.size();
3417 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3418 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3419 FuncInfo->setAIXFuncUseTLSIEForLD();
3420    FuncInfo->setAIXFuncTLSModelOptInitDone();
3421  }
3422
3423 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3424 LLVM_DEBUG(
3425 dbgs() << DAG.getMachineFunction().getName()
3426 << " function is using the TLS-IE model for TLS-LD access.\n");
3427 Model = TLSModel::InitialExec;
3428 }
3429}
3430
3431SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3432 SelectionDAG &DAG) const {
3433 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3434
3435 if (DAG.getTarget().useEmulatedTLS())
3436 report_fatal_error("Emulated TLS is not yet supported on AIX");
3437
3438 SDLoc dl(GA);
3439 const GlobalValue *GV = GA->getGlobal();
3440 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3441 bool Is64Bit = Subtarget.isPPC64();
3442  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3443
3444 // Apply update to the TLS model.
3445 if (Subtarget.hasAIXShLibTLSModelOpt())
3447
3448 // TLS variables are accessed through TOC entries.
3449 // To support this, set the DAG to use the TOC base pointer.
3450 setUsesTOCBasePtr(DAG);
3451
3452 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3453
3454 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3455 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3456 bool HasAIXSmallTLSGlobalAttr = false;
3457 SDValue VariableOffsetTGA =
3458 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3459 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3460 SDValue TLSReg;
3461
3462 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3463 if (GVar->hasAttribute("aix-small-tls"))
3464 HasAIXSmallTLSGlobalAttr = true;
3465
3466 if (Is64Bit) {
3467 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3468 // involves a load of the variable offset (from the TOC), followed by an
3469 // add of the loaded variable offset to R13 (the thread pointer).
3470 // This code sequence looks like:
3471 // ld reg1,var[TC](2)
3472 // add reg2, reg1, r13 // r13 contains the thread pointer
3473 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3474
3475 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3476 // global variable attribute, produce a faster access sequence for
3477 // local-exec TLS variables where the offset from the TLS base is encoded
3478 // as an immediate operand.
3479 //
3480 // We only utilize the faster local-exec access sequence when the TLS
3481 // variable has a size within the policy limit. We treat types that are
3482 // not sized or are empty as being over the policy size limit.
3483 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3484 IsTLSLocalExecModel) {
3485 Type *GVType = GV->getValueType();
3486 if (GVType->isSized() && !GVType->isEmptyTy() &&
3487 GV->getDataLayout().getTypeAllocSize(GVType) <=
3488              AIXSmallTlsPolicySizeLimit)
3489        return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3490 }
3491 } else {
3492 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3493 // involves loading the variable offset from the TOC, generating a call to
3494 // .__get_tpointer to get the thread pointer (which will be in R3), and
3495 // adding the two together:
3496 // lwz reg1,var[TC](2)
3497 // bla .__get_tpointer
3498 // add reg2, reg1, r3
3499 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3500
3501 // We do not implement the 32-bit version of the faster access sequence
3502 // for local-exec that is controlled by the -maix-small-local-exec-tls
3503 // option, or the "aix-small-tls" global variable attribute.
3504 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3505 report_fatal_error("The small-local-exec TLS access sequence is "
3506 "currently only supported on AIX (64-bit mode).");
3507 }
3508 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3509 }
3510
3511 if (Model == TLSModel::LocalDynamic) {
3512 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3513
3514 // We do not implement the 32-bit version of the faster access sequence
3515 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3516 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3517 report_fatal_error("The small-local-dynamic TLS access sequence is "
3518 "currently only supported on AIX (64-bit mode).");
3519
3520 // For local-dynamic on AIX, we need to generate one TOC entry for each
3521 // variable offset, and a single module-handle TOC entry for the entire
3522 // file.
3523
3524 SDValue VariableOffsetTGA =
3525 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3526 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3527
3528    Module *M = DAG.getMachineFunction().getFunction().getParent();
3529    GlobalVariable *TLSGV =
3530 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3531 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3532    TLSGV->setThreadLocal(true);
3533    assert(TLSGV && "Not able to create GV for _$TLSML.");
3534 SDValue ModuleHandleTGA =
3535 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3536 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3537 SDValue ModuleHandle =
3538 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3539
3540 // With the -maix-small-local-dynamic-tls option, produce a faster access
3541 // sequence for local-dynamic TLS variables where the offset from the
3542 // module-handle is encoded as an immediate operand.
3543 //
3544 // We only utilize the faster local-dynamic access sequence when the TLS
3545 // variable has a size within the policy limit. We treat types that are
3546 // not sized or are empty as being over the policy size limit.
3547 if (HasAIXSmallLocalDynamicTLS) {
3548 Type *GVType = GV->getValueType();
3549 if (GVType->isSized() && !GVType->isEmptyTy() &&
3550           GV->getDataLayout().getTypeAllocSize(GVType) <=
3551               AIXSmallTlsPolicySizeLimit)
3552 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3553 ModuleHandle);
3554 }
3555
3556 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3557 }
3558
3559   // If local-exec, initial-exec, or local-dynamic is not possible or not specified,
3560 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3561 // need to generate two TOC entries, one for the variable offset, one for the
3562 // region handle. The global address for the TOC entry of the region handle is
3563 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3564 // entry of the variable offset is created with MO_TLSGD_FLAG.
3565 SDValue VariableOffsetTGA =
3566 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3567 SDValue RegionHandleTGA =
3568 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3569 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3570 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3571 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3572 RegionHandle);
3573}
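// For reference (a sketch, not a literal transcription of later passes): the
// TLSGD_AIX and TLSLD_AIX nodes produced by LowerGlobalTLSAddressAIX above
// are expected to be expanded later into calls to the AIX TLS helper routines
// (.__tls_get_addr for general-dynamic, .__tls_get_mod for the module
// handle), with the TOC entries created here supplying their arguments.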
3574
3575SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3576 SelectionDAG &DAG) const {
3577 // FIXME: TLS addresses currently use medium model code sequences,
3578 // which is the most useful form. Eventually support for small and
3579 // large models could be added if users need it, at the cost of
3580 // additional complexity.
3581 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3582 if (DAG.getTarget().useEmulatedTLS())
3583 return LowerToTLSEmulatedModel(GA, DAG);
3584
3585 SDLoc dl(GA);
3586 const GlobalValue *GV = GA->getGlobal();
3587 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3588 bool is64bit = Subtarget.isPPC64();
3589 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3590 PICLevel::Level picLevel = M->getPICLevel();
3591
3592 const TargetMachine &TM = getTargetMachine();
3593 TLSModel::Model Model = TM.getTLSModel(GV);
3594
3595 if (Model == TLSModel::LocalExec) {
3596 if (Subtarget.isUsingPCRelativeCalls()) {
3597 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3598       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3599                                                PPCII::MO_TPREL_PCREL_FLAG);
3600 SDValue MatAddr =
3601 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3602 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3603 }
3604
3605     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3606                                                PPCII::MO_TPREL_HA);
3607     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3608                                                PPCII::MO_TPREL_LO);
3609 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3610 : DAG.getRegister(PPC::R2, MVT::i32);
3611
3612 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3613 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3614 }
3615
3616 if (Model == TLSModel::InitialExec) {
3617     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3618     SDValue TGA = DAG.getTargetGlobalAddress(
3619 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3620 SDValue TGATLS = DAG.getTargetGlobalAddress(
3621 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3622 SDValue TPOffset;
3623 if (IsPCRel) {
3624 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3625 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3626 MachinePointerInfo());
3627 } else {
3628 SDValue GOTPtr;
3629 if (is64bit) {
3630 setUsesTOCBasePtr(DAG);
3631 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3632 GOTPtr =
3633 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3634 } else {
3635 if (!TM.isPositionIndependent())
3636 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3637 else if (picLevel == PICLevel::SmallPIC)
3638 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3639 else
3640 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3641 }
3642 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3643 }
3644 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3645 }
3646
3647 if (Model == TLSModel::GeneralDynamic) {
3648 if (Subtarget.isUsingPCRelativeCalls()) {
3649       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3650                                                PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3651 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3652 }
3653
3654 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3655 SDValue GOTPtr;
3656 if (is64bit) {
3657 setUsesTOCBasePtr(DAG);
3658 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3659 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3660 GOTReg, TGA);
3661 } else {
3662 if (picLevel == PICLevel::SmallPIC)
3663 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3664 else
3665 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3666 }
3667 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3668 GOTPtr, TGA, TGA);
3669 }
3670
3671 if (Model == TLSModel::LocalDynamic) {
3672 if (Subtarget.isUsingPCRelativeCalls()) {
3673       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3674                                                PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3675 SDValue MatPCRel =
3676 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3677 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3678 }
3679
3680 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3681 SDValue GOTPtr;
3682 if (is64bit) {
3683 setUsesTOCBasePtr(DAG);
3684 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3685 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3686 GOTReg, TGA);
3687 } else {
3688 if (picLevel == PICLevel::SmallPIC)
3689 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3690 else
3691 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3692 }
3693 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3694 PtrVT, GOTPtr, TGA, TGA);
3695 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3696 PtrVT, TLSAddr, TGA);
3697 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3698 }
3699
3700 llvm_unreachable("Unknown TLS model!");
3701}
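// For reference, the 64-bit medium-code-model sequences built by
// LowerGlobalTLSAddressLinux above roughly correspond to the following;
// registers and relocation spellings are illustrative only:
//   local-exec:      addis rX, r13, sym@tprel@ha
//                    addi  rX, rX,  sym@tprel@l
//   initial-exec:    addis rX, r2,  sym@got@tprel@ha
//                    ld    rX, sym@got@tprel@l(rX)
//                    add   rX, rX,  sym@tls
//   general-dynamic: addis r3, r2,  sym@got@tlsgd@ha
//                    addi  r3, r3,  sym@got@tlsgd@l
//                    bl    __tls_get_addr(sym@tlsgd)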
3702
3703SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3704 SelectionDAG &DAG) const {
3705 EVT PtrVT = Op.getValueType();
3706 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3707 SDLoc DL(GSDN);
3708 const GlobalValue *GV = GSDN->getGlobal();
3709
3710 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3711 // The actual address of the GlobalValue is stored in the TOC.
3712 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3713 if (Subtarget.isUsingPCRelativeCalls()) {
3714       EVT Ty = getPointerTy(DAG.getDataLayout());
3715       if (isAccessedAsGotIndirect(Op)) {
3716         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3717                                                 PPCII::MO_GOT_PCREL_FLAG);
3718 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3719 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3720 MachinePointerInfo());
3721 return Load;
3722 } else {
3723         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3724                                                 PPCII::MO_PCREL_FLAG);
3725 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3726 }
3727 }
3728 setUsesTOCBasePtr(DAG);
3729 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3730 return getTOCEntry(DAG, DL, GA);
3731 }
3732
3733 unsigned MOHiFlag, MOLoFlag;
3734 bool IsPIC = isPositionIndependent();
3735 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3736
3737 if (IsPIC && Subtarget.isSVR4ABI()) {
3738 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3739                                             GSDN->getOffset(),
3740                                             PPCII::MO_PIC_FLAG);
3741 return getTOCEntry(DAG, DL, GA);
3742 }
3743
3744 SDValue GAHi =
3745 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3746 SDValue GALo =
3747 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3748
3749 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3750}
3751
3752SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3753 bool IsStrict = Op->isStrictFPOpcode();
3754 ISD::CondCode CC =
3755 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3756 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3757 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3758 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3759 EVT LHSVT = LHS.getValueType();
3760 SDLoc dl(Op);
3761
3762 // Soften the setcc with libcall if it is fp128.
3763 if (LHSVT == MVT::f128) {
3764 assert(!Subtarget.hasP9Vector() &&
3765 "SETCC for f128 is already legal under Power9!");
3766 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3767 Op->getOpcode() == ISD::STRICT_FSETCCS);
3768 if (RHS.getNode())
3769 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3770 DAG.getCondCode(CC));
3771 if (IsStrict)
3772 return DAG.getMergeValues({LHS, Chain}, dl);
3773 return LHS;
3774 }
3775
3776 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3777
3778 if (Op.getValueType() == MVT::v2i64) {
3779 // When the operands themselves are v2i64 values, we need to do something
3780 // special because VSX has no underlying comparison operations for these.
3781 if (LHS.getValueType() == MVT::v2i64) {
3782 // Equality can be handled by casting to the legal type for Altivec
3783 // comparisons, everything else needs to be expanded.
3784 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3785 return SDValue();
3786 SDValue SetCC32 = DAG.getSetCC(
3787 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3788 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3789 int ShuffV[] = {1, 0, 3, 2};
3790 SDValue Shuff =
3791 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3792 return DAG.getBitcast(MVT::v2i64,
3793 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3794 dl, MVT::v4i32, Shuff, SetCC32));
3795 }
3796
3797 // We handle most of these in the usual way.
3798 return Op;
3799 }
3800
3801 // If we're comparing for equality to zero, expose the fact that this is
3802 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3803 // fold the new nodes.
3804 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3805 return V;
3806
3807 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3808 // Leave comparisons against 0 and -1 alone for now, since they're usually
3809 // optimized. FIXME: revisit this when we can custom lower all setcc
3810 // optimizations.
3811 if (C->isAllOnes() || C->isZero())
3812 return SDValue();
3813 }
3814
3815 // If we have an integer seteq/setne, turn it into a compare against zero
3816 // by xor'ing the rhs with the lhs, which is faster than setting a
3817 // condition register, reading it back out, and masking the correct bit. The
3818 // normal approach here uses sub to do this instead of xor. Using xor exposes
3819 // the result to other bit-twiddling opportunities.
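  // For example, (seteq i32 %a, %b) is rewritten here as
  // (seteq (xor %a, %b), 0); this is only an illustration of the transform
  // performed just below.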
3820 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3821 EVT VT = Op.getValueType();
3822 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3823 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3824 }
3825 return SDValue();
3826}
3827
3828SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3829 SDNode *Node = Op.getNode();
3830 EVT VT = Node->getValueType(0);
3831 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3832 SDValue InChain = Node->getOperand(0);
3833 SDValue VAListPtr = Node->getOperand(1);
3834 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3835 SDLoc dl(Node);
3836
3837 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3838
3839 // gpr_index
3840 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3841 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3842 InChain = GprIndex.getValue(1);
3843
3844 if (VT == MVT::i64) {
3845 // Check if GprIndex is even
3846 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3847 DAG.getConstant(1, dl, MVT::i32));
3848 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3849 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3850 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3851 DAG.getConstant(1, dl, MVT::i32));
3852 // Align GprIndex to be even if it isn't
3853 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3854 GprIndex);
3855 }
3856
3857 // fpr index is 1 byte after gpr
3858 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3859 DAG.getConstant(1, dl, MVT::i32));
3860
3861 // fpr
3862 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3863 FprPtr, MachinePointerInfo(SV), MVT::i8);
3864 InChain = FprIndex.getValue(1);
3865
3866 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3867 DAG.getConstant(8, dl, MVT::i32));
3868
3869 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3870 DAG.getConstant(4, dl, MVT::i32));
3871
3872 // areas
3873 SDValue OverflowArea =
3874 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3875 InChain = OverflowArea.getValue(1);
3876
3877 SDValue RegSaveArea =
3878 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3879 InChain = RegSaveArea.getValue(1);
3880
3881   // select overflow_area if {g,f}pr_index >= 8
3882 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3883 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3884
3885 // adjustment constant gpr_index * 4/8
3886 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3887 VT.isInteger() ? GprIndex : FprIndex,
3888 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3889 MVT::i32));
3890
3891 // OurReg = RegSaveArea + RegConstant
3892 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3893 RegConstant);
3894
3895 // Floating types are 32 bytes into RegSaveArea
3896 if (VT.isFloatingPoint())
3897 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3898 DAG.getConstant(32, dl, MVT::i32));
3899
3900 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3901 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3902 VT.isInteger() ? GprIndex : FprIndex,
3903 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3904 MVT::i32));
3905
3906 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3907 VT.isInteger() ? VAListPtr : FprPtr,
3908 MachinePointerInfo(SV), MVT::i8);
3909
3910 // determine if we should load from reg_save_area or overflow_area
3911 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3912
3913   // increase overflow_area by 4/8 if the gpr/fpr index is >= 8
3914 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3915 DAG.getConstant(VT.isInteger() ? 4 : 8,
3916 dl, MVT::i32));
3917
3918 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3919 OverflowAreaPlusN);
3920
3921 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3922 MachinePointerInfo(), MVT::i32);
3923
3924 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3925}
3926
3927SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3928 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3929
3930 // We have to copy the entire va_list struct:
3931   // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
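  // That is, assuming 4-byte pointers: 1 (gpr) + 1 (fpr) + 2 (padding) +
  // 4 (overflow_arg_area) + 4 (reg_save_area) = 12 bytes.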
3932 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3933 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3934 false, true, /*CI=*/nullptr, std::nullopt,
3935 MachinePointerInfo(), MachinePointerInfo());
3936}
3937
3938SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3939 SelectionDAG &DAG) const {
3940 return Op.getOperand(0);
3941}
3942
3943SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3944 MachineFunction &MF = DAG.getMachineFunction();
3945 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3946
3947 assert((Op.getOpcode() == ISD::INLINEASM ||
3948 Op.getOpcode() == ISD::INLINEASM_BR) &&
3949 "Expecting Inline ASM node.");
3950
3951   // If an LR store is already known to be required then there is no point in
3952 // checking this ASM as well.
3953 if (MFI.isLRStoreRequired())
3954 return Op;
3955
3956 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3957 // type MVT::Glue. We want to ignore this last operand if that is the case.
3958 unsigned NumOps = Op.getNumOperands();
3959 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3960 --NumOps;
3961
3962 // Check all operands that may contain the LR.
3963 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3964 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3965 unsigned NumVals = Flags.getNumOperandRegisters();
3966 ++i; // Skip the ID value.
3967
3968 switch (Flags.getKind()) {
3969 default:
3970       llvm_unreachable("Bad flags!");
3971     case InlineAsm::Kind::RegUse:
3972     case InlineAsm::Kind::Imm:
3973     case InlineAsm::Kind::Mem:
3974       i += NumVals;
3975       break;
3976     case InlineAsm::Kind::Clobber:
3977     case InlineAsm::Kind::RegDef:
3978     case InlineAsm::Kind::RegDefEarlyClobber: {
3979 for (; NumVals; --NumVals, ++i) {
3980 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3981 if (Reg != PPC::LR && Reg != PPC::LR8)
3982 continue;
3983 MFI.setLRStoreRequired();
3984 return Op;
3985 }
3986 break;
3987 }
3988 }
3989 }
3990
3991 return Op;
3992}
3993
3994SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3995 SelectionDAG &DAG) const {
3996 SDValue Chain = Op.getOperand(0);
3997 SDValue Trmp = Op.getOperand(1); // trampoline
3998 SDValue FPtr = Op.getOperand(2); // nested function
3999 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
4000 SDLoc dl(Op);
4001
4002 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4003
4004 if (Subtarget.isAIXABI()) {
4005 // On AIX we create a trampoline descriptor by combining the
4006 // entry point and TOC from the global descriptor (FPtr) with the
4007 // nest argument as the environment pointer.
4008 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
4009 MaybeAlign PointerAlign(PointerSize);
4010     auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
4011                         ? (MachineMemOperand::MODereferenceable |
4012                            MachineMemOperand::MOInvariant)
4013                         : MachineMemOperand::MONone;
4014
4015 uint64_t TOCPointerOffset = 1 * PointerSize;
4016 uint64_t EnvPointerOffset = 2 * PointerSize;
4017 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
4018 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
4019
4020 const Value *TrampolineAddr =
4021 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4022 const Function *Func =
4023 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
4024
4025 SDValue OutChains[3];
4026
4027 // Copy the entry point address from the global descriptor to the
4028 // trampoline buffer.
4029 SDValue LoadEntryPoint =
4030 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
4031 PointerAlign, MMOFlags);
4032 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
4033 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
4034 MachinePointerInfo(TrampolineAddr, 0));
4035
4036 // Copy the TOC pointer from the global descriptor to the trampoline
4037 // buffer.
4038 SDValue TOCFromDescriptorPtr =
4039 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
4040 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
4041 MachinePointerInfo(Func, TOCPointerOffset),
4042 PointerAlign, MMOFlags);
4043 SDValue TrampolineTOCPointer =
4044 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
4045 SDValue TOCLoadChain = TOCReg.getValue(1);
4046 OutChains[1] =
4047 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
4048 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
4049
4050 // Store the nest argument into the environment pointer in the trampoline
4051 // buffer.
4052 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
4053 OutChains[2] =
4054 DAG.getStore(Chain, dl, Nest, EnvPointer,
4055 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
4056
4057     SDValue TokenFactor =
4058 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
4059 return TokenFactor;
4060 }
4061
4062 bool isPPC64 = (PtrVT == MVT::i64);
4063 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
4064
4065   TargetLowering::ArgListTy Args;
4066 Args.emplace_back(Trmp, IntPtrTy);
4067 // TrampSize == (isPPC64 ? 48 : 40);
4068 Args.emplace_back(
4069 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
4070 IntPtrTy);
4071 Args.emplace_back(FPtr, IntPtrTy);
4072 Args.emplace_back(Nest, IntPtrTy);
4073
4074 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4075 TargetLowering::CallLoweringInfo CLI(DAG);
4076   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4077       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4078 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4079
4080 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4081 return CallResult.second;
4082}
4083
4084SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4085 MachineFunction &MF = DAG.getMachineFunction();
4086 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4087 EVT PtrVT = getPointerTy(MF.getDataLayout());
4088
4089 SDLoc dl(Op);
4090
4091 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4092 // vastart just stores the address of the VarArgsFrameIndex slot into the
4093 // memory location argument.
4094 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4095 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4096 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4097 MachinePointerInfo(SV));
4098 }
4099
4100 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4101 // We suppose the given va_list is already allocated.
4102 //
4103 // typedef struct {
4104 // char gpr; /* index into the array of 8 GPRs
4105 // * stored in the register save area
4106 // * gpr=0 corresponds to r3,
4107 // * gpr=1 to r4, etc.
4108 // */
4109 // char fpr; /* index into the array of 8 FPRs
4110 // * stored in the register save area
4111 // * fpr=0 corresponds to f1,
4112 // * fpr=1 to f2, etc.
4113 // */
4114 // char *overflow_arg_area;
4115 // /* location on stack that holds
4116 // * the next overflow argument
4117 // */
4118 // char *reg_save_area;
4119 // /* where r3:r10 and f1:f8 (if saved)
4120 // * are stored
4121 // */
4122 // } va_list[1];
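  // With 4-byte pointers this layout gives the byte offsets assumed here and
  // in LowerVAARG: gpr at 0, fpr at 1, overflow_arg_area at 4, and
  // reg_save_area at 8.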
4123
4124 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4125 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4126 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4127 PtrVT);
4128 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4129 PtrVT);
4130
4131 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4132 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4133
4134 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4135 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4136
4137 uint64_t FPROffset = 1;
4138 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4139
4140 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4141
4142 // Store first byte : number of int regs
4143 SDValue firstStore =
4144 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4145 MachinePointerInfo(SV), MVT::i8);
4146 uint64_t nextOffset = FPROffset;
4147 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4148 ConstFPROffset);
4149
4150 // Store second byte : number of float regs
4151 SDValue secondStore =
4152 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4153 MachinePointerInfo(SV, nextOffset), MVT::i8);
4154 nextOffset += StackOffset;
4155 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4156
4157 // Store second word : arguments given on stack
4158 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4159 MachinePointerInfo(SV, nextOffset));
4160 nextOffset += FrameOffset;
4161 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4162
4163 // Store third word : arguments given in registers
4164 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4165 MachinePointerInfo(SV, nextOffset));
4166}
4167
4168/// FPR - The set of FP registers that should be allocated for arguments
4169/// on Darwin and AIX.
4170static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4171 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4172 PPC::F11, PPC::F12, PPC::F13};
4173
4174/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4175/// the stack.
4176static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4177 unsigned PtrByteSize) {
4178 unsigned ArgSize = ArgVT.getStoreSize();
4179 if (Flags.isByVal())
4180 ArgSize = Flags.getByValSize();
4181
4182 // Round up to multiples of the pointer size, except for array members,
4183 // which are always packed.
4184 if (!Flags.isInConsecutiveRegs())
4185 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4186
4187 return ArgSize;
4188}
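// For example, with PtrByteSize == 8 CalculateStackSlotSize reserves 24 bytes
// for a 20-byte by-value aggregate, while a 4-byte element of a split
// (isInConsecutiveRegs) float array stays at 4 bytes; the numbers here are
// illustrative only.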
4189
4190/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4191 /// on the stack.
4192 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4193 ISD::ArgFlagsTy Flags,
4194 unsigned PtrByteSize) {
4195 Align Alignment(PtrByteSize);
4196
4197 // Altivec parameters are padded to a 16 byte boundary.
4198 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4199 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4200 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4201 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4202 Alignment = Align(16);
4203
4204 // ByVal parameters are aligned as requested.
4205 if (Flags.isByVal()) {
4206 auto BVAlign = Flags.getNonZeroByValAlign();
4207 if (BVAlign > PtrByteSize) {
4208       if (BVAlign.value() % PtrByteSize != 0)
4209         report_fatal_error(
4210 "ByVal alignment is not a multiple of the pointer size");
4211
4212 Alignment = BVAlign;
4213 }
4214 }
4215
4216 // Array members are always packed to their original alignment.
4217 if (Flags.isInConsecutiveRegs()) {
4218 // If the array member was split into multiple registers, the first
4219 // needs to be aligned to the size of the full type. (Except for
4220 // ppcf128, which is only aligned as its f64 components.)
4221 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4222 Alignment = Align(OrigVT.getStoreSize());
4223 else
4224 Alignment = Align(ArgVT.getStoreSize());
4225 }
4226
4227 return Alignment;
4228}
4229
4230/// CalculateStackSlotUsed - Return whether this argument will use its
4231/// stack slot (instead of being passed in registers). ArgOffset,
4232/// AvailableFPRs, and AvailableVRs must hold the current argument
4233/// position, and will be updated to account for this argument.
4234static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4235 unsigned PtrByteSize, unsigned LinkageSize,
4236 unsigned ParamAreaSize, unsigned &ArgOffset,
4237 unsigned &AvailableFPRs,
4238 unsigned &AvailableVRs) {
4239 bool UseMemory = false;
4240
4241 // Respect alignment of argument on the stack.
4242 Align Alignment =
4243 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4244 ArgOffset = alignTo(ArgOffset, Alignment);
4245 // If there's no space left in the argument save area, we must
4246 // use memory (this check also catches zero-sized arguments).
4247 if (ArgOffset >= LinkageSize + ParamAreaSize)
4248 UseMemory = true;
4249
4250 // Allocate argument on the stack.
4251 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4252 if (Flags.isInConsecutiveRegsLast())
4253 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4254 // If we overran the argument save area, we must use memory
4255 // (this check catches arguments passed partially in memory)
4256 if (ArgOffset > LinkageSize + ParamAreaSize)
4257 UseMemory = true;
4258
4259 // However, if the argument is actually passed in an FPR or a VR,
4260 // we don't use memory after all.
4261 if (!Flags.isByVal()) {
4262 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4263 if (AvailableFPRs > 0) {
4264 --AvailableFPRs;
4265 return false;
4266 }
4267 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4268 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4269 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4270 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4271 if (AvailableVRs > 0) {
4272 --AvailableVRs;
4273 return false;
4274 }
4275 }
4276
4277 return UseMemory;
4278}
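// Note on CalculateStackSlotUsed: even when it returns false (the argument
// travels in an FPR or VR), ArgOffset has still been advanced, because the
// offset tracks each parameter's notional slot in the parameter save area.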
4279
4280/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4281 /// ensure minimum alignment required for target.
4282 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4283 unsigned NumBytes) {
4284 return alignTo(NumBytes, Lowering->getStackAlign());
4285}
4286
4287SDValue PPCTargetLowering::LowerFormalArguments(
4288 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4289 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4290 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4291 if (Subtarget.isAIXABI())
4292 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4293 InVals);
4294 if (Subtarget.is64BitELFABI())
4295 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4296 InVals);
4297 assert(Subtarget.is32BitELFABI());
4298 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4299 InVals);
4300}
4301
4302SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4303 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4304 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4305 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4306
4307 // 32-bit SVR4 ABI Stack Frame Layout:
4308 // +-----------------------------------+
4309 // +--> | Back chain |
4310 // | +-----------------------------------+
4311 // | | Floating-point register save area |
4312 // | +-----------------------------------+
4313 // | | General register save area |
4314 // | +-----------------------------------+
4315 // | | CR save word |
4316 // | +-----------------------------------+
4317 // | | VRSAVE save word |
4318 // | +-----------------------------------+
4319 // | | Alignment padding |
4320 // | +-----------------------------------+
4321 // | | Vector register save area |
4322 // | +-----------------------------------+
4323 // | | Local variable space |
4324 // | +-----------------------------------+
4325 // | | Parameter list area |
4326 // | +-----------------------------------+
4327 // | | LR save word |
4328 // | +-----------------------------------+
4329 // SP--> +--- | Back chain |
4330 // +-----------------------------------+
4331 //
4332 // Specifications:
4333 // System V Application Binary Interface PowerPC Processor Supplement
4334 // AltiVec Technology Programming Interface Manual
4335
4336 MachineFunction &MF = DAG.getMachineFunction();
4337 MachineFrameInfo &MFI = MF.getFrameInfo();
4338 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4339
4340 EVT PtrVT = getPointerTy(MF.getDataLayout());
4341 // Potential tail calls could cause overwriting of argument stack slots.
4342 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4343 (CallConv == CallingConv::Fast));
4344 const Align PtrAlign(4);
4345
4346   // Assign locations to all of the incoming arguments.
4347   SmallVector<CCValAssign, 16> ArgLocs;
4348 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4349 *DAG.getContext());
4350
4351 // Reserve space for the linkage area on the stack.
4352 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4353 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4354 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4355
4356 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4357 CCValAssign &VA = ArgLocs[i];
4358
4359 // Arguments stored in registers.
4360 if (VA.isRegLoc()) {
4361 const TargetRegisterClass *RC;
4362 EVT ValVT = VA.getValVT();
4363
4364 switch (ValVT.getSimpleVT().SimpleTy) {
4365 default:
4366 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4367 case MVT::i1:
4368 case MVT::i32:
4369 RC = &PPC::GPRCRegClass;
4370 break;
4371 case MVT::f32:
4372 if (Subtarget.hasP8Vector())
4373 RC = &PPC::VSSRCRegClass;
4374 else if (Subtarget.hasSPE())
4375 RC = &PPC::GPRCRegClass;
4376 else
4377 RC = &PPC::F4RCRegClass;
4378 break;
4379 case MVT::f64:
4380 if (Subtarget.hasVSX())
4381 RC = &PPC::VSFRCRegClass;
4382 else if (Subtarget.hasSPE())
4383 // SPE passes doubles in GPR pairs.
4384 RC = &PPC::GPRCRegClass;
4385 else
4386 RC = &PPC::F8RCRegClass;
4387 break;
4388 case MVT::v16i8:
4389 case MVT::v8i16:
4390 case MVT::v4i32:
4391 RC = &PPC::VRRCRegClass;
4392 break;
4393 case MVT::v4f32:
4394 RC = &PPC::VRRCRegClass;
4395 break;
4396 case MVT::v2f64:
4397 case MVT::v2i64:
4398 RC = &PPC::VRRCRegClass;
4399 break;
4400 }
4401
4402 SDValue ArgValue;
4403 // Transform the arguments stored in physical registers into
4404 // virtual ones.
4405 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4406 assert(i + 1 < e && "No second half of double precision argument");
4407 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4408 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4409 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4410 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4411 if (!Subtarget.isLittleEndian())
4412 std::swap (ArgValueLo, ArgValueHi);
4413 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4414 ArgValueHi);
4415 } else {
4416 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4417 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4418 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4419 if (ValVT == MVT::i1)
4420 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4421 }
4422
4423 InVals.push_back(ArgValue);
4424 } else {
4425 // Argument stored in memory.
4426 assert(VA.isMemLoc());
4427
4428 // Get the extended size of the argument type in stack
4429 unsigned ArgSize = VA.getLocVT().getStoreSize();
4430 // Get the actual size of the argument type
4431 unsigned ObjSize = VA.getValVT().getStoreSize();
4432 unsigned ArgOffset = VA.getLocMemOffset();
4433 // Stack objects in PPC32 are right justified.
4434 ArgOffset += ArgSize - ObjSize;
4435 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4436
4437 // Create load nodes to retrieve arguments from the stack.
4438 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4439 InVals.push_back(
4440 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4441 }
4442 }
4443
4444 // Assign locations to all of the incoming aggregate by value arguments.
4445 // Aggregates passed by value are stored in the local variable space of the
4446 // caller's stack frame, right above the parameter list area.
4447 SmallVector<CCValAssign, 16> ByValArgLocs;
4448 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4449 ByValArgLocs, *DAG.getContext());
4450
4451 // Reserve stack space for the allocations in CCInfo.
4452 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4453
4454 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4455
4456 // Area that is at least reserved in the caller of this function.
4457 unsigned MinReservedArea = CCByValInfo.getStackSize();
4458 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4459
4460 // Set the size that is at least reserved in caller of this function. Tail
4461 // call optimized function's reserved stack space needs to be aligned so that
4462 // taking the difference between two stack areas will result in an aligned
4463 // stack.
4464 MinReservedArea =
4465 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4466 FuncInfo->setMinReservedArea(MinReservedArea);
4467
4468   SmallVector<SDValue, 8> MemOps;
4469
4470 // If the function takes variable number of arguments, make a frame index for
4471 // the start of the first vararg value... for expansion of llvm.va_start.
4472 if (isVarArg) {
4473 static const MCPhysReg GPArgRegs[] = {
4474 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4475 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4476 };
4477 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4478
4479 static const MCPhysReg FPArgRegs[] = {
4480 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4481 PPC::F8
4482 };
4483 unsigned NumFPArgRegs = std::size(FPArgRegs);
4484
4485 if (useSoftFloat() || hasSPE())
4486 NumFPArgRegs = 0;
4487
4488 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4489 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4490
4491 // Make room for NumGPArgRegs and NumFPArgRegs.
4492 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4493 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4494
4495     FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4496 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4497
4498 FuncInfo->setVarArgsFrameIndex(
4499 MFI.CreateStackObject(Depth, Align(8), false));
4500 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4501
4502 // The fixed integer arguments of a variadic function are stored to the
4503 // VarArgsFrameIndex on the stack so that they may be loaded by
4504 // dereferencing the result of va_next.
4505 for (MCPhysReg GPArgReg : GPArgRegs) {
4506 // Get an existing live-in vreg, or add a new one.
4507 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4508 if (!VReg)
4509 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4510
4511 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4512 SDValue Store =
4513 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4514 MemOps.push_back(Store);
4515 // Increment the address by four for the next argument to store
4516 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4517 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4518 }
4519
4520 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4521 // is set.
4522 // The double arguments are stored to the VarArgsFrameIndex
4523 // on the stack.
4524 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4525 // Get an existing live-in vreg, or add a new one.
4526 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4527 if (!VReg)
4528 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4529
4530 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4531 SDValue Store =
4532 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4533 MemOps.push_back(Store);
4534 // Increment the address by eight for the next argument to store
4535 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4536 PtrVT);
4537 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4538 }
4539 }
4540
4541 if (!MemOps.empty())
4542 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4543
4544 return Chain;
4545}
4546
4547// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4548// value to MVT::i64 and then truncate to the correct register size.
4549SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4550 EVT ObjectVT, SelectionDAG &DAG,
4551 SDValue ArgVal,
4552 const SDLoc &dl) const {
4553 if (Flags.isSExt())
4554 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4555 DAG.getValueType(ObjectVT));
4556 else if (Flags.isZExt())
4557 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4558 DAG.getValueType(ObjectVT));
4559
4560 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4561}
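// For example, extendArgForPPC64 turns a sign-extended i32 argument arriving
// in an i64 GPR into (truncate (AssertSext <copy>, i32)), letting later
// combines rely on the known high bits; the operand names are illustrative.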
4562
4563SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4564 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4565 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4566 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4567 // TODO: add description of PPC stack frame format, or at least some docs.
4568 //
4569 bool isELFv2ABI = Subtarget.isELFv2ABI();
4570 bool isLittleEndian = Subtarget.isLittleEndian();
4571 MachineFunction &MF = DAG.getMachineFunction();
4572 MachineFrameInfo &MFI = MF.getFrameInfo();
4573 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4574
4575 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4576 "fastcc not supported on varargs functions");
4577
4578 EVT PtrVT = getPointerTy(MF.getDataLayout());
4579 // Potential tail calls could cause overwriting of argument stack slots.
4580 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4581 (CallConv == CallingConv::Fast));
4582 unsigned PtrByteSize = 8;
4583 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4584
4585 static const MCPhysReg GPR[] = {
4586 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4587 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4588 };
4589 static const MCPhysReg VR[] = {
4590 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4591 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4592 };
4593
4594 const unsigned Num_GPR_Regs = std::size(GPR);
4595 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4596 const unsigned Num_VR_Regs = std::size(VR);
4597
4598 // Do a first pass over the arguments to determine whether the ABI
4599 // guarantees that our caller has allocated the parameter save area
4600 // on its stack frame. In the ELFv1 ABI, this is always the case;
4601 // in the ELFv2 ABI, it is true if this is a vararg function or if
4602 // any parameter is located in a stack slot.
4603
4604 bool HasParameterArea = !isELFv2ABI || isVarArg;
4605 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4606 unsigned NumBytes = LinkageSize;
4607 unsigned AvailableFPRs = Num_FPR_Regs;
4608 unsigned AvailableVRs = Num_VR_Regs;
4609 for (const ISD::InputArg &In : Ins) {
4610 if (In.Flags.isNest())
4611 continue;
4612
4613 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4614 LinkageSize, ParamAreaSize, NumBytes,
4615 AvailableFPRs, AvailableVRs))
4616 HasParameterArea = true;
4617 }
4618
4619 // Add DAG nodes to load the arguments or copy them out of registers. On
4620 // entry to a function on PPC, the arguments start after the linkage area,
4621 // although the first ones are often in registers.
4622
4623 unsigned ArgOffset = LinkageSize;
4624   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4625   SmallVector<SDValue, 8> MemOps;
4626   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4627 unsigned CurArgIdx = 0;
4628 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4629 SDValue ArgVal;
4630 bool needsLoad = false;
4631 EVT ObjectVT = Ins[ArgNo].VT;
4632 EVT OrigVT = Ins[ArgNo].ArgVT;
4633 unsigned ObjSize = ObjectVT.getStoreSize();
4634 unsigned ArgSize = ObjSize;
4635 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4636 if (Ins[ArgNo].isOrigArg()) {
4637 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4638 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4639 }
4640 // We re-align the argument offset for each argument, except when using the
4641 // fast calling convention, when we need to make sure we do that only when
4642 // we'll actually use a stack slot.
4643 unsigned CurArgOffset;
4644 Align Alignment;
4645 auto ComputeArgOffset = [&]() {
4646 /* Respect alignment of argument on the stack. */
4647 Alignment =
4648 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4649 ArgOffset = alignTo(ArgOffset, Alignment);
4650 CurArgOffset = ArgOffset;
4651 };
4652
4653 if (CallConv != CallingConv::Fast) {
4654 ComputeArgOffset();
4655
4656 /* Compute GPR index associated with argument offset. */
4657 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4658 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4659 }
4660
4661 // FIXME the codegen can be much improved in some cases.
4662 // We do not have to keep everything in memory.
4663 if (Flags.isByVal()) {
4664 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4665
4666 if (CallConv == CallingConv::Fast)
4667 ComputeArgOffset();
4668
4669       // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of
4670       // the register size.
4670 ObjSize = Flags.getByValSize();
4671 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4672 // Empty aggregate parameters do not take up registers. Examples:
4673 // struct { } a;
4674 // union { } b;
4675 // int c[0];
4676 // etc. However, we have to provide a place-holder in InVals, so
4677 // pretend we have an 8-byte item at the current address for that
4678 // purpose.
4679 if (!ObjSize) {
4680 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4681 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4682 InVals.push_back(FIN);
4683 continue;
4684 }
4685
4686 // Create a stack object covering all stack doublewords occupied
4687 // by the argument. If the argument is (fully or partially) on
4688 // the stack, or if the argument is fully in registers but the
4689 // caller has allocated the parameter save anyway, we can refer
4690 // directly to the caller's stack frame. Otherwise, create a
4691 // local copy in our own frame.
4692 int FI;
4693 if (HasParameterArea ||
4694 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4695 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4696 else
4697 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4698 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4699
4700 // Handle aggregates smaller than 8 bytes.
4701 if (ObjSize < PtrByteSize) {
4702 // The value of the object is its address, which differs from the
4703 // address of the enclosing doubleword on big-endian systems.
4704 SDValue Arg = FIN;
4705 if (!isLittleEndian) {
4706 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4707 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4708 }
4709 InVals.push_back(Arg);
4710
4711 if (GPR_idx != Num_GPR_Regs) {
4712 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4713 FuncInfo->addLiveInAttr(VReg, Flags);
4714 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4715 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4716 SDValue Store =
4717 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4718 MachinePointerInfo(&*FuncArg), ObjType);
4719 MemOps.push_back(Store);
4720 }
4721 // Whether we copied from a register or not, advance the offset
4722 // into the parameter save area by a full doubleword.
4723 ArgOffset += PtrByteSize;
4724 continue;
4725 }
4726
4727 // The value of the object is its address, which is the address of
4728 // its first stack doubleword.
4729 InVals.push_back(FIN);
4730
4731 // Store whatever pieces of the object are in registers to memory.
4732 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4733 if (GPR_idx == Num_GPR_Regs)
4734 break;
4735
4736 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4737 FuncInfo->addLiveInAttr(VReg, Flags);
4738 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4739 SDValue Addr = FIN;
4740 if (j) {
4741 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4742 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4743 }
4744 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4745 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4746 SDValue Store =
4747 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4748 MachinePointerInfo(&*FuncArg, j), ObjType);
4749 MemOps.push_back(Store);
4750 ++GPR_idx;
4751 }
4752 ArgOffset += ArgSize;
4753 continue;
4754 }
4755
4756 switch (ObjectVT.getSimpleVT().SimpleTy) {
4757 default: llvm_unreachable("Unhandled argument type!");
4758 case MVT::i1:
4759 case MVT::i32:
4760 case MVT::i64:
4761 if (Flags.isNest()) {
4762 // The 'nest' parameter, if any, is passed in R11.
4763 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4764 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4765
4766 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4767 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4768
4769 break;
4770 }
4771
4772 // These can be scalar arguments or elements of an integer array type
4773 // passed directly. Clang may use those instead of "byval" aggregate
4774 // types to avoid forcing arguments to memory unnecessarily.
4775 if (GPR_idx != Num_GPR_Regs) {
4776 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4777 FuncInfo->addLiveInAttr(VReg, Flags);
4778 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4779
4780 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4781 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4782 // value to MVT::i64 and then truncate to the correct register size.
4783 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4784 } else {
4785 if (CallConv == CallingConv::Fast)
4786 ComputeArgOffset();
4787
4788 needsLoad = true;
4789 ArgSize = PtrByteSize;
4790 }
4791 if (CallConv != CallingConv::Fast || needsLoad)
4792 ArgOffset += 8;
4793 break;
4794
4795 case MVT::f32:
4796 case MVT::f64:
4797 // These can be scalar arguments or elements of a float array type
4798       // passed directly. The latter are used to implement ELFv2 homogeneous
4799 // float aggregates.
4800 if (FPR_idx != Num_FPR_Regs) {
4801 unsigned VReg;
4802
4803 if (ObjectVT == MVT::f32)
4804 VReg = MF.addLiveIn(FPR[FPR_idx],
4805 Subtarget.hasP8Vector()
4806 ? &PPC::VSSRCRegClass
4807 : &PPC::F4RCRegClass);
4808 else
4809 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4810 ? &PPC::VSFRCRegClass
4811 : &PPC::F8RCRegClass);
4812
4813 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4814 ++FPR_idx;
4815 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4816 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4817 // once we support fp <-> gpr moves.
4818
4819 // This can only ever happen in the presence of f32 array types,
4820 // since otherwise we never run out of FPRs before running out
4821 // of GPRs.
4822 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4823 FuncInfo->addLiveInAttr(VReg, Flags);
4824 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4825
4826 if (ObjectVT == MVT::f32) {
4827 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4828 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4829 DAG.getConstant(32, dl, MVT::i32));
4830 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4831 }
4832
4833 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4834 } else {
4835 if (CallConv == CallingConv::Fast)
4836 ComputeArgOffset();
4837
4838 needsLoad = true;
4839 }
4840
4841 // When passing an array of floats, the array occupies consecutive
4842 // space in the argument area; only round up to the next doubleword
4843 // at the end of the array. Otherwise, each float takes 8 bytes.
4844 if (CallConv != CallingConv::Fast || needsLoad) {
4845 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4846 ArgOffset += ArgSize;
4847 if (Flags.isInConsecutiveRegsLast())
4848 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4849 }
4850 break;
4851 case MVT::v4f32:
4852 case MVT::v4i32:
4853 case MVT::v8i16:
4854 case MVT::v16i8:
4855 case MVT::v2f64:
4856 case MVT::v2i64:
4857 case MVT::v1i128:
4858 case MVT::f128:
4859 // These can be scalar arguments or elements of a vector array type
4860       // passed directly. The latter are used to implement ELFv2 homogeneous
4861 // vector aggregates.
4862 if (VR_idx != Num_VR_Regs) {
4863 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4864 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4865 ++VR_idx;
4866 } else {
4867 if (CallConv == CallingConv::Fast)
4868 ComputeArgOffset();
4869 needsLoad = true;
4870 }
4871 if (CallConv != CallingConv::Fast || needsLoad)
4872 ArgOffset += 16;
4873 break;
4874 }
4875
4876 // We need to load the argument to a virtual register if we determined
4877 // above that we ran out of physical registers of the appropriate type.
4878 if (needsLoad) {
4879 if (ObjSize < ArgSize && !isLittleEndian)
4880 CurArgOffset += ArgSize - ObjSize;
4881 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4882 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4883 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4884 }
4885
4886 InVals.push_back(ArgVal);
4887 }
4888
4889 // Area that is at least reserved in the caller of this function.
4890 unsigned MinReservedArea;
4891 if (HasParameterArea)
4892 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4893 else
4894 MinReservedArea = LinkageSize;
4895
4896 // Set the size that is at least reserved in caller of this function. Tail
4897 // call optimized functions' reserved stack space needs to be aligned so that
4898 // taking the difference between two stack areas will result in an aligned
4899 // stack.
4900 MinReservedArea =
4901 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4902 FuncInfo->setMinReservedArea(MinReservedArea);
4903
4904 // If the function takes variable number of arguments, make a frame index for
4905 // the start of the first vararg value... for expansion of llvm.va_start.
4906   // The ELFv2 ABI spec says:
4907 // C programs that are intended to be *portable* across different compilers
4908 // and architectures must use the header file <stdarg.h> to deal with variable
4909 // argument lists.
4910 if (isVarArg && MFI.hasVAStart()) {
4911 int Depth = ArgOffset;
4912
4913 FuncInfo->setVarArgsFrameIndex(
4914 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4915 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4916
4917 // If this function is vararg, store any remaining integer argument regs
4918 // to their spots on the stack so that they may be loaded by dereferencing
4919 // the result of va_next.
4920 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4921 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4922 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4923 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4924 SDValue Store =
4925 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4926 MemOps.push_back(Store);
4927 // Increment the address by four for the next argument to store
4928 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4929 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4930 }
4931 }
4932
4933 if (!MemOps.empty())
4934 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4935
4936 return Chain;
4937}
4938
4939/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4940/// adjusted to accommodate the arguments for the tailcall.
4941static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4942 unsigned ParamSize) {
4943
4944 if (!isTailCall) return 0;
4945
4946   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4947 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4948 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4949 // Remember only if the new adjustment is bigger.
4950 if (SPDiff < FI->getTailCallSPDelta())
4951 FI->setTailCallSPDelta(SPDiff);
4952
4953 return SPDiff;
4954}
4955
4956static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4957
4958static bool callsShareTOCBase(const Function *Caller,
4959 const GlobalValue *CalleeGV,
4960 const TargetMachine &TM) {
4961 // It does not make sense to call callsShareTOCBase() with a caller that
4962 // is PC Relative since PC Relative callers do not have a TOC.
4963#ifndef NDEBUG
4964 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4965 assert(!STICaller->isUsingPCRelativeCalls() &&
4966 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4967#endif
4968
4969 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4970 // don't have enough information to determine if the caller and callee share
4971 // the same TOC base, so we have to pessimistically assume they don't for
4972 // correctness.
4973 if (!CalleeGV)
4974 return false;
4975
4976 // If the callee is preemptable, then the static linker will use a plt-stub
4977 // which saves the toc to the stack, and needs a nop after the call
4978 // instruction to convert to a toc-restore.
4979 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4980 return false;
4981
4982 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4983 // We may need a TOC restore in the situation where the caller requires a
4984 // valid TOC but the callee is PC Relative and does not.
4985 const Function *F = dyn_cast<Function>(CalleeGV);
4986 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4987
4988 // If we have an Alias we can try to get the function from there.
4989 if (Alias) {
4990 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4991 F = dyn_cast<Function>(GlobalObj);
4992 }
4993
4994 // If we still have no valid function pointer we do not have enough
4995 // information to determine if the callee uses PC Relative calls so we must
4996 // assume that it does.
4997 if (!F)
4998 return false;
4999
5000 // If the callee uses PC Relative we cannot guarantee that the callee won't
5001 // clobber the TOC of the caller and so we must assume that the two
5002 // functions do not share a TOC base.
5003 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
5004 if (STICallee->isUsingPCRelativeCalls())
5005 return false;
5006
5007 // If the GV is not a strong definition then we need to assume it can be
5008 // replaced by another function at link time. The function that replaces
5009 // it may not share the same TOC as the caller since the callee may be
5010 // replaced by a PC Relative version of the same function.
5011 if (!CalleeGV->isStrongDefinitionForLinker())
5012 return false;
5013
5014 // The medium and large code models are expected to provide a sufficiently
5015 // large TOC to provide all data addressing needs of a module with a
5016 // single TOC.
5017 if (CodeModel::Medium == TM.getCodeModel() ||
5018 CodeModel::Large == TM.getCodeModel())
5019 return true;
5020
5021 // Any explicitly-specified sections and section prefixes must also match.
5022 // Also, if we're using -ffunction-sections, then each function is always in
5023 // a different section (the same is true for COMDAT functions).
5024 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
5025 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
5026 return false;
5027 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
5028 if (F->getSectionPrefix() != Caller->getSectionPrefix())
5029 return false;
5030 }
5031
5032 return true;
5033}
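// Illustrative example (hypothetical code, not from this file): with the small
// code model, no -ffunction-sections, and no comdats, a pair such as
//
//   static int helper(int x) { return x + 1; }  // strong def, dso_local
//   int caller(int x) { return helper(x); }     // same section as helper
//
// passes every check above, so a direct call to helper needs no TOC-restore
// nop. If helper were preemptible (a default-visibility definition built for a
// shared library), shouldAssumeDSOLocal() would fail and the call would be
// treated as possibly crossing a TOC boundary.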
5034
5035static bool
5036needStackSlotPassParameters(const PPCSubtarget &Subtarget,
5037 const SmallVectorImpl<ISD::OutputArg> &Outs) {
5038 assert(Subtarget.is64BitELFABI());
5039
5040 const unsigned PtrByteSize = 8;
5041 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5042
5043 static const MCPhysReg GPR[] = {
5044 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5045 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5046 };
5047 static const MCPhysReg VR[] = {
5048 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5049 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5050 };
5051
5052 const unsigned NumGPRs = std::size(GPR);
5053 const unsigned NumFPRs = 13;
5054 const unsigned NumVRs = std::size(VR);
5055 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5056
5057 unsigned NumBytes = LinkageSize;
5058 unsigned AvailableFPRs = NumFPRs;
5059 unsigned AvailableVRs = NumVRs;
5060
5061 for (const ISD::OutputArg& Param : Outs) {
5062 if (Param.Flags.isNest()) continue;
5063
5064 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5065 LinkageSize, ParamAreaSize, NumBytes,
5066 AvailableFPRs, AvailableVRs))
5067 return true;
5068 }
5069 return false;
5070}
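// Illustrative example (hypothetical prototype): with 8 GPRs (X3-X10),
// 13 FPRs and 12 VRs available for parameters, a callee such as
//
//   void callee(long a1, long a2, long a3, long a4,
//               long a5, long a6, long a7, long a8, long a9);
//
// runs out of GPRs at a9, so CalculateStackSlotUsed() reports that the
// parameter save area is needed and this helper returns true.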
5071
5072static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5073 if (CB.arg_size() != CallerFn->arg_size())
5074 return false;
5075
5076 auto CalleeArgIter = CB.arg_begin();
5077 auto CalleeArgEnd = CB.arg_end();
5078 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5079
5080 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5081 const Value* CalleeArg = *CalleeArgIter;
5082 const Value* CallerArg = &(*CallerArgIter);
5083 if (CalleeArg == CallerArg)
5084 continue;
5085
5086 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5087 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5088 // }
5089 // 1st argument of callee is undef and has the same type as caller.
5090 if (CalleeArg->getType() == CallerArg->getType() &&
5091 isa<UndefValue>(CalleeArg))
5092 continue;
5093
5094 return false;
5095 }
5096
5097 return true;
5098}
5099
5100// Returns true if TCO is possible between the caller's and callee's
5101// calling conventions.
5102static bool
5103areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5104 CallingConv::ID CalleeCC) {
5105 // Tail calls are possible with fastcc and ccc.
5106 auto isTailCallableCC = [] (CallingConv::ID CC){
5107 return CC == CallingConv::C || CC == CallingConv::Fast;
5108 };
5109 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5110 return false;
5111
5112 // We can safely tail call both fastcc and ccc callees from a c calling
5113 // convention caller. If the caller is fastcc, we may have less stack space
5114 // than a non-fastcc caller with the same signature so disable tail-calls in
5115 // that case.
5116 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5117}
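// Illustrative summary of the rule above:
//
//   caller \ callee |  C (ccc) | Fast (fastcc) | other
//   ----------------+----------+---------------+------
//   C               |   yes    |     yes       |  no
//   Fast            |   no     |     yes       |  no
//   other           |   no     |     no        |  no
//
// A fastcc caller may only tail-call a fastcc callee because its own frame may
// be smaller than the one a C-convention caller would have set up.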
5118
5119bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5120 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5121 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5122 const SmallVectorImpl<ISD::OutputArg> &Outs,
5123 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5124 bool isCalleeExternalSymbol) const {
5125 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5126
5127 if (DisableSCO && !TailCallOpt) return false;
5128
5129 // Variadic argument functions are not supported.
5130 if (isVarArg) return false;
5131
5132 // Check that the calling conventions are compatible for tco.
5133 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5134 return false;
5135
5136 // A caller with any byval parameter is not supported.
5137 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5138 return false;
5139
5140 // A callee with any byval parameter is not supported either.
5141 // Note: This is a quick workaround, because in some cases, e.g.
5142 // caller's stack size > callee's stack size, we are still able to apply
5143 // sibling call optimization. For example, gcc is able to do SCO for caller1
5144 // in the following example, but not for caller2.
5145 // struct test {
5146 // long int a;
5147 // char ary[56];
5148 // } gTest;
5149 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5150 // b->a = v.a;
5151 // return 0;
5152 // }
5153 // void caller1(struct test a, struct test c, struct test *b) {
5154 // callee(gTest, b); }
5155 // void caller2(struct test *b) { callee(gTest, b); }
5156 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5157 return false;
5158
5159 // If callee and caller use different calling conventions, we cannot pass
5160 // parameters on stack since offsets for the parameter area may be different.
5161 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5162 return false;
5163
5164 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5165 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5166 // callee potentially have different TOC bases then we cannot tail call since
5167 // we need to restore the TOC pointer after the call.
5168 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5169 // We cannot guarantee this for indirect calls or calls to external functions.
5170 // When PC-Relative addressing is used, the concept of the TOC is no longer
5171 // applicable so this check is not required.
5172 // Check first for indirect calls.
5173 if (!Subtarget.isUsingPCRelativeCalls() &&
5174 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5175 return false;
5176
5177 // Check if we share the TOC base.
5178 if (!Subtarget.isUsingPCRelativeCalls() &&
5179 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5180 return false;
5181
5182 // TCO allows altering callee ABI, so we don't have to check further.
5183 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5184 return true;
5185
5186 if (DisableSCO) return false;
5187
5188 // If the callee uses the same argument list as the caller, then we can
5189 // apply SCO in this case. If it does not, then we need to check whether the
5190 // callee needs stack for passing arguments.
5191 // PC Relative tail calls may not have a CallBase.
5192 // If there is no CallBase we cannot verify if we have the same argument
5193 // list so assume that we don't have the same argument list.
5194 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5195 needStackSlotPassParameters(Subtarget, Outs))
5196 return false;
5197 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5198 return false;
5199
5200 return true;
5201}
5202
5203/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5204/// for tail call optimization. Targets which want to do tail call
5205/// optimization should implement this function.
5206bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5207 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5208 CallingConv::ID CallerCC, bool isVarArg,
5209 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5210 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5211 return false;
5212
5213 // Variable argument functions are not supported.
5214 if (isVarArg)
5215 return false;
5216
5217 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5218 // Functions containing by val parameters are not supported.
5219 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5220 return false;
5221
5222 // Non-PIC/GOT tail calls are supported.
5223 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5224 return true;
5225
5226 // At the moment we can only do local tail calls (in same module, hidden
5227 // or protected) if we are generating PIC.
5228 if (CalleeGV)
5229 return CalleeGV->hasHiddenVisibility() ||
5230 CalleeGV->hasProtectedVisibility();
5231 }
5232
5233 return false;
5234}
5235
5236/// isCallCompatibleAddress - Return the immediate to use if the specified
5237/// 32-bit value is representable in the immediate field of a BxA instruction.
5238static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5239 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5240 if (!C) return nullptr;
5241
5242 int Addr = C->getZExtValue();
5243 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5244 SignExtend32<26>(Addr) != Addr)
5245 return nullptr; // Top 6 bits have to be sext of immediate.
5246
5247 return DAG
5248 .getConstant(
5249 (int)C->getZExtValue() >> 2, SDLoc(Op),
5250 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5251 .getNode();
5252}
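// Illustrative example (hypothetical addresses): BLA needs a 4-byte-aligned
// target whose value survives 26-bit sign extension, so
//
//   0x01FFFFFC -> accepted, encoded as (0x01FFFFFC >> 2) == 0x007FFFFF
//   0x02000000 -> rejected, SignExtend32<26>() flips it negative
//   0x00001002 -> rejected, low two bits are not zero
//
// which is exactly what the two checks above test.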
5253
5254namespace {
5255
5256struct TailCallArgumentInfo {
5257 SDValue Arg;
5258 SDValue FrameIdxOp;
5259 int FrameIdx = 0;
5260
5261 TailCallArgumentInfo() = default;
5262};
5263
5264} // end anonymous namespace
5265
5266/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5267static void StoreTailCallArgumentsToStackSlot(
5268 SelectionDAG &DAG, SDValue Chain,
5269 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5270 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5271 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5272 SDValue Arg = TailCallArgs[i].Arg;
5273 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5274 int FI = TailCallArgs[i].FrameIdx;
5275 // Store relative to the frame pointer.
5276 MemOpChains.push_back(DAG.getStore(
5277 Chain, dl, Arg, FIN,
5278 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5279 }
5280}
5281
5282/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5283/// the appropriate stack slot for the tail call optimized function call.
5284static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5285 SDValue OldRetAddr, SDValue OldFP,
5286 int SPDiff, const SDLoc &dl) {
5287 if (SPDiff) {
5288 // Calculate the new stack slot for the return address.
5289 MachineFunction &MF = DAG.getMachineFunction();
5290 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5291 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5292 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5293 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5294 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5295 NewRetAddrLoc, true);
5296 SDValue NewRetAddrFrIdx =
5297 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5298 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5299 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5300 }
5301 return Chain;
5302}
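// Worked example (hypothetical numbers): with SPDiff == -32 (the callee needs
// 32 more bytes of argument space) and a 64-bit ELF return save offset of 16,
// NewRetAddrLoc == -16, so the old LR value is stored into the fixed-stack
// slot that becomes the LR save word of the enlarged tail-call frame.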
5303
5304/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5305/// the position of the argument.
5306static void CalculateTailCallArgDest(
5307 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5308 int SPDiff, unsigned ArgOffset,
5309 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5310 int Offset = ArgOffset + SPDiff;
5311 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5312 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5313 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5314 SDValue FIN = DAG.getFrameIndex(FI, VT);
5315 TailCallArgumentInfo Info;
5316 Info.Arg = Arg;
5317 Info.FrameIdxOp = FIN;
5318 Info.FrameIdx = FI;
5319 TailCallArguments.push_back(Info);
5320}
5321
5322/// EmitTailCallLoadFPAndRetAddr - Emit a load from the frame pointer and return
5323/// address stack slot. Returns the chain as result and the loaded values in
5324/// LROpOut/FPOpOut. Used when tail calling.
5325SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5326 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5327 SDValue &FPOpOut, const SDLoc &dl) const {
5328 if (SPDiff) {
5329 // Load the LR and FP stack slot for later adjusting.
5330 LROpOut = getReturnAddrFrameIndex(DAG);
5331 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5332 MachinePointerInfo());
5333 Chain = SDValue(LROpOut.getNode(), 1);
5334 }
5335 return Chain;
5336}
5337
5338/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5339/// by "Src" to address "Dst" of size "Size". Alignment information is
5340/// specified by the specific parameter attribute. The copy will be passed as
5341/// a byval function parameter.
5342/// Sometimes what we are copying is the end of a larger object, the part that
5343/// does not fit in registers.
5344static SDValue CreateCopyOfByValArgument(SDValue Arg, SDValue PtrOff,
5345 SDValue Chain, ISD::ArgFlagsTy Flags,
5346 SelectionDAG &DAG, const SDLoc &dl) {
5347 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5348 return DAG.getMemcpy(
5349 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5350 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5351}
5352
5353/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5354/// tail calls.
5355static void LowerMemOpCallTo(
5356 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5357 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5358 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5359 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5360 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5361 if (!isTailCall) {
5362 if (isVector) {
5363 SDValue StackPtr;
5364 if (isPPC64)
5365 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5366 else
5367 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5368 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5369 DAG.getConstant(ArgOffset, dl, PtrVT));
5370 }
5371 MemOpChains.push_back(
5372 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5373 // Calculate and remember argument location.
5374 } else
5375 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5376 TailCallArguments);
5377}
5378
5379static void
5380PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5381 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5382 SDValue FPOp,
5383 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5384 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5385 // might overwrite each other in case of tail call optimization.
5386 SmallVector<SDValue, 8> MemOpChains2;
5387 // Do not flag preceding copytoreg stuff together with the following stuff.
5388 InGlue = SDValue();
5389 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5390 MemOpChains2, dl);
5391 if (!MemOpChains2.empty())
5392 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5393
5394 // Store the return address to the appropriate stack slot.
5395 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5396
5397 // Emit callseq_end just before tailcall node.
5398 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5399 InGlue = Chain.getValue(1);
5400}
5401
5402// Is this global address that of a function that can be called by name? (as
5403// opposed to something that must hold a descriptor for an indirect call).
5404static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5405 if (GV) {
5406 if (GV->isThreadLocal())
5407 return false;
5408
5409 return GV->getValueType()->isFunctionTy();
5410 }
5411
5412 return false;
5413}
5414
5415SDValue PPCTargetLowering::LowerCallResult(
5416 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5417 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5418 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5419 SmallVector<CCValAssign, 16> RVLocs;
5420 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5421 *DAG.getContext());
5422
5423 CCRetInfo.AnalyzeCallResult(
5424 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5425 ? RetCC_PPC_Cold
5426 : RetCC_PPC);
5427
5428 // Copy all of the result registers out of their specified physreg.
5429 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5430 CCValAssign &VA = RVLocs[i];
5431 assert(VA.isRegLoc() && "Can only return in registers!");
5432
5433 SDValue Val;
5434
5435 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5436 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5437 InGlue);
5438 Chain = Lo.getValue(1);
5439 InGlue = Lo.getValue(2);
5440 VA = RVLocs[++i]; // skip ahead to next loc
5441 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5442 InGlue);
5443 Chain = Hi.getValue(1);
5444 InGlue = Hi.getValue(2);
5445 if (!Subtarget.isLittleEndian())
5446 std::swap (Lo, Hi);
5447 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5448 } else {
5449 Val = DAG.getCopyFromReg(Chain, dl,
5450 VA.getLocReg(), VA.getLocVT(), InGlue);
5451 Chain = Val.getValue(1);
5452 InGlue = Val.getValue(2);
5453 }
5454
5455 switch (VA.getLocInfo()) {
5456 default: llvm_unreachable("Unknown loc info!");
5457 case CCValAssign::Full: break;
5458 case CCValAssign::AExt:
5459 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5460 break;
5461 case CCValAssign::ZExt:
5462 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5463 DAG.getValueType(VA.getValVT()));
5464 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5465 break;
5466 case CCValAssign::SExt:
5467 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5468 DAG.getValueType(VA.getValVT()));
5469 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5470 break;
5471 }
5472
5473 InVals.push_back(Val);
5474 }
5475
5476 return Chain;
5477}
5478
5479static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5480 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5481 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5482 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5483
5484 // PatchPoint calls are not indirect.
5485 if (isPatchPoint)
5486 return false;
5487
5488 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5489 return false;
5490
5491 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
5492 // because the immediate function pointer points to a descriptor instead of
5493 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5494 // pointer immediate points to the global entry point, while the BLA would
5495 // need to jump to the local entry point (see rL211174).
5496 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5497 isBLACompatibleAddress(Callee, DAG))
5498 return false;
5499
5500 return true;
5501}
5502
5503// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5504static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5505 return Subtarget.isAIXABI() ||
5506 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5507}
5508
5509static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5510 const Function &Caller, const SDValue &Callee,
5511 const PPCSubtarget &Subtarget,
5512 const TargetMachine &TM,
5513 bool IsStrictFPCall = false) {
5514 if (CFlags.IsTailCall)
5515 return PPCISD::TC_RETURN;
5516
5517 unsigned RetOpc = 0;
5518 // This is a call through a function pointer.
5519 if (CFlags.IsIndirect) {
5520 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5521 // indirect calls. The save of the caller's TOC pointer to the stack will be
5522 // inserted into the DAG as part of call lowering. The restore of the TOC
5523 // pointer is modeled by using a pseudo instruction for the call opcode that
5524 // represents the 2 instruction sequence of an indirect branch and link,
5525 // immediately followed by a load of the TOC pointer from the stack save
5526 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5527 // as it is not saved or used.
5528 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5529 : PPCISD::BCTRL;
5530 } else if (Subtarget.isUsingPCRelativeCalls()) {
5531 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5532 RetOpc = PPCISD::CALL_NOTOC;
5533 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5534 // The ABIs that maintain a TOC pointer across calls need to have a nop
5535 // immediately following the call instruction if the caller and callee may
5536 // have different TOC bases. At link time if the linker determines the calls
5537 // may not share a TOC base, the call is redirected to a trampoline inserted
5538 // by the linker. The trampoline will (among other things) save the caller's
5539 // TOC pointer at an ABI designated offset in the linkage area and the
5540 // linker will rewrite the nop to be a load of the TOC pointer from the
5541 // linkage area into gpr2.
5542 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5543 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5544 RetOpc =
5545 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5546 } else
5547 RetOpc = PPCISD::CALL;
5548 if (IsStrictFPCall) {
5549 switch (RetOpc) {
5550 default:
5551 llvm_unreachable("Unknown call opcode");
5552 case PPCISD::BCTRL_LOAD_TOC:
5553 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5554 break;
5555 case PPCISD::BCTRL:
5556 RetOpc = PPCISD::BCTRL_RM;
5557 break;
5558 case PPCISD::CALL_NOTOC:
5559 RetOpc = PPCISD::CALL_NOTOC_RM;
5560 break;
5561 case PPCISD::CALL:
5562 RetOpc = PPCISD::CALL_RM;
5563 break;
5564 case PPCISD::CALL_NOP:
5565 RetOpc = PPCISD::CALL_NOP_RM;
5566 break;
5567 }
5568 }
5569 return RetOpc;
5570}
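// Summary of the selection above (illustrative, before the strictfp "_RM"
// remapping):
//
//   tail call                             -> PPCISD::TC_RETURN
//   indirect + TOC save/restore required  -> PPCISD::BCTRL_LOAD_TOC
//   indirect, no TOC to restore           -> PPCISD::BCTRL
//   direct, PC-relative (ELFv2 + PCRel)   -> PPCISD::CALL_NOTOC
//   direct, TOC ABI, TOC base shared      -> PPCISD::CALL
//   direct, TOC ABI, TOC base may differ  -> PPCISD::CALL_NOP (nop for linker)
//   direct, other ABIs (32-bit ELF)       -> PPCISD::CALL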
5571
5572static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5573 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5574 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5575 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5576 return SDValue(Dest, 0);
5577
5578 // Returns true if the callee is local, and false otherwise.
5579 auto isLocalCallee = [&]() {
5580 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5581 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5582
5583 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5584 !isa_and_nonnull<GlobalIFunc>(GV);
5585 };
5586
5587 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5588 // a static relocation model causes some versions of GNU LD (2.17.50, at
5589 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5590 // built with secure-PLT.
5591 bool UsePlt =
5592 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5593 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5594
5595 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5596 const TargetMachine &TM = Subtarget.getTargetMachine();
5597 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5598 auto *S =
5599 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5600
5602 return DAG.getMCSymbol(S, PtrVT);
5603 };
5604
5605 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5606 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5607 if (isFunctionGlobalAddress(GV)) {
5608 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5609
5610 if (Subtarget.isAIXABI()) {
5611 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5612 return getAIXFuncEntryPointSymbolSDNode(GV);
5613 }
5614 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5615 UsePlt ? PPCII::MO_PLT : 0);
5616 }
5617
5618 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5619 const char *SymName = S->getSymbol();
5620 if (Subtarget.isAIXABI()) {
5621 // If there exists a user-declared function whose name is the same as the
5622 // ExternalSymbol's, then we pick up the user-declared version.
5623 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5624 if (const Function *F =
5625 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5626 return getAIXFuncEntryPointSymbolSDNode(F);
5627
5628 // On AIX, direct function calls reference the symbol for the function's
5629 // entry point, which is named by prepending a "." before the function's
5630 // C-linkage name. A Qualname is returned here because an external
5631 // function entry point is a csect with XTY_ER property.
5632 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5633 auto &Context = DAG.getMachineFunction().getContext();
5634 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5635 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5636 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5637 return Sec->getQualNameSymbol();
5638 };
5639
5640 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5641 }
5642 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5643 UsePlt ? PPCII::MO_PLT : 0);
5644 }
5645
5646 // No transformation needed.
5647 assert(Callee.getNode() && "What no callee?");
5648 return Callee;
5649}
5650
5651static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5652 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5653 "Expected a CALLSEQ_STARTSDNode.");
5654
5655 // The last operand is the chain, except when the node has glue. If the node
5656 // has glue, then the last operand is the glue, and the chain is the second
5657 // last operand.
5658 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5659 if (LastValue.getValueType() != MVT::Glue)
5660 return LastValue;
5661
5662 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5663}
5664
5665// Creates the node that moves a function's address into the count register
5666// to prepare for an indirect call instruction.
5667static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5668 SDValue &Glue, SDValue &Chain,
5669 const SDLoc &dl) {
5670 SDValue MTCTROps[] = {Chain, Callee, Glue};
5671 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5672 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5673 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5674 // The glue is the second value produced.
5675 Glue = Chain.getValue(1);
5676}
5677
5678static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5679 SDValue &Glue, SDValue &Chain,
5680 SDValue CallSeqStart,
5681 const CallBase *CB, const SDLoc &dl,
5682 bool hasNest,
5683 const PPCSubtarget &Subtarget) {
5684 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5685 // entry point, but to the function descriptor (the function entry point
5686 // address is part of the function descriptor though).
5687 // The function descriptor is a three doubleword structure with the
5688 // following fields: function entry point, TOC base address and
5689 // environment pointer.
5690 // Thus for a call through a function pointer, the following actions need
5691 // to be performed:
5692 // 1. Save the TOC of the caller in the TOC save area of its stack
5693 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5694 // 2. Load the address of the function entry point from the function
5695 // descriptor.
5696 // 3. Load the TOC of the callee from the function descriptor into r2.
5697 // 4. Load the environment pointer from the function descriptor into
5698 // r11.
5699 // 5. Branch to the function entry point address.
5700 // 6. On return of the callee, the TOC of the caller needs to be
5701 // restored (this is done in FinishCall()).
5702 //
5703 // The loads are scheduled at the beginning of the call sequence, and the
5704 // register copies are flagged together to ensure that no other
5705 // operations can be scheduled in between. E.g. without flagging the
5706 // copies together, a TOC access in the caller could be scheduled between
5707 // the assignment of the callee TOC and the branch to the callee, which leads
5708 // to incorrect code.
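  // Illustrative sketch (hypothetical type, not used by the backend): the
  // descriptor dereferenced below can be pictured as
  //
  //   struct FunctionDescriptor {
  //     uintptr_t EntryPoint;  // loaded first, ends up in CTR
  //     uintptr_t TOCBase;     // at descriptorTOCAnchorOffset(), copied to r2
  //     uintptr_t EnvPointer;  // at descriptorEnvironmentPointerOffset(), to r11
  //   };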
5709
5710 // Start by loading the function address from the descriptor.
5711 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5712 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5713 ? (MachineMemOperand::MODereferenceable |
5714 MachineMemOperand::MOInvariant)
5715 : MachineMemOperand::MONone;
5716
5717 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5718
5719 // Registers used in building the DAG.
5720 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5721 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5722
5723 // Offsets of descriptor members.
5724 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5725 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5726
5727 const MVT RegVT = Subtarget.getScalarIntVT();
5728 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5729
5730 // One load for the function's entry point address.
5731 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5732 Alignment, MMOFlags);
5733
5734 // One for loading the TOC anchor for the module that contains the called
5735 // function.
5736 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5737 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5738 SDValue TOCPtr =
5739 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5740 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5741
5742 // One for loading the environment pointer.
5743 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5744 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5745 SDValue LoadEnvPtr =
5746 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5747 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5748
5749
5750 // Then copy the newly loaded TOC anchor to the TOC pointer.
5751 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5752 Chain = TOCVal.getValue(0);
5753 Glue = TOCVal.getValue(1);
5754
5755 // If the function call has an explicit 'nest' parameter, it takes the
5756 // place of the environment pointer.
5757 assert((!hasNest || !Subtarget.isAIXABI()) &&
5758 "Nest parameter is not supported on AIX.");
5759 if (!hasNest) {
5760 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5761 Chain = EnvVal.getValue(0);
5762 Glue = EnvVal.getValue(1);
5763 }
5764
5765 // The rest of the indirect call sequence is the same as the non-descriptor
5766 // DAG.
5767 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5768}
5769
5770static void
5771buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5772 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5773 SelectionDAG &DAG,
5774 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5775 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5776 const PPCSubtarget &Subtarget) {
5777 const bool IsPPC64 = Subtarget.isPPC64();
5778 // MVT for a general purpose register.
5779 const MVT RegVT = Subtarget.getScalarIntVT();
5780
5781 // First operand is always the chain.
5782 Ops.push_back(Chain);
5783
5784 // If it's a direct call pass the callee as the second operand.
5785 if (!CFlags.IsIndirect)
5786 Ops.push_back(Callee);
5787 else {
5788 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5789
5790 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5791 // on the stack (this would have been done in `LowerCall_64SVR4` or
5792 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5793 // represents both the indirect branch and a load that restores the TOC
5794 // pointer from the linkage area. The operand for the TOC restore is an add
5795 // of the TOC save offset to the stack pointer. This must be the second
5796 // operand: after the chain input but before any other variadic arguments.
5797 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5798 // saved or used.
5799 if (isTOCSaveRestoreRequired(Subtarget)) {
5800 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5801
5802 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5803 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5804 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5805 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5806 Ops.push_back(AddTOC);
5807 }
5808
5809 // Add the register used for the environment pointer.
5810 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5811 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5812 RegVT));
5813
5814
5815 // Add CTR register as callee so a bctr can be emitted later.
5816 if (CFlags.IsTailCall)
5817 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5818 }
5819
5820 // If this is a tail call add stack pointer delta.
5821 if (CFlags.IsTailCall)
5822 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5823
5824 // Add argument registers to the end of the list so that they are known live
5825 // into the call.
5826 for (const auto &[Reg, N] : RegsToPass)
5827 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5828
5829 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5830 // no way to mark dependencies as implicit here.
5831 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5832 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5833 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5834 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5835
5836 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5837 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5838 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5839
5840 // Add a register mask operand representing the call-preserved registers.
5841 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5842 const uint32_t *Mask =
5843 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5844 assert(Mask && "Missing call preserved mask for calling convention");
5845 Ops.push_back(DAG.getRegisterMask(Mask));
5846
5847 // If the glue is valid, it is the last operand.
5848 if (Glue.getNode())
5849 Ops.push_back(Glue);
5850}
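// Illustrative summary (direct, non-tail call on a TOC-based ABI): the Ops
// list built above ends up roughly as
//
//   { Chain, Callee, <argument registers...>, X2 (TOC pointer), RegMask, Glue }
//
// while an indirect call replaces Callee with the TOC-restore address
// (SP + TOC save offset) plus, on descriptor ABIs without a nest parameter,
// the environment-pointer register; 32-bit SVR4 vararg calls additionally
// carry CR1EQ.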
5851
5852SDValue PPCTargetLowering::FinishCall(
5853 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5854 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5855 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5856 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5857 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5858
5859 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5860 Subtarget.isAIXABI())
5861 setUsesTOCBasePtr(DAG);
5862
5863 unsigned CallOpc =
5864 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5865 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5866
5867 if (!CFlags.IsIndirect)
5868 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5869 else if (Subtarget.usesFunctionDescriptors())
5870 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5871 dl, CFlags.HasNest, Subtarget);
5872 else
5873 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5874
5875 // Build the operand list for the call instruction.
5876 SmallVector<SDValue, 8> Ops;
5877 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5878 SPDiff, Subtarget);
5879
5880 // Emit tail call.
5881 if (CFlags.IsTailCall) {
5882 // Indirect tail call when using PC Relative calls do not have the same
5883 // constraints.
5884 assert(((Callee.getOpcode() == ISD::Register &&
5885 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5886 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5887 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5888 isa<ConstantSDNode>(Callee) ||
5889 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5890 "Expecting a global address, external symbol, absolute value, "
5891 "register or an indirect tail call when PC Relative calls are "
5892 "used.");
5893 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5894 assert(CallOpc == PPCISD::TC_RETURN &&
5895 "Unexpected call opcode for a tail call.");
5896 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5897 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5898 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5899 return Ret;
5900 }
5901
5902 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5903 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5904 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5905 Glue = Chain.getValue(1);
5906
5907 // When performing tail call optimization the callee pops its arguments off
5908 // the stack. Account for this here so these bytes can be pushed back on in
5909 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5910 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5911 getTargetMachine().Options.GuaranteedTailCallOpt)
5912 ? NumBytes
5913 : 0;
5914
5915 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5916 Glue = Chain.getValue(1);
5917
5918 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5919 DAG, InVals);
5920}
5921
5922bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5923 CallingConv::ID CalleeCC = CB->getCallingConv();
5924 const Function *CallerFunc = CB->getCaller();
5925 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5926 const Function *CalleeFunc = CB->getCalledFunction();
5927 if (!CalleeFunc)
5928 return false;
5929 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5930
5933
5931 SmallVector<ISD::OutputArg, 2> Outs;
5932 SmallVector<ISD::InputArg, 2> Ins;
5934 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5935 CalleeFunc->getAttributes(), Outs, *this,
5936 CalleeFunc->getDataLayout());
5937
5938 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5939 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5940 false /*isCalleeExternalSymbol*/);
5941}
5942
5943bool PPCTargetLowering::isEligibleForTCO(
5944 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5945 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5946 const SmallVectorImpl<ISD::OutputArg> &Outs,
5947 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5948 bool isCalleeExternalSymbol) const {
5949 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5950 return false;
5951
5952 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5953 return IsEligibleForTailCallOptimization_64SVR4(
5954 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5955 isCalleeExternalSymbol);
5956 else
5957 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5958 isVarArg, Ins);
5959}
5960
5961SDValue
5962PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5963 SmallVectorImpl<SDValue> &InVals) const {
5964 SelectionDAG &DAG = CLI.DAG;
5965 SDLoc &dl = CLI.DL;
5966 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5967 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5968 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5969 SDValue Chain = CLI.Chain;
5970 SDValue Callee = CLI.Callee;
5971 bool &isTailCall = CLI.IsTailCall;
5972 CallingConv::ID CallConv = CLI.CallConv;
5973 bool isVarArg = CLI.IsVarArg;
5974 bool isPatchPoint = CLI.IsPatchPoint;
5975 const CallBase *CB = CLI.CB;
5976
5977 if (isTailCall) {
5978 MachineFunction &MF = DAG.getMachineFunction();
5979 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5980 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5981 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5982 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5983
5984 isTailCall =
5985 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5986 &(MF.getFunction()), IsCalleeExternalSymbol);
5987 if (isTailCall) {
5988 ++NumTailCalls;
5989 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5990 ++NumSiblingCalls;
5991
5992 // PC Relative calls no longer guarantee that the callee is a Global
5993 // Address Node. The callee could be an indirect tail call in which
5994 // case the SDValue for the callee could be a load (to load the address
5995 // of a function pointer) or it may be a register copy (to move the
5996 // address of the callee from a function parameter into a virtual
5997 // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5998 assert((Subtarget.isUsingPCRelativeCalls() ||
5999 isa<GlobalAddressSDNode>(Callee)) &&
6000 "Callee should be an llvm::Function object.");
6001
6002 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
6003 << "\nTCO callee: ");
6004 LLVM_DEBUG(Callee.dump());
6005 }
6006 }
6007
6008 if (!isTailCall && CB && CB->isMustTailCall())
6009 report_fatal_error("failed to perform tail call elimination on a call "
6010 "site marked musttail");
6011
6012 // When long calls (i.e. indirect calls) are always used, calls are always
6013 // made via function pointer. If we have a function name, first translate it
6014 // into a pointer.
6015 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
6016 !isTailCall)
6017 Callee = LowerGlobalAddress(Callee, DAG);
6018
6019 CallFlags CFlags(
6020 CallConv, isTailCall, isVarArg, isPatchPoint,
6021 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
6022 // hasNest
6023 Subtarget.is64BitELFABI() &&
6024 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
6025 CLI.NoMerge);
6026
6027 if (Subtarget.isAIXABI())
6028 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6029 InVals, CB);
6030
6031 assert(Subtarget.isSVR4ABI());
6032 if (Subtarget.isPPC64())
6033 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6034 InVals, CB);
6035 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6036 InVals, CB);
6037}
6038
6039SDValue PPCTargetLowering::LowerCall_32SVR4(
6040 SDValue Chain, SDValue Callee, CallFlags CFlags,
6041 const SmallVectorImpl<ISD::OutputArg> &Outs,
6042 const SmallVectorImpl<SDValue> &OutVals,
6043 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6044 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6045 const CallBase *CB) const {
6046 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
6047 // of the 32-bit SVR4 ABI stack frame layout.
6048
6049 const CallingConv::ID CallConv = CFlags.CallConv;
6050 const bool IsVarArg = CFlags.IsVarArg;
6051 const bool IsTailCall = CFlags.IsTailCall;
6052
6053 assert((CallConv == CallingConv::C ||
6054 CallConv == CallingConv::Cold ||
6055 CallConv == CallingConv::Fast) && "Unknown calling convention!");
6056
6057 const Align PtrAlign(4);
6058
6059 MachineFunction &MF = DAG.getMachineFunction();
6060
6061 // Mark this function as potentially containing a tail call. As a consequence
6062 // the frame pointer will be used for dynamic allocations and for restoring the
6063 // caller's stack pointer in this function's epilogue. This is done because the
6064 // tail-called function might overwrite the value in this function's (MF)
6065 // stack pointer stack slot 0(SP).
6066 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6067 CallConv == CallingConv::Fast)
6068 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6069
6070 // Count how many bytes are to be pushed on the stack, including the linkage
6071 // area, parameter list area and the part of the local variable space which
6072 // contains copies of aggregates which are passed by value.
6073
6074 // Assign locations to all of the outgoing arguments.
6075 SmallVector<CCValAssign, 16> ArgLocs;
6076 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6077
6078 // Reserve space for the linkage area on the stack.
6079 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6080 PtrAlign);
6081
6082 if (IsVarArg) {
6083 // Handle fixed and variable vector arguments differently.
6084 // Fixed vector arguments go into registers as long as registers are
6085 // available. Variable vector arguments always go into memory.
6086 unsigned NumArgs = Outs.size();
6087
6088 for (unsigned i = 0; i != NumArgs; ++i) {
6089 MVT ArgVT = Outs[i].VT;
6090 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6091 bool Result;
6092
6093 if (!ArgFlags.isVarArg()) {
6094 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6095 Outs[i].OrigTy, CCInfo);
6096 } else {
6097 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6098 ArgFlags, Outs[i].OrigTy, CCInfo);
6099 }
6100
6101 if (Result) {
6102#ifndef NDEBUG
6103 errs() << "Call operand #" << i << " has unhandled type "
6104 << ArgVT << "\n";
6105#endif
6106 llvm_unreachable(nullptr);
6107 }
6108 }
6109 } else {
6110 // All arguments are treated the same.
6111 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6112 }
6113
6114 // Assign locations to all of the outgoing aggregate by value arguments.
6115 SmallVector<CCValAssign, 16> ByValArgLocs;
6116 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6117
6118 // Reserve stack space for the allocations in CCInfo.
6119 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6120
6121 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6122
6123 // Size of the linkage area, parameter list area and the part of the local
6124 // space variable where copies of aggregates which are passed by value are
6125 // stored.
6126 unsigned NumBytes = CCByValInfo.getStackSize();
6127
6128 // Calculate by how many bytes the stack has to be adjusted in case of tail
6129 // call optimization.
6130 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6131
6132 // Adjust the stack pointer for the new arguments...
6133 // These operations are automatically eliminated by the prolog/epilog pass
6134 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6135 SDValue CallSeqStart = Chain;
6136
6137 // Load the return address and frame pointer so it can be moved somewhere else
6138 // later.
6139 SDValue LROp, FPOp;
6140 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6141
6142 // Set up a copy of the stack pointer for use loading and storing any
6143 // arguments that may not fit in the registers available for argument
6144 // passing.
6145 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6146
6147 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6148 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6149 SmallVector<SDValue, 8> MemOpChains;
6150
6151 bool seenFloatArg = false;
6152 // Walk the register/memloc assignments, inserting copies/loads.
6153 // i - Tracks the index into the list of registers allocated for the call
6154 // RealArgIdx - Tracks the index into the list of actual function arguments
6155 // j - Tracks the index into the list of byval arguments
6156 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6157 i != e;
6158 ++i, ++RealArgIdx) {
6159 CCValAssign &VA = ArgLocs[i];
6160 SDValue Arg = OutVals[RealArgIdx];
6161 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6162
6163 if (Flags.isByVal()) {
6164 // Argument is an aggregate which is passed by value, thus we need to
6165 // create a copy of it in the local variable space of the current stack
6166 // frame (which is the stack frame of the caller) and pass the address of
6167 // this copy to the callee.
6168 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6169 CCValAssign &ByValVA = ByValArgLocs[j++];
6170 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6171
6172 // Memory reserved in the local variable space of the callers stack frame.
6173 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6174
6175 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6176 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6177 StackPtr, PtrOff);
6178
6179 // Create a copy of the argument in the local area of the current
6180 // stack frame.
6181 SDValue MemcpyCall =
6182 CreateCopyOfByValArgument(Arg, PtrOff,
6183 CallSeqStart.getNode()->getOperand(0),
6184 Flags, DAG, dl);
6185
6186 // This must go outside the CALLSEQ_START..END.
6187 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6188 SDLoc(MemcpyCall));
6189 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6190 NewCallSeqStart.getNode());
6191 Chain = CallSeqStart = NewCallSeqStart;
6192
6193 // Pass the address of the aggregate copy on the stack either in a
6194 // physical register or in the parameter list area of the current stack
6195 // frame to the callee.
6196 Arg = PtrOff;
6197 }
6198
6199 // When useCRBits() is true, there can be i1 arguments.
6200 // It is because getRegisterType(MVT::i1) => MVT::i1,
6201 // and for other integer types getRegisterType() => MVT::i32.
6202 // Extend i1 and ensure callee will get i32.
6203 if (Arg.getValueType() == MVT::i1)
6204 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6205 dl, MVT::i32, Arg);
6206
6207 if (VA.isRegLoc()) {
6208 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6209 // Put argument in a physical register.
6210 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6211 bool IsLE = Subtarget.isLittleEndian();
6212 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6213 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6214 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6215 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6216 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6217 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6218 SVal.getValue(0)));
6219 } else
6220 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6221 } else {
6222 // Put argument in the parameter list area of the current stack frame.
6223 assert(VA.isMemLoc());
6224 unsigned LocMemOffset = VA.getLocMemOffset();
6225
6226 if (!IsTailCall) {
6227 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6228 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6229 StackPtr, PtrOff);
6230
6231 MemOpChains.push_back(
6232 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6233 } else {
6234 // Calculate and remember argument location.
6235 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6236 TailCallArguments);
6237 }
6238 }
6239 }
6240
6241 if (!MemOpChains.empty())
6242 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6243
6244 // Build a sequence of copy-to-reg nodes chained together with token chain
6245 // and flag operands which copy the outgoing args into the appropriate regs.
6246 SDValue InGlue;
6247 for (const auto &[Reg, N] : RegsToPass) {
6248 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6249 InGlue = Chain.getValue(1);
6250 }
6251
6252 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6253 // registers.
6254 if (IsVarArg) {
6255 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6256 SDValue Ops[] = { Chain, InGlue };
6257
6258 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6259 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6260
6261 InGlue = Chain.getValue(1);
6262 }
6263
6264 if (IsTailCall)
6265 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6266 TailCallArguments);
6267
6268 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6269 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6270}
6271
6272// Copy an argument into memory, being careful to do this outside the
6273// call sequence for the call to which the argument belongs.
6274SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6275 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6276 SelectionDAG &DAG, const SDLoc &dl) const {
6277 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6278 CallSeqStart.getNode()->getOperand(0),
6279 Flags, DAG, dl);
6280 // The MEMCPY must go outside the CALLSEQ_START..END.
6281 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6282 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6283 SDLoc(MemcpyCall));
6284 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6285 NewCallSeqStart.getNode());
6286 return NewCallSeqStart;
6287}
6288
6289SDValue PPCTargetLowering::LowerCall_64SVR4(
6290 SDValue Chain, SDValue Callee, CallFlags CFlags,
6291 const SmallVectorImpl<ISD::OutputArg> &Outs,
6292 const SmallVectorImpl<SDValue> &OutVals,
6293 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6294 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6295 const CallBase *CB) const {
6296 bool isELFv2ABI = Subtarget.isELFv2ABI();
6297 bool isLittleEndian = Subtarget.isLittleEndian();
6298 unsigned NumOps = Outs.size();
6299 bool IsSibCall = false;
6300 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6301
6302 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6303 unsigned PtrByteSize = 8;
6304
6305 MachineFunction &MF = DAG.getMachineFunction();
6306
6307 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6308 IsSibCall = true;
6309
6310 // Mark this function as potentially containing a tail call. As a consequence
6311 // the frame pointer will be used for dynamic allocations and for restoring the
6312 // caller's stack pointer in this function's epilogue. This is done because the
6313 // tail-called function might overwrite the value in this function's (MF)
6314 // stack pointer stack slot 0(SP).
6315 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6316 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6317
6318 assert(!(IsFastCall && CFlags.IsVarArg) &&
6319 "fastcc not supported on varargs functions");
6320
6321 // Count how many bytes are to be pushed on the stack, including the linkage
6322 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6323 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6324 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
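  // Illustrative layout of that linkage area (byte offsets from the stack
  // pointer at the call):
  //
  //   ELFv1, 48 bytes: 0 back chain | 8 CR save | 16 LR save
  //                    | 24 reserved | 32 reserved | 40 TOC save
  //   ELFv2, 32 bytes: 0 back chain | 8 CR save | 16 LR save | 24 TOC save
  //
  // The parameter save area, when one is needed, starts right after it.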
6325 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6326 unsigned NumBytes = LinkageSize;
6327 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6328
6329 static const MCPhysReg GPR[] = {
6330 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6331 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6332 };
6333 static const MCPhysReg VR[] = {
6334 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6335 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6336 };
6337
6338 const unsigned NumGPRs = std::size(GPR);
6339 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6340 const unsigned NumVRs = std::size(VR);
6341
6342 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6343 // can be passed to the callee in registers.
6344 // For the fast calling convention, there is another check below.
6345 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6346 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6347 if (!HasParameterArea) {
6348 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6349 unsigned AvailableFPRs = NumFPRs;
6350 unsigned AvailableVRs = NumVRs;
6351 unsigned NumBytesTmp = NumBytes;
6352 for (unsigned i = 0; i != NumOps; ++i) {
6353 if (Outs[i].Flags.isNest()) continue;
6354 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6355 PtrByteSize, LinkageSize, ParamAreaSize,
6356 NumBytesTmp, AvailableFPRs, AvailableVRs))
6357 HasParameterArea = true;
6358 }
6359 }
6360
6361 // When using the fast calling convention, we don't provide backing for
6362 // arguments that will be in registers.
6363 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6364
6365 // Avoid allocating parameter area for fastcc functions if all the arguments
6366 // can be passed in the registers.
6367 if (IsFastCall)
6368 HasParameterArea = false;
6369
6370 // Add up all the space actually used.
6371 for (unsigned i = 0; i != NumOps; ++i) {
6372 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6373 EVT ArgVT = Outs[i].VT;
6374 EVT OrigVT = Outs[i].ArgVT;
6375
6376 if (Flags.isNest())
6377 continue;
6378
6379 if (IsFastCall) {
6380 if (Flags.isByVal()) {
6381 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6382 if (NumGPRsUsed > NumGPRs)
6383 HasParameterArea = true;
6384 } else {
6385 switch (ArgVT.getSimpleVT().SimpleTy) {
6386 default: llvm_unreachable("Unexpected ValueType for argument!");
6387 case MVT::i1:
6388 case MVT::i32:
6389 case MVT::i64:
6390 if (++NumGPRsUsed <= NumGPRs)
6391 continue;
6392 break;
6393 case MVT::v4i32:
6394 case MVT::v8i16:
6395 case MVT::v16i8:
6396 case MVT::v2f64:
6397 case MVT::v2i64:
6398 case MVT::v1i128:
6399 case MVT::f128:
6400 if (++NumVRsUsed <= NumVRs)
6401 continue;
6402 break;
6403 case MVT::v4f32:
6404 if (++NumVRsUsed <= NumVRs)
6405 continue;
6406 break;
6407 case MVT::f32:
6408 case MVT::f64:
6409 if (++NumFPRsUsed <= NumFPRs)
6410 continue;
6411 break;
6412 }
6413 HasParameterArea = true;
6414 }
6415 }
6416
6417 /* Respect alignment of argument on the stack. */
6418 auto Alignment =
6419 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6420 NumBytes = alignTo(NumBytes, Alignment);
6421
6422 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6423 if (Flags.isInConsecutiveRegsLast())
6424 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6425 }
6426
6427 unsigned NumBytesActuallyUsed = NumBytes;
6428
6429 // In the old ELFv1 ABI,
6430 // the prolog code of the callee may store up to 8 GPR argument registers to
6431 // the stack, allowing va_start to index over them in memory if it is varargs.
6432 // Because we cannot tell if this is needed on the caller side, we have to
6433 // conservatively assume that it is needed. As such, make sure we have at
6434 // least enough stack space for the caller to store the 8 GPRs.
6435 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6436 // really requires memory operands, e.g. a vararg function.
6437 if (HasParameterArea)
6438 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6439 else
6440 NumBytes = LinkageSize;
6441
6442 // Tail call needs the stack to be aligned.
6443 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6444 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6445
6446 int SPDiff = 0;
6447
6448 // Calculate by how many bytes the stack has to be adjusted in case of tail
6449 // call optimization.
6450 if (!IsSibCall)
6451 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6452
6453 // To protect arguments on the stack from being clobbered in a tail call,
6454 // force all the loads to happen before doing any other lowering.
6455 if (CFlags.IsTailCall)
6456 Chain = DAG.getStackArgumentTokenFactor(Chain);
6457
6458 // Adjust the stack pointer for the new arguments...
6459 // These operations are automatically eliminated by the prolog/epilog pass
6460 if (!IsSibCall)
6461 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6462 SDValue CallSeqStart = Chain;
6463
6464 // Load the return address and frame pointer so they can be moved somewhere else
6465 // later.
6466 SDValue LROp, FPOp;
6467 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6468
6469 // Set up a copy of the stack pointer for use in loading and storing any
6470 // arguments that may not fit in the registers available for argument
6471 // passing.
6472 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6473
6474 // Figure out which arguments are going to go in registers, and which in
6475 // memory. Also, if this is a vararg function, floating point arguments
6476 // must be stored to our stack, and loaded into integer regs as well, if
6477 // any integer regs are available for argument passing.
6478 unsigned ArgOffset = LinkageSize;
6479
6480 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6481 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6482
6483 SmallVector<SDValue, 8> MemOpChains;
6484 for (unsigned i = 0; i != NumOps; ++i) {
6485 SDValue Arg = OutVals[i];
6486 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6487 EVT ArgVT = Outs[i].VT;
6488 EVT OrigVT = Outs[i].ArgVT;
6489
6490 // PtrOff will be used to store the current argument to the stack if a
6491 // register cannot be found for it.
6492 SDValue PtrOff;
6493
6494 // We re-align the argument offset for each argument. Under the fast
6495 // calling convention we only do so once we know the argument will
6496 // actually use a stack slot.
6497 auto ComputePtrOff = [&]() {
6498 /* Respect alignment of argument on the stack. */
6499 auto Alignment =
6500 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6501 ArgOffset = alignTo(ArgOffset, Alignment);
6502
6503 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6504
6505 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6506 };
6507
6508 if (!IsFastCall) {
6509 ComputePtrOff();
6510
6511 /* Compute GPR index associated with argument offset. */
6512 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6513 GPR_idx = std::min(GPR_idx, NumGPRs);
6514 }
6515
6516 // Promote integers to 64-bit values.
6517 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6518 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6519 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6520 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6521 }
6522
6523 // FIXME memcpy is used way more than necessary. Correctness first.
6524 // Note: "by value" is code for passing a structure by value, not
6525 // basic types.
6526 if (Flags.isByVal()) {
6527 // Note: Size includes alignment padding, so
6528 // struct x { short a; char b; }
6529 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6530 // These are the proper values we need for right-justifying the
6531 // aggregate in a parameter register.
6532 unsigned Size = Flags.getByValSize();
6533
6534 // An empty aggregate parameter takes up no storage and no
6535 // registers.
6536 if (Size == 0)
6537 continue;
6538
6539 if (IsFastCall)
6540 ComputePtrOff();
6541
6542 // All aggregates smaller than 8 bytes must be passed right-justified.
6543 if (Size==1 || Size==2 || Size==4) {
6544 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6545 if (GPR_idx != NumGPRs) {
6546 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6547 MachinePointerInfo(), VT);
6548 MemOpChains.push_back(Load.getValue(1));
6549 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6550
6551 ArgOffset += PtrByteSize;
6552 continue;
6553 }
6554 }
6555
6556 if (GPR_idx == NumGPRs && Size < 8) {
6557 SDValue AddPtr = PtrOff;
6558 if (!isLittleEndian) {
6559 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6560 PtrOff.getValueType());
6561 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6562 }
6563 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6564 CallSeqStart,
6565 Flags, DAG, dl);
6566 ArgOffset += PtrByteSize;
6567 continue;
6568 }
6569 // Copy the object to the parameter save area if it cannot be entirely
6570 // passed by registers.
6571 // FIXME: we only need to copy the parts which need to be passed in the
6572 // parameter save area. For the parts passed by registers, we don't need
6573 // to copy them to the stack although we need to allocate space for them
6574 // in the parameter save area.
6575 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6576 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6577 CallSeqStart,
6578 Flags, DAG, dl);
6579
6580 // When a register is available, pass a small aggregate right-justified.
6581 if (Size < 8 && GPR_idx != NumGPRs) {
6582 // The easiest way to get this right-justified in a register
6583 // is to copy the structure into the rightmost portion of a
6584 // local variable slot, then load the whole slot into the
6585 // register.
6586 // FIXME: The memcpy seems to produce pretty awful code for
6587 // small aggregates, particularly for packed ones.
6588 // FIXME: It would be preferable to use the slot in the
6589 // parameter save area instead of a new local variable.
6590 SDValue AddPtr = PtrOff;
6591 if (!isLittleEndian) {
6592 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6593 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6594 }
6595 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6596 CallSeqStart,
6597 Flags, DAG, dl);
6598
6599 // Load the slot into the register.
6600 SDValue Load =
6601 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6602 MemOpChains.push_back(Load.getValue(1));
6603 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6604
6605 // Done with this argument.
6606 ArgOffset += PtrByteSize;
6607 continue;
6608 }
6609
6610 // For aggregates larger than PtrByteSize, copy the pieces of the
6611 // object that fit into registers from the parameter save area.
6612 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6613 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6614 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6615 if (GPR_idx != NumGPRs) {
6616 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6617 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6618 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6619 MachinePointerInfo(), ObjType);
6620
6621 MemOpChains.push_back(Load.getValue(1));
6622 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6623 ArgOffset += PtrByteSize;
6624 } else {
6625 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6626 break;
6627 }
6628 }
6629 continue;
6630 }
6631
6632 switch (Arg.getSimpleValueType().SimpleTy) {
6633 default: llvm_unreachable("Unexpected ValueType for argument!");
6634 case MVT::i1:
6635 case MVT::i32:
6636 case MVT::i64:
6637 if (Flags.isNest()) {
6638 // The 'nest' parameter, if any, is passed in R11.
6639 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6640 break;
6641 }
6642
6643 // These can be scalar arguments or elements of an integer array type
6644 // passed directly. Clang may use those instead of "byval" aggregate
6645 // types to avoid forcing arguments to memory unnecessarily.
6646 if (GPR_idx != NumGPRs) {
6647 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6648 } else {
6649 if (IsFastCall)
6650 ComputePtrOff();
6651
6652 assert(HasParameterArea &&
6653 "Parameter area must exist to pass an argument in memory.");
6654 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6655 true, CFlags.IsTailCall, false, MemOpChains,
6656 TailCallArguments, dl);
6657 if (IsFastCall)
6658 ArgOffset += PtrByteSize;
6659 }
6660 if (!IsFastCall)
6661 ArgOffset += PtrByteSize;
6662 break;
6663 case MVT::f32:
6664 case MVT::f64: {
6665 // These can be scalar arguments or elements of a float array type
6666 // passed directly. The latter are used to implement ELFv2 homogeneous
6667 // float aggregates.
6668
6669 // Named arguments go into FPRs first, and once they overflow, the
6670 // remaining arguments go into GPRs and then the parameter save area.
6671 // Unnamed arguments for vararg functions always go to GPRs and
6672 // then the parameter save area. For now, put all arguments to vararg
6673 // routines always in both locations (FPR *and* GPR or stack slot).
6674 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6675 bool NeededLoad = false;
6676
6677 // First load the argument into the next available FPR.
6678 if (FPR_idx != NumFPRs)
6679 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6680
6681 // Next, load the argument into GPR or stack slot if needed.
6682 if (!NeedGPROrStack)
6683 ;
6684 else if (GPR_idx != NumGPRs && !IsFastCall) {
6685 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6686 // once we support fp <-> gpr moves.
6687
6688 // In the non-vararg case, this can only ever happen in the
6689 // presence of f32 array types, since otherwise we never run
6690 // out of FPRs before running out of GPRs.
6691 SDValue ArgVal;
6692
6693 // Double values are always passed in a single GPR.
6694 if (Arg.getValueType() != MVT::f32) {
6695 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6696
6697 // Non-array float values are extended and passed in a GPR.
6698 } else if (!Flags.isInConsecutiveRegs()) {
6699 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6700 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6701
6702 // If we have an array of floats, we collect every odd element
6703 // together with its predecessor into one GPR.
6704 } else if (ArgOffset % PtrByteSize != 0) {
6705 SDValue Lo, Hi;
6706 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6707 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6708 if (!isLittleEndian)
6709 std::swap(Lo, Hi);
6710 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6711
6712 // The final element, if even, goes into the first half of a GPR.
6713 } else if (Flags.isInConsecutiveRegsLast()) {
6714 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6715 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6716 if (!isLittleEndian)
6717 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6718 DAG.getConstant(32, dl, MVT::i32));
6719
6720 // Non-final even elements are skipped; they will be handled
6721 // together with the subsequent argument on the next go-around.
6722 } else
6723 ArgVal = SDValue();
6724
6725 if (ArgVal.getNode())
6726 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6727 } else {
6728 if (IsFastCall)
6729 ComputePtrOff();
6730
6731 // Single-precision floating-point values are mapped to the
6732 // second (rightmost) word of the stack doubleword.
6733 if (Arg.getValueType() == MVT::f32 &&
6734 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6735 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6736 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6737 }
6738
6739 assert(HasParameterArea &&
6740 "Parameter area must exist to pass an argument in memory.");
6741 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6742 true, CFlags.IsTailCall, false, MemOpChains,
6743 TailCallArguments, dl);
6744
6745 NeededLoad = true;
6746 }
6747 // When passing an array of floats, the array occupies consecutive
6748 // space in the argument area; only round up to the next doubleword
6749 // at the end of the array. Otherwise, each float takes 8 bytes.
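      // For example: three f32 elements of a homogeneous float array advance
      // ArgOffset by 4 bytes each, and isInConsecutiveRegsLast() on the final
      // element rounds the total from 12 up to the next doubleword, 16.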
6750 if (!IsFastCall || NeededLoad) {
6751 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6752 Flags.isInConsecutiveRegs()) ? 4 : 8;
6753 if (Flags.isInConsecutiveRegsLast())
6754 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6755 }
6756 break;
6757 }
6758 case MVT::v4f32:
6759 case MVT::v4i32:
6760 case MVT::v8i16:
6761 case MVT::v16i8:
6762 case MVT::v2f64:
6763 case MVT::v2i64:
6764 case MVT::v1i128:
6765 case MVT::f128:
6766 // These can be scalar arguments or elements of a vector array type
7767 // passed directly. The latter are used to implement ELFv2 homogeneous
6768 // vector aggregates.
6769
6770 // For a varargs call, named arguments go into VRs or on the stack as
6771 // usual; unnamed arguments always go to the stack or the corresponding
6772 // GPRs when within range. For now, we always put the value in both
6773 // locations (or even all three).
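      // For example, a 16-byte vector passed to a varargs callee here is
      // stored to its slot in the parameter save area, reloaded into the next
      // free VR (if one remains), and also reloaded as two 8-byte chunks into
      // the next free GPRs, mirroring the store/load sequence below.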
6774 if (CFlags.IsVarArg) {
6775 assert(HasParameterArea &&
6776 "Parameter area must exist if we have a varargs call.");
6777 // We could elide this store in the case where the object fits
6778 // entirely in R registers. Maybe later.
6779 SDValue Store =
6780 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6781 MemOpChains.push_back(Store);
6782 if (VR_idx != NumVRs) {
6783 SDValue Load =
6784 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6785 MemOpChains.push_back(Load.getValue(1));
6786 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6787 }
6788 ArgOffset += 16;
6789 for (unsigned i=0; i<16; i+=PtrByteSize) {
6790 if (GPR_idx == NumGPRs)
6791 break;
6792 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6793 DAG.getConstant(i, dl, PtrVT));
6794 SDValue Load =
6795 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6796 MemOpChains.push_back(Load.getValue(1));
6797 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6798 }
6799 break;
6800 }
6801
6802 // Non-varargs Altivec params go into VRs or on the stack.
6803 if (VR_idx != NumVRs) {
6804 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6805 } else {
6806 if (IsFastCall)
6807 ComputePtrOff();
6808
6809 assert(HasParameterArea &&
6810 "Parameter area must exist to pass an argument in memory.");
6811 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6812 true, CFlags.IsTailCall, true, MemOpChains,
6813 TailCallArguments, dl);
6814 if (IsFastCall)
6815 ArgOffset += 16;
6816 }
6817
6818 if (!IsFastCall)
6819 ArgOffset += 16;
6820 break;
6821 }
6822 }
6823
6824 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6825 "mismatch in size of parameter area");
6826 (void)NumBytesActuallyUsed;
6827
6828 if (!MemOpChains.empty())
6829 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6830
6831 // Check if this is an indirect call (MTCTR/BCTRL).
6832 // See prepareDescriptorIndirectCall and buildCallOperands for more
6833 // information about calls through function pointers in the 64-bit SVR4 ABI.
6834 if (CFlags.IsIndirect) {
6835 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6836 // caller in the TOC save area.
6837 if (isTOCSaveRestoreRequired(Subtarget)) {
6838 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6839 // Load r2 into a virtual register and store it to the TOC save area.
6840 setUsesTOCBasePtr(DAG);
6841 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6842 // TOC save area offset.
6843 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6844 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6845 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6846 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6847 MachinePointerInfo::getStack(
6848 DAG.getMachineFunction(), TOCSaveOffset));
6849 }
6850 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6851 // This does not mean the MTCTR instruction must use R12; it's easier
6852 // to model this as an extra parameter, so do that.
6853 if (isELFv2ABI && !CFlags.IsPatchPoint)
6854 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6855 }
6856
6857 // Build a sequence of copy-to-reg nodes chained together with token chain
6858 // and flag operands which copy the outgoing args into the appropriate regs.
6859 SDValue InGlue;
6860 for (const auto &[Reg, N] : RegsToPass) {
6861 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6862 InGlue = Chain.getValue(1);
6863 }
6864
6865 if (CFlags.IsTailCall && !IsSibCall)
6866 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6867 TailCallArguments);
6868
6869 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6870 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6871}
6872
6873// Returns true when the shadow of a general purpose argument register
6874// in the parameter save area is aligned to at least 'RequiredAlign'.
6875static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6876 assert(RequiredAlign.value() <= 16 &&
6877 "Required alignment greater than stack alignment.");
6878 switch (Reg) {
6879 default:
6880 report_fatal_error("called on invalid register.");
6881 case PPC::R5:
6882 case PPC::R9:
6883 case PPC::X3:
6884 case PPC::X5:
6885 case PPC::X7:
6886 case PPC::X9:
6887 // These registers are 16 byte aligned, which is the strictest alignment
6888 // we can support.
6889 return true;
6890 case PPC::R3:
6891 case PPC::R7:
6892 case PPC::X4:
6893 case PPC::X6:
6894 case PPC::X8:
6895 case PPC::X10:
6896 // The shadow of these registers in the PSA is 8 byte aligned.
6897 return RequiredAlign <= 8;
6898 case PPC::R4:
6899 case PPC::R6:
6900 case PPC::R8:
6901 case PPC::R10:
6902 return RequiredAlign <= 4;
6903 }
6904}
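// A quick check of the table above: with the 64-bit linkage area of 48 bytes,
// X3 shadows the parameter save area at SP+48 (16-byte aligned), X4 at SP+56
// (only 8-byte aligned), X5 at SP+64, and so on; with the 32-bit 24-byte
// linkage area, R5 and R9 land on 16-byte boundaries (SP+32 and SP+48) while
// R4, R6, R8 and R10 are only 4-byte aligned.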
6905
6906static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6907 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6908 Type *OrigTy, CCState &State) {
6909 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6910 State.getMachineFunction().getSubtarget());
6911 const bool IsPPC64 = Subtarget.isPPC64();
6912 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6913 const Align PtrAlign(PtrSize);
6914 const Align StackAlign(16);
6915 const MVT RegVT = Subtarget.getScalarIntVT();
6916
6917 if (ValVT == MVT::f128)
6918 report_fatal_error("f128 is unimplemented on AIX.");
6919
6920 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6921 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6922 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6923 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6924 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6925 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6926
6927 static const MCPhysReg VR[] = {// Vector registers.
6928 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6929 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6930 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6931
6932 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6933
6934 if (ArgFlags.isNest()) {
6935 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6936 if (!EnvReg)
6937 report_fatal_error("More than one nest argument.");
6938 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6939 return false;
6940 }
6941
6942 if (ArgFlags.isByVal()) {
6943 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6944 if (ByValAlign > StackAlign)
6945 report_fatal_error("Pass-by-value arguments with alignment greater than "
6946 "16 are not supported.");
6947
6948 const unsigned ByValSize = ArgFlags.getByValSize();
6949 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6950
6951 // An empty aggregate parameter takes up no storage and no registers,
6952 // but needs a MemLoc for a stack slot for the formal arguments side.
6953 if (ByValSize == 0) {
6954 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6955 State.getStackSize(), RegVT, LocInfo));
6956 return false;
6957 }
6958
6959 // Shadow allocate any registers that are not properly aligned.
6960 unsigned NextReg = State.getFirstUnallocated(GPRs);
6961 while (NextReg != GPRs.size() &&
6962 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6963 // Shadow allocate the next register since its alignment is not strict enough.
6964 MCRegister Reg = State.AllocateReg(GPRs);
6965 // Allocate the stack space shadowed by said register.
6966 State.AllocateStack(PtrSize, PtrAlign);
6967 assert(Reg && "Allocating register unexpectedly failed.");
6968 (void)Reg;
6969 NextReg = State.getFirstUnallocated(GPRs);
6970 }
6971
6972 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6973 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6974 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6975 if (MCRegister Reg = State.AllocateReg(GPRs))
6976 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6977 else {
6978 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6979 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6980 LocInfo));
6981 break;
6982 }
6983 }
6984 return false;
6985 }
6986
6987 // Arguments always reserve parameter save area.
6988 switch (ValVT.SimpleTy) {
6989 default:
6990 report_fatal_error("Unhandled value type for argument.");
6991 case MVT::i64:
6992 // i64 arguments should have been split to i32 for PPC32.
6993 assert(IsPPC64 && "PPC32 should have split i64 values.");
6994 [[fallthrough]];
6995 case MVT::i1:
6996 case MVT::i32: {
6997 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6998 // AIX integer arguments are always passed in register width.
6999 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
7000 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
7001 : CCValAssign::LocInfo::ZExt;
7002 if (MCRegister Reg = State.AllocateReg(GPRs))
7003 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7004 else
7005 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
7006
7007 return false;
7008 }
7009 case MVT::f32:
7010 case MVT::f64: {
7011 // Parameter save area (PSA) is reserved even if the float passes in fpr.
7012 const unsigned StoreSize = LocVT.getStoreSize();
7013 // Floats are always 4-byte aligned in the PSA on AIX.
7014 // This includes f64 in 64-bit mode for ABI compatibility.
7015 const unsigned Offset =
7016 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
7017 MCRegister FReg = State.AllocateReg(FPR);
7018 if (FReg)
7019 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7020
7021 // Reserve and initialize GPRs or initialize the PSA as required.
7022 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
7023 if (MCRegister Reg = State.AllocateReg(GPRs)) {
7024 assert(FReg && "An FPR should be available when a GPR is reserved.");
7025 if (State.isVarArg()) {
7026 // Successfully reserved GPRs are only initialized for vararg calls.
7027 // Custom handling is required for:
7028 // f64 in PPC32 needs to be split into 2 GPRs.
7029 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7030 State.addLoc(
7031 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7032 }
7033 } else {
7034 // If there are insufficient GPRs, the PSA needs to be initialized.
7035 // Initialization occurs even if an FPR was initialized for
7036 // compatibility with the AIX XL compiler. The full memory for the
7037 // argument will be initialized even if a prior word is saved in GPR.
7038 // A custom memLoc is used when the argument also passes in FPR so
7039 // that the callee handling can skip over it easily.
7040 State.addLoc(
7041 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7042 LocInfo)
7043 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7044 break;
7045 }
7046 }
7047
7048 return false;
7049 }
7050 case MVT::v4f32:
7051 case MVT::v4i32:
7052 case MVT::v8i16:
7053 case MVT::v16i8:
7054 case MVT::v2i64:
7055 case MVT::v2f64:
7056 case MVT::v1i128: {
7057 const unsigned VecSize = 16;
7058 const Align VecAlign(VecSize);
7059
7060 if (!State.isVarArg()) {
7061 // If there are vector registers remaining we don't consume any stack
7062 // space.
7063 if (MCRegister VReg = State.AllocateReg(VR)) {
7064 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7065 return false;
7066 }
7067 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7068 // might be allocated in the portion of the PSA that is shadowed by the
7069 // GPRs.
7070 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7071 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7072 return false;
7073 }
7074
7075 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7076 // Burn any underaligned registers and their shadowed stack space until
7077 // we reach the required alignment.
7078 while (NextRegIndex != GPRs.size() &&
7079 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7080 // Shadow allocate register and its stack shadow.
7081 MCRegister Reg = State.AllocateReg(GPRs);
7082 State.AllocateStack(PtrSize, PtrAlign);
7083 assert(Reg && "Allocating register unexpectedly failed.");
7084 (void)Reg;
7085 NextRegIndex = State.getFirstUnallocated(GPRs);
7086 }
7087
7088 // Vectors that are passed as fixed arguments are handled differently.
7089 // They are passed in VRs if any are available (unlike arguments passed
7090 // through an ellipsis) and shadow GPRs (unlike arguments to non-vararg
7091 // functions).
7092 if (!ArgFlags.isVarArg()) {
7093 if (MCRegister VReg = State.AllocateReg(VR)) {
7094 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7095 // Shadow allocate GPRs and stack space even though we pass in a VR.
7096 for (unsigned I = 0; I != VecSize; I += PtrSize)
7097 State.AllocateReg(GPRs);
7098 State.AllocateStack(VecSize, VecAlign);
7099 return false;
7100 }
7101 // No vector registers remain so pass on the stack.
7102 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7103 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7104 return false;
7105 }
7106
7107 // If all GPRs are consumed, then we pass the argument fully on the stack.
7108 if (NextRegIndex == GPRs.size()) {
7109 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7110 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7111 return false;
7112 }
7113
7114 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7115 // half of the argument, and then need to pass the remaining half on the
7116 // stack.
7117 if (GPRs[NextRegIndex] == PPC::R9) {
7118 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7119 State.addLoc(
7120 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7121
7122 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7123 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7124 assert(FirstReg && SecondReg &&
7125 "Allocating R9 or R10 unexpectedly failed.");
7126 State.addLoc(
7127 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7128 State.addLoc(
7129 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7130 return false;
7131 }
7132
7133 // We have enough GPRs to fully pass the vector argument, and we have
7134 // already consumed any underaligned registers. Start with the custom
7135 // MemLoc and then the custom RegLocs.
7136 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7137 State.addLoc(
7138 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7139 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7140 const MCRegister Reg = State.AllocateReg(GPRs);
7141 assert(Reg && "Failed to allocate register for vararg vector argument");
7142 State.addLoc(
7143 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7144 }
7145 return false;
7146 }
7147 }
7148 return true;
7149}
7150
7151 // So far, this function is only used by LowerFormalArguments_AIX()
7152 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7153 bool IsPPC64,
7154 bool HasP8Vector,
7155 bool HasVSX) {
7156 assert((IsPPC64 || SVT != MVT::i64) &&
7157 "i64 should have been split for 32-bit codegen.");
7158
7159 switch (SVT) {
7160 default:
7161 report_fatal_error("Unexpected value type for formal argument");
7162 case MVT::i1:
7163 case MVT::i32:
7164 case MVT::i64:
7165 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7166 case MVT::f32:
7167 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7168 case MVT::f64:
7169 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7170 case MVT::v4f32:
7171 case MVT::v4i32:
7172 case MVT::v8i16:
7173 case MVT::v16i8:
7174 case MVT::v2i64:
7175 case MVT::v2f64:
7176 case MVT::v1i128:
7177 return &PPC::VRRCRegClass;
7178 }
7179}
7180
7181 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7182 SelectionDAG &DAG, SDValue ArgValue,
7183 MVT LocVT, const SDLoc &dl) {
7184 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7185 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7186
7187 if (Flags.isSExt())
7188 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7189 DAG.getValueType(ValVT));
7190 else if (Flags.isZExt())
7191 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7192 DAG.getValueType(ValVT));
7193
7194 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7195}
7196
7197static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7198 const unsigned LASize = FL->getLinkageSize();
7199
7200 if (PPC::GPRCRegClass.contains(Reg)) {
7201 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7202 "Reg must be a valid argument register!");
7203 return LASize + 4 * (Reg - PPC::R3);
7204 }
7205
7206 if (PPC::G8RCRegClass.contains(Reg)) {
7207 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7208 "Reg must be a valid argument register!");
7209 return LASize + 8 * (Reg - PPC::X3);
7210 }
7211
7212 llvm_unreachable("Only general purpose registers expected.");
7213}
7214
7215// AIX ABI Stack Frame Layout:
7216//
7217// Low Memory +--------------------------------------------+
7218// SP +---> | Back chain | ---+
7219// | +--------------------------------------------+ |
7220// | | Saved Condition Register | |
7221// | +--------------------------------------------+ |
7222// | | Saved Linkage Register | |
7223// | +--------------------------------------------+ | Linkage Area
7224// | | Reserved for compilers | |
7225// | +--------------------------------------------+ |
7226// | | Reserved for binders | |
7227// | +--------------------------------------------+ |
7228// | | Saved TOC pointer | ---+
7229// | +--------------------------------------------+
7230// | | Parameter save area |
7231// | +--------------------------------------------+
7232// | | Alloca space |
7233// | +--------------------------------------------+
7234// | | Local variable space |
7235// | +--------------------------------------------+
7236// | | Float/int conversion temporary |
7237// | +--------------------------------------------+
7238// | | Save area for AltiVec registers |
7239// | +--------------------------------------------+
7240// | | AltiVec alignment padding |
7241// | +--------------------------------------------+
7242// | | Save area for VRSAVE register |
7243// | +--------------------------------------------+
7244// | | Save area for General Purpose registers |
7245// | +--------------------------------------------+
7246// | | Save area for Floating Point registers |
7247// | +--------------------------------------------+
7248// +---- | Back chain |
7249// High Memory +--------------------------------------------+
7250//
7251// Specifications:
7252// AIX 7.2 Assembler Language Reference
7253// Subroutine linkage convention
7254
7255SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7256 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7257 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7258 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7259
7260 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7261 CallConv == CallingConv::Fast) &&
7262 "Unexpected calling convention!");
7263
7264 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7265 report_fatal_error("Tail call support is unimplemented on AIX.");
7266
7267 if (useSoftFloat())
7268 report_fatal_error("Soft float support is unimplemented on AIX.");
7269
7270 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7271
7272 const bool IsPPC64 = Subtarget.isPPC64();
7273 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7274
7275 // Assign locations to all of the incoming arguments.
7276 SmallVector<CCValAssign, 16> ArgLocs;
7277 MachineFunction &MF = DAG.getMachineFunction();
7278 MachineFrameInfo &MFI = MF.getFrameInfo();
7279 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7280 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7281
7282 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7283 // Reserve space for the linkage area on the stack.
7284 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7285 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7286 uint64_t SaveStackPos = CCInfo.getStackSize();
7287 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7288 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7289
7290 SmallVector<SDValue, 8> MemOps;
7291
7292 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7293 CCValAssign &VA = ArgLocs[I++];
7294 MVT LocVT = VA.getLocVT();
7295 MVT ValVT = VA.getValVT();
7296 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7297
7298 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7299 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7300 // For compatibility with the AIX XL compiler, the float args in the
7301 // parameter save area are initialized even if the argument is available
7302 // in register. The caller is required to initialize both the register
7303 // and memory; however, the callee can choose to expect it in either.
7304 // The memloc is dismissed here because the argument is retrieved from
7305 // the register.
7306 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7307 continue;
7308
7309 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7310 const TargetRegisterClass *RegClass = getRegClassForSVT(
7311 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7312 // On PPC64, the debugger assumes extended values are stored as the full 8-byte GPR.
7313 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7314 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7315 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7316 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7317 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7318 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7319 MachinePointerInfo(), Align(PtrByteSize));
7320 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7321 MemOps.push_back(StoreReg);
7322 }
7323
7324 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7325 unsigned StoreSize =
7326 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7327 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7328 }
7329
7330 auto HandleMemLoc = [&]() {
7331 const unsigned LocSize = LocVT.getStoreSize();
7332 const unsigned ValSize = ValVT.getStoreSize();
7333 assert((ValSize <= LocSize) &&
7334 "Object size is larger than size of MemLoc");
7335 int CurArgOffset = VA.getLocMemOffset();
7336 // Objects are right-justified because AIX is big-endian.
7337 if (LocSize > ValSize)
7338 CurArgOffset += LocSize - ValSize;
7339 // Potential tail calls could cause overwriting of argument stack slots.
7340 const bool IsImmutable =
7341 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7342 (CallConv == CallingConv::Fast));
7343 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7344 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7345 SDValue ArgValue =
7346 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7347
7348 // While the ABI specifies the argument type is (sign or zero) extended
7349 // out to register width, not all code is compliant. We truncate and
7350 // re-extend to be more forgiving of these callers when the argument type
7351 // is smaller than register width.
7352 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7353 ValVT.isInteger() &&
7354 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7355 // It is possible to have either real integer values
7356 // or integers that were not originally integers.
7357 // In the latter case, these could have come from structs,
7358 // and these integers would not have an extend on the parameter.
7359 // Since these types of integers do not have an extend specified
7360 // in the first place, the type of extend that we do should not matter.
7361 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7362 ? MVT::i8
7363 : ArgVT;
7364 SDValue ArgValueTrunc =
7365 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7366 SDValue ArgValueExt =
7367 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7368 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7369 InVals.push_back(ArgValueExt);
7370 } else {
7371 InVals.push_back(ArgValue);
7372 }
7373 };
7374
7375 // Vector arguments to VaArg functions are passed both on the stack, and
7376 // in any available GPRs. Load the value from the stack and add the GPRs
7377 // as live ins.
7378 if (VA.isMemLoc() && VA.needsCustom()) {
7379 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7380 assert(isVarArg && "Only use custom memloc for vararg.");
7381 // Remember the ValNo of the custom MemLoc so we can compare it to the
7382 // ValNo of the matching custom RegLocs.
7383 const unsigned OriginalValNo = VA.getValNo();
7384 (void)OriginalValNo;
7385
7386 auto HandleCustomVecRegLoc = [&]() {
7387 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7388 "Missing custom RegLoc.");
7389 VA = ArgLocs[I++];
7390 assert(VA.getValVT().isVector() &&
7391 "Unexpected Val type for custom RegLoc.");
7392 assert(VA.getValNo() == OriginalValNo &&
7393 "ValNo mismatch between custom MemLoc and RegLoc.");
7394 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7395 MF.addLiveIn(VA.getLocReg(),
7396 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7397 Subtarget.hasVSX()));
7398 };
7399
7400 HandleMemLoc();
7401 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7402 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7403 // R10.
7404 HandleCustomVecRegLoc();
7405 HandleCustomVecRegLoc();
7406
7407 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7408 // we passed the vector in R5, R6, R7 and R8.
7409 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7410 assert(!IsPPC64 &&
7411 "Only 2 custom RegLocs expected for 64-bit codegen.");
7412 HandleCustomVecRegLoc();
7413 HandleCustomVecRegLoc();
7414 }
7415
7416 continue;
7417 }
7418
7419 if (VA.isRegLoc()) {
7420 if (VA.getValVT().isScalarInteger())
7421 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7422 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7423 switch (VA.getValVT().SimpleTy) {
7424 default:
7425 report_fatal_error("Unhandled value type for argument.");
7426 case MVT::f32:
7427 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7428 break;
7429 case MVT::f64:
7430 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7431 break;
7432 }
7433 } else if (VA.getValVT().isVector()) {
7434 switch (VA.getValVT().SimpleTy) {
7435 default:
7436 report_fatal_error("Unhandled value type for argument.");
7437 case MVT::v16i8:
7438 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7439 break;
7440 case MVT::v8i16:
7441 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7442 break;
7443 case MVT::v4i32:
7444 case MVT::v2i64:
7445 case MVT::v1i128:
7446 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7447 break;
7448 case MVT::v4f32:
7449 case MVT::v2f64:
7450 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7451 break;
7452 }
7453 }
7454 }
7455
7456 if (Flags.isByVal() && VA.isMemLoc()) {
7457 const unsigned Size =
7458 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7459 PtrByteSize);
7460 const int FI = MF.getFrameInfo().CreateFixedObject(
7461 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7462 /* IsAliased */ true);
7463 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7464 InVals.push_back(FIN);
7465
7466 continue;
7467 }
7468
7469 if (Flags.isByVal()) {
7470 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7471
7472 const MCPhysReg ArgReg = VA.getLocReg();
7473 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7474
7475 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7476 const int FI = MF.getFrameInfo().CreateFixedObject(
7477 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7478 /* IsAliased */ true);
7479 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7480 InVals.push_back(FIN);
7481
7482 // Add live ins for all the RegLocs for the same ByVal.
7483 const TargetRegisterClass *RegClass =
7484 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7485
7486 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7487 unsigned Offset) {
7488 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7489 // Since the caller's side has left-justified the aggregate in the
7490 // register, we can simply store the entire register into the stack
7491 // slot.
7492 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7493 // The store to the fixedstack object is needed because accessing a
7494 // field of the ByVal will use a gep and load. Ideally we would optimize
7495 // this to extract the value from the register directly, and elide the
7496 // stores when the argument's address is not taken, but that will need to
7497 // be future work.
7498 SDValue Store = DAG.getStore(
7499 CopyFrom.getValue(1), dl, CopyFrom,
7500 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7501 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7502
7503 MemOps.push_back(Store);
7504 };
7505
7506 unsigned Offset = 0;
7507 HandleRegLoc(VA.getLocReg(), Offset);
7508 Offset += PtrByteSize;
7509 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7510 Offset += PtrByteSize) {
7511 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7512 "RegLocs should be for ByVal argument.");
7513
7514 const CCValAssign RL = ArgLocs[I++];
7515 HandleRegLoc(RL.getLocReg(), Offset);
7517 }
7518
7519 if (Offset != StackSize) {
7520 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7521 "Expected MemLoc for remaining bytes.");
7522 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7523 // Consume the MemLoc. The InVal has already been emitted, so nothing
7524 // more needs to be done.
7525 ++I;
7526 }
7527
7528 continue;
7529 }
7530
7531 if (VA.isRegLoc() && !VA.needsCustom()) {
7532 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7533 Register VReg =
7534 MF.addLiveIn(VA.getLocReg(),
7535 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7536 Subtarget.hasVSX()));
7537 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7538 if (ValVT.isScalarInteger() &&
7539 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7540 ArgValue =
7541 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7542 }
7543 InVals.push_back(ArgValue);
7544 continue;
7545 }
7546 if (VA.isMemLoc()) {
7547 HandleMemLoc();
7548 continue;
7549 }
7550 }
7551
7552 // On AIX a minimum of 8 words is saved to the parameter save area.
7553 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7554 // Area that is at least reserved in the caller of this function.
7555 unsigned CallerReservedArea = std::max<unsigned>(
7556 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7557
7558 // Set the size that is at least reserved in caller of this function. Tail
7559 // call optimized function's reserved stack space needs to be aligned so
7560 // that taking the difference between two stack areas will result in an
7561 // aligned stack.
7562 CallerReservedArea =
7563 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7564 FuncInfo->setMinReservedArea(CallerReservedArea);
7565
7566 if (isVarArg) {
7567 FuncInfo->setVarArgsFrameIndex(
7568 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7569 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7570
7571 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7572 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7573
7574 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7575 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7576 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7577
7578 // The fixed integer arguments of a variadic function are stored to the
7579 // VarArgsFrameIndex on the stack so that they may be loaded by
7580 // dereferencing the result of va_next.
7581 for (unsigned GPRIndex =
7582 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7583 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7584
7585 const Register VReg =
7586 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7587 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7588
7589 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7590 SDValue Store =
7591 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7592 MemOps.push_back(Store);
7593 // Increment the address for the next argument to store.
7594 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7595 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7596 }
7597 }
7598
7599 if (!MemOps.empty())
7600 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7601
7602 return Chain;
7603}
7604
7605SDValue PPCTargetLowering::LowerCall_AIX(
7606 SDValue Chain, SDValue Callee, CallFlags CFlags,
7607 const SmallVectorImpl<ISD::OutputArg> &Outs,
7608 const SmallVectorImpl<SDValue> &OutVals,
7609 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7610 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7611 const CallBase *CB) const {
7612 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7613 // AIX ABI stack frame layout.
7614
7615 assert((CFlags.CallConv == CallingConv::C ||
7616 CFlags.CallConv == CallingConv::Cold ||
7617 CFlags.CallConv == CallingConv::Fast) &&
7618 "Unexpected calling convention!");
7619
7620 if (CFlags.IsPatchPoint)
7621 report_fatal_error("This call type is unimplemented on AIX.");
7622
7623 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7624
7625 MachineFunction &MF = DAG.getMachineFunction();
7626 SmallVector<CCValAssign, 16> ArgLocs;
7627 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7628 *DAG.getContext());
7629
7630 // Reserve space for the linkage save area (LSA) on the stack.
7631 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7632 // [SP][CR][LR][2 x reserved][TOC].
7633 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7634 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7635 const bool IsPPC64 = Subtarget.isPPC64();
7636 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7637 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7638 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7639 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7640
7641 // The prolog code of the callee may store up to 8 GPR argument registers to
7642 // the stack, allowing va_start to index over them in memory if the callee
7643 // is variadic.
7644 // Because we cannot tell if this is needed on the caller side, we have to
7645 // conservatively assume that it is needed. As such, make sure we have at
7646 // least enough stack space for the caller to store the 8 GPRs.
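  // For example, on 64-bit AIX this minimum works out to 48 + 8 * 8 = 112
  // bytes even for a call that passes no arguments on the stack.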
7647 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7648 const unsigned NumBytes = std::max<unsigned>(
7649 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7650
7651 // Adjust the stack pointer for the new arguments...
7652 // These operations are automatically eliminated by the prolog/epilog pass.
7653 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7654 SDValue CallSeqStart = Chain;
7655
7656 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7657 SmallVector<SDValue, 8> MemOpChains;
7658
7659 // Set up a copy of the stack pointer for loading and storing any
7660 // arguments that may not fit in the registers available for argument
7661 // passing.
7662 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7663 : DAG.getRegister(PPC::R1, MVT::i32);
7664
7665 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7666 const unsigned ValNo = ArgLocs[I].getValNo();
7667 SDValue Arg = OutVals[ValNo];
7668 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7669
7670 if (Flags.isByVal()) {
7671 const unsigned ByValSize = Flags.getByValSize();
7672
7673 // Nothing to do for zero-sized ByVals on the caller side.
7674 if (!ByValSize) {
7675 ++I;
7676 continue;
7677 }
7678
7679 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7680 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7681 (LoadOffset != 0)
7682 ? DAG.getObjectPtrOffset(
7683 dl, Arg, TypeSize::getFixed(LoadOffset))
7684 : Arg,
7685 MachinePointerInfo(), VT);
7686 };
7687
7688 unsigned LoadOffset = 0;
7689
7690 // Initialize registers, which are fully occupied by the by-val argument.
7691 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7692 SDValue Load = GetLoad(PtrVT, LoadOffset);
7693 MemOpChains.push_back(Load.getValue(1));
7694 LoadOffset += PtrByteSize;
7695 const CCValAssign &ByValVA = ArgLocs[I++];
7696 assert(ByValVA.getValNo() == ValNo &&
7697 "Unexpected location for pass-by-value argument.");
7698 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7699 }
7700
7701 if (LoadOffset == ByValSize)
7702 continue;
7703
7704 // There must be one more loc to handle the remainder.
7705 assert(ArgLocs[I].getValNo() == ValNo &&
7706 "Expected additional location for by-value argument.");
7707
7708 if (ArgLocs[I].isMemLoc()) {
7709 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7710 const CCValAssign &ByValVA = ArgLocs[I++];
7711 ISD::ArgFlagsTy MemcpyFlags = Flags;
7712 // Only memcpy the bytes that don't pass in register.
7713 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7714 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7715 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7716 dl, Arg, TypeSize::getFixed(LoadOffset))
7717 : Arg,
7718 DAG.getObjectPtrOffset(
7719 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7720 CallSeqStart, MemcpyFlags, DAG, dl);
7721 continue;
7722 }
7723
7724 // Initialize the final register residue.
7725 // Any residue that occupies the final by-val arg register must be
7726 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7727 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7728 // 2 and 1 byte loads.
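      // For a 7-byte residue on a 64-bit target, this emits an i32 load
      // shifted left by 32, an i16 load shifted left by 16, and an i8 load
      // shifted left by 8, ORed together so that bytes 0-6 occupy the most
      // significant bytes of the register and the low byte is zero.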
7729 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7730 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7731 "Unexpected register residue for by-value argument.");
7732 SDValue ResidueVal;
7733 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7734 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7735 const MVT VT =
7736 N == 1 ? MVT::i8
7737 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7738 SDValue Load = GetLoad(VT, LoadOffset);
7739 MemOpChains.push_back(Load.getValue(1));
7740 LoadOffset += N;
7741 Bytes += N;
7742
7743 // By-val arguments are passed left-justified in register.
7744 // Every load here needs to be shifted, otherwise a full register load
7745 // should have been used.
7746 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7747 "Unexpected load emitted during handling of pass-by-value "
7748 "argument.");
7749 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7750 EVT ShiftAmountTy =
7751 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7752 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7753 SDValue ShiftedLoad =
7754 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7755 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7756 ShiftedLoad)
7757 : ShiftedLoad;
7758 }
7759
7760 const CCValAssign &ByValVA = ArgLocs[I++];
7761 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7762 continue;
7763 }
7764
7765 CCValAssign &VA = ArgLocs[I++];
7766 const MVT LocVT = VA.getLocVT();
7767 const MVT ValVT = VA.getValVT();
7768
7769 switch (VA.getLocInfo()) {
7770 default:
7771 report_fatal_error("Unexpected argument extension type.");
7772 case CCValAssign::Full:
7773 break;
7774 case CCValAssign::ZExt:
7775 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7776 break;
7777 case CCValAssign::SExt:
7778 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7779 break;
7780 }
7781
7782 if (VA.isRegLoc() && !VA.needsCustom()) {
7783 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7784 continue;
7785 }
7786
7787 // Vector arguments passed to VarArg functions need custom handling when
7788 // they are passed (at least partially) in GPRs.
7789 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7790 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7791 // Store value to its stack slot.
7792 SDValue PtrOff =
7793 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7794 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7795 SDValue Store =
7796 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7797 MemOpChains.push_back(Store);
7798 const unsigned OriginalValNo = VA.getValNo();
7799 // Then load the GPRs from the stack
7800 unsigned LoadOffset = 0;
7801 auto HandleCustomVecRegLoc = [&]() {
7802 assert(I != E && "Unexpected end of CCvalAssigns.");
7803 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7804 "Expected custom RegLoc.");
7805 CCValAssign RegVA = ArgLocs[I++];
7806 assert(RegVA.getValNo() == OriginalValNo &&
7807 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7808 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7809 DAG.getConstant(LoadOffset, dl, PtrVT));
7810 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7811 MemOpChains.push_back(Load.getValue(1));
7812 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7813 LoadOffset += PtrByteSize;
7814 };
7815
7816 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7817 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7818 // R10.
7819 HandleCustomVecRegLoc();
7820 HandleCustomVecRegLoc();
7821
7822 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7823 ArgLocs[I].getValNo() == OriginalValNo) {
7824 assert(!IsPPC64 &&
7825 "Only 2 custom RegLocs expected for 64-bit codegen.");
7826 HandleCustomVecRegLoc();
7827 HandleCustomVecRegLoc();
7828 }
7829
7830 continue;
7831 }
7832
7833 if (VA.isMemLoc()) {
7834 SDValue PtrOff =
7835 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7836 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7837 MemOpChains.push_back(
7838 DAG.getStore(Chain, dl, Arg, PtrOff,
7839 MachinePointerInfo::getStack(MF, VA.getLocMemOffset()),
7840 Subtarget.getFrameLowering()->getStackAlign()));
7841
7842 continue;
7843 }
7844
7845 if (!ValVT.isFloatingPoint())
7846 report_fatal_error(
7847 "Unexpected register handling for calling convention.");
7848
7849 // Custom handling is used for GPR initializations for vararg float
7850 // arguments.
7851 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7852 LocVT.isInteger() &&
7853 "Custom register handling only expected for VarArg.");
7854
7855 SDValue ArgAsInt =
7856 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7857
7858 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7859 // f32 in 32-bit GPR
7860 // f64 in 64-bit GPR
7861 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7862 else if (Arg.getValueType().getFixedSizeInBits() <
7863 LocVT.getFixedSizeInBits())
7864 // f32 in 64-bit GPR.
7865 RegsToPass.push_back(std::make_pair(
7866 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7867 else {
7868 // f64 in two 32-bit GPRs
7869 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7870 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7871 "Unexpected custom register for argument!");
7872 CCValAssign &GPR1 = VA;
7873 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7874 DAG.getConstant(32, dl, MVT::i8));
7875 RegsToPass.push_back(std::make_pair(
7876 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7877
7878 if (I != E) {
7879 // If only 1 GPR was available, there will only be one custom GPR and
7880 // the argument will also pass in memory.
7881 CCValAssign &PeekArg = ArgLocs[I];
7882 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7883 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7884 CCValAssign &GPR2 = ArgLocs[I++];
7885 RegsToPass.push_back(std::make_pair(
7886 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7887 }
7888 }
7889 }
7890 }
7891
7892 if (!MemOpChains.empty())
7893 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7894
7895 // For indirect calls, we need to save the TOC base to the stack for
7896 // restoration after the call.
7897 if (CFlags.IsIndirect) {
7898 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7899 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7900 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7901 const MVT PtrVT = Subtarget.getScalarIntVT();
7902 const unsigned TOCSaveOffset =
7903 Subtarget.getFrameLowering()->getTOCSaveOffset();
7904
7905 setUsesTOCBasePtr(DAG);
7906 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7907 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7908 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7909 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7910 Chain = DAG.getStore(
7911 Val.getValue(1), dl, Val, AddPtr,
7912 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7913 }
7914
7915 // Build a sequence of copy-to-reg nodes chained together with token chain
7916 // and flag operands which copy the outgoing args into the appropriate regs.
7917 SDValue InGlue;
7918 for (auto Reg : RegsToPass) {
7919 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7920 InGlue = Chain.getValue(1);
7921 }
7922
7923 const int SPDiff = 0;
7924 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7925 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7926}
7927
7928bool
7929PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7930 MachineFunction &MF, bool isVarArg,
7931 const SmallVectorImpl<ISD::OutputArg> &Outs,
7932 LLVMContext &Context,
7933 const Type *RetTy) const {
7934 SmallVector<CCValAssign, 16> RVLocs;
7935 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7936 return CCInfo.CheckReturn(
7937 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7938 ? RetCC_PPC_Cold
7939 : RetCC_PPC);
7940}
7941
7942SDValue
7943PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7944 bool isVarArg,
7945 const SmallVectorImpl<ISD::OutputArg> &Outs,
7946 const SmallVectorImpl<SDValue> &OutVals,
7947 const SDLoc &dl, SelectionDAG &DAG) const {
7948 SmallVector<CCValAssign, 16> RVLocs;
7949 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7950 *DAG.getContext());
7951 CCInfo.AnalyzeReturn(Outs,
7952 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7953 ? RetCC_PPC_Cold
7954 : RetCC_PPC);
7955
7956 SDValue Glue;
7957 SmallVector<SDValue, 4> RetOps(1, Chain);
7958
7959 // Copy the result values into the output registers.
7960 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7961 CCValAssign &VA = RVLocs[i];
7962 assert(VA.isRegLoc() && "Can only return in registers!");
7963
7964 SDValue Arg = OutVals[RealResIdx];
7965
7966 switch (VA.getLocInfo()) {
7967 default: llvm_unreachable("Unknown loc info!");
7968 case CCValAssign::Full: break;
7969 case CCValAssign::AExt:
7970 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7971 break;
7972 case CCValAssign::ZExt:
7973 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7974 break;
7975 case CCValAssign::SExt:
7976 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7977 break;
7978 }
7979 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7980 bool isLittleEndian = Subtarget.isLittleEndian();
7981 // Legalize ret f64 -> ret 2 x i32.
7982 SDValue SVal =
7983 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7984 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7985 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7986 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7987 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7988 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7989 Glue = Chain.getValue(1);
7990 VA = RVLocs[++i]; // skip ahead to next loc
7991 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7992 } else
7993 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7994 Glue = Chain.getValue(1);
7995 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7996 }
7997
7998 RetOps[0] = Chain; // Update chain.
7999
8000 // Add the glue if we have it.
8001 if (Glue.getNode())
8002 RetOps.push_back(Glue);
8003
8004 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
8005}
8006
8007SDValue
8008PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
8009 SelectionDAG &DAG) const {
8010 SDLoc dl(Op);
8011
8012 // Get the correct type for integers.
8013 EVT IntVT = Op.getValueType();
8014
8015 // Get the inputs.
8016 SDValue Chain = Op.getOperand(0);
8017 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8018 // Build a DYNAREAOFFSET node.
8019 SDValue Ops[2] = {Chain, FPSIdx};
8020 SDVTList VTs = DAG.getVTList(IntVT);
8021 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
8022}
8023
8024SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
8025 SelectionDAG &DAG) const {
8026 // When we pop the dynamic allocation we need to restore the SP link.
8027 SDLoc dl(Op);
8028
8029 // Get the correct type for pointers.
8030 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8031
8032 // Construct the stack pointer operand.
8033 bool isPPC64 = Subtarget.isPPC64();
8034 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
8035 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
8036
8037 // Get the operands for the STACKRESTORE.
8038 SDValue Chain = Op.getOperand(0);
8039 SDValue SaveSP = Op.getOperand(1);
8040
8041 // Load the old link SP.
8042 SDValue LoadLinkSP =
8043 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
8044
8045 // Restore the stack pointer.
8046 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
8047
8048 // Store the old link SP.
8049 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
8050}
8051
8052SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
8053 MachineFunction &MF = DAG.getMachineFunction();
8054 bool isPPC64 = Subtarget.isPPC64();
8055 EVT PtrVT = getPointerTy(MF.getDataLayout());
8056
8057 // Get the current return address save index. The users of this index will
8058 // be primarily the RETURNADDR lowering.
8059 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8060 int RASI = FI->getReturnAddrSaveIndex();
8061
8062 // If the return address save index hasn't been defined yet.
8063 if (!RASI) {
8064 // Find out the fixed offset of the return address save area.
8065 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8066 // Allocate the frame index for the return address save area.
8067 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
8068 // Save the result.
8069 FI->setReturnAddrSaveIndex(RASI);
8070 }
8071 return DAG.getFrameIndex(RASI, PtrVT);
8072}
8073
8074SDValue
8075PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8076 MachineFunction &MF = DAG.getMachineFunction();
8077 bool isPPC64 = Subtarget.isPPC64();
8078 EVT PtrVT = getPointerTy(MF.getDataLayout());
8079
8080 // Get current frame pointer save index. The users of this index will be
8081 // primarily DYNALLOC instructions.
8082 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8083 int FPSI = FI->getFramePointerSaveIndex();
8084
8085 // If the frame pointer save index hasn't been defined yet.
8086 if (!FPSI) {
8087 // Find out the fixed offset of the frame pointer save area.
8088 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8089 // Allocate the frame index for frame pointer save area.
8090 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8091 // Save the result.
8092 FI->setFramePointerSaveIndex(FPSI);
8093 }
8094 return DAG.getFrameIndex(FPSI, PtrVT);
8095}
8096
8097SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8098 SelectionDAG &DAG) const {
8099 MachineFunction &MF = DAG.getMachineFunction();
8100 // Get the inputs.
8101 SDValue Chain = Op.getOperand(0);
8102 SDValue Size = Op.getOperand(1);
8103 SDLoc dl(Op);
8104
8105 // Get the correct type for pointers.
8106 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8107 // Negate the size.
8108 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8109 DAG.getConstant(0, dl, PtrVT), Size);
8110 // Construct a node for the frame pointer save index.
8111 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8112 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8113 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8114 if (hasInlineStackProbe(MF))
8115 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8116 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8117}
8118
8119SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8120 SelectionDAG &DAG) const {
8121 MachineFunction &MF = DAG.getMachineFunction();
8122
8123 bool isPPC64 = Subtarget.isPPC64();
8124 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8125
8126 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8127 return DAG.getFrameIndex(FI, PtrVT);
8128}
8129
8130SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8131 SelectionDAG &DAG) const {
8132 SDLoc DL(Op);
8133 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8134 DAG.getVTList(MVT::i32, MVT::Other),
8135 Op.getOperand(0), Op.getOperand(1));
8136}
8137
8138SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8139 SelectionDAG &DAG) const {
8140 SDLoc DL(Op);
8141 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8142 Op.getOperand(0), Op.getOperand(1));
8143}
8144
8145SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8146 if (Op.getValueType().isVector())
8147 return LowerVectorLoad(Op, DAG);
8148
8149 assert(Op.getValueType() == MVT::i1 &&
8150 "Custom lowering only for i1 loads");
8151
8152 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8153
8154 SDLoc dl(Op);
8155 LoadSDNode *LD = cast<LoadSDNode>(Op);
8156
8157 SDValue Chain = LD->getChain();
8158 SDValue BasePtr = LD->getBasePtr();
8159 MachineMemOperand *MMO = LD->getMemOperand();
8160
8161 SDValue NewLD =
8162 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8163 BasePtr, MVT::i8, MMO);
8164 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8165
8166 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8167 return DAG.getMergeValues(Ops, dl);
8168}
8169
8170SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8171 if (Op.getOperand(1).getValueType().isVector())
8172 return LowerVectorStore(Op, DAG);
8173
8174 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8175 "Custom lowering only for i1 stores");
8176
8177 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8178
8179 SDLoc dl(Op);
8180 StoreSDNode *ST = cast<StoreSDNode>(Op);
8181
8182 SDValue Chain = ST->getChain();
8183 SDValue BasePtr = ST->getBasePtr();
8184 SDValue Value = ST->getValue();
8185 MachineMemOperand *MMO = ST->getMemOperand();
8186
8187 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8188 Value);
8189 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8190}
8191
8192// FIXME: Remove this once the ANDI glue bug is fixed:
8193SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8194 assert(Op.getValueType() == MVT::i1 &&
8195 "Custom lowering only for i1 results");
8196
8197 SDLoc DL(Op);
8198 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8199}
8200
8201SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8202 SelectionDAG &DAG) const {
8203
8204 // Implements a vector truncate that fits in a vector register as a shuffle.
8205 // We want to legalize vector truncates down to where the source fits in
8206 // a vector register (and target is therefore smaller than vector register
8207 // size). At that point legalization will try to custom lower the sub-legal
8208 // result and get here - where we can contain the truncate as a single target
8209 // operation.
8210
8211 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8212 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8213 //
8214 // We will implement it for big-endian ordering as this (where u denotes
8215 // undefined):
8216 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8217 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8218 //
8219 // The same operation in little-endian ordering will be:
8220 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8221 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8222
8223 EVT TrgVT = Op.getValueType();
8224 assert(TrgVT.isVector() && "Vector type expected.");
8225 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8226 EVT EltVT = TrgVT.getVectorElementType();
8227 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8228 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8229 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8230 return SDValue();
8231
8232 SDValue N1 = Op.getOperand(0);
8233 EVT SrcVT = N1.getValueType();
8234 unsigned SrcSize = SrcVT.getSizeInBits();
8235 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8238 return SDValue();
8239 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8240 return SDValue();
8241
8242 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8243 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8244
8245 SDLoc DL(Op);
8246 SDValue Op1, Op2;
8247 if (SrcSize == 256) {
8248 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8249 EVT SplitVT =
8250 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8251 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8252 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8253 DAG.getConstant(0, DL, VecIdxTy));
8254 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8255 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8256 }
8257 else {
8258 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8259 Op2 = DAG.getUNDEF(WideVT);
8260 }
8261
8262 // First list the elements we want to keep.
8263 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8264 SmallVector<int, 16> ShuffV;
8265 if (Subtarget.isLittleEndian())
8266 for (unsigned i = 0; i < TrgNumElts; ++i)
8267 ShuffV.push_back(i * SizeMult);
8268 else
8269 for (unsigned i = 1; i <= TrgNumElts; ++i)
8270 ShuffV.push_back(i * SizeMult - 1);
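// With SizeMult = SrcSize / TrgSize, little-endian keeps the first (lowest)
// sub-element of each source element (indices 0, SizeMult, 2*SizeMult, ...),
// while big-endian keeps the last one (indices SizeMult-1, 2*SizeMult-1, ...),
// matching the LSB positions shown in the example above.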
8271
8272 // Populate the remaining elements with undefs.
8273 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8274 // ShuffV.push_back(i + WideNumElts);
8275 ShuffV.push_back(WideNumElts + 1);
8276
8277 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8278 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8279 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8280}
8281
8282/// LowerSELECT_CC - Lower floating point select_cc's into an fsel instruction
8283/// when possible.
8284SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8285 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8286 EVT ResVT = Op.getValueType();
8287 EVT CmpVT = Op.getOperand(0).getValueType();
8288 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8289 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8290 SDLoc dl(Op);
8291
8292 // Without power9-vector, we don't have a native instruction for f128 comparison.
8293 // The following transformation to a libcall is needed for setcc:
8294 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8295 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8296 SDValue Z = DAG.getSetCC(
8297 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8298 LHS, RHS, CC);
8299 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8300 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8301 }
8302
8303 // Not FP, or using SPE? Not a fsel.
8304 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8305 Subtarget.hasSPE())
8306 return Op;
8307
8308 SDNodeFlags Flags = Op.getNode()->getFlags();
8309
8310 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8311 // presence of infinities.
8312 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8313 switch (CC) {
8314 default:
8315 break;
8316 case ISD::SETOGT:
8317 case ISD::SETGT:
8318 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8319 case ISD::SETOLT:
8320 case ISD::SETLT:
8321 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8322 }
8323 }
8324
8325 // We might be able to do better than this under some circumstances, but in
8326 // general, fsel-based lowering of select is a finite-math-only optimization.
8327 // For more information, see section F.3 of the 2.06 ISA specification.
8328 // With ISA 3.0
8329 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8330 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8331 ResVT == MVT::f128)
8332 return Op;
8333
8334 // If the RHS of the comparison is a 0.0, we don't need to do the
8335 // subtraction at all.
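// PPCISD::FSEL(C, T, F) yields T when C >= 0.0 and F otherwise ("natively
// setge"), so each case below rewrites the comparison into one or two such
// selects, swapping TV/FV where the sense of the test is inverted.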
8336 SDValue Sel1;
8337 if (isFloatingPointZero(RHS))
8338 switch (CC) {
8339 default: break; // SETUO etc aren't handled by fsel.
8340 case ISD::SETNE:
8341 std::swap(TV, FV);
8342 [[fallthrough]];
8343 case ISD::SETEQ:
8344 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8345 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8346 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8347 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8348 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8349 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8350 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8351 case ISD::SETULT:
8352 case ISD::SETLT:
8353 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8354 [[fallthrough]];
8355 case ISD::SETOGE:
8356 case ISD::SETGE:
8357 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8358 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8359 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8360 case ISD::SETUGT:
8361 case ISD::SETGT:
8362 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
8363 [[fallthrough]];
8364 case ISD::SETOLE:
8365 case ISD::SETLE:
8366 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8367 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8368 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8369 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8370 }
8371
8372 SDValue Cmp;
8373 switch (CC) {
8374 default: break; // SETUO etc aren't handled by fsel.
8375 case ISD::SETNE:
8376 std::swap(TV, FV);
8377 [[fallthrough]];
8378 case ISD::SETEQ:
8379 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8380 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8381 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8382 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8383 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8384 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8385 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8386 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8387 case ISD::SETULT:
8388 case ISD::SETLT:
8389 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8390 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8391 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8392 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8393 case ISD::SETOGE:
8394 case ISD::SETGE:
8395 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8396 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8397 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8398 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8399 case ISD::SETUGT:
8400 case ISD::SETGT:
8401 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8402 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8403 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8404 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8405 case ISD::SETOLE:
8406 case ISD::SETLE:
8407 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8408 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8409 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8410 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8411 }
8412 return Op;
8413}
8414
8415static unsigned getPPCStrictOpcode(unsigned Opc) {
8416 switch (Opc) {
8417 default:
8418 llvm_unreachable("No strict version of this opcode!");
8419 case PPCISD::FCTIDZ:
8420 return PPCISD::STRICT_FCTIDZ;
8421 case PPCISD::FCTIWZ:
8422 return PPCISD::STRICT_FCTIWZ;
8423 case PPCISD::FCTIDUZ:
8424 return PPCISD::STRICT_FCTIDUZ;
8425 case PPCISD::FCTIWUZ:
8426 return PPCISD::STRICT_FCTIWUZ;
8427 case PPCISD::FCFID:
8428 return PPCISD::STRICT_FCFID;
8429 case PPCISD::FCFIDU:
8430 return PPCISD::STRICT_FCFIDU;
8431 case PPCISD::FCFIDS:
8432 return PPCISD::STRICT_FCFIDS;
8433 case PPCISD::FCFIDUS:
8434 return PPCISD::STRICT_FCFIDUS;
8435 }
8436}
8437
8438static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8439 const PPCSubtarget &Subtarget) {
8440 SDLoc dl(Op);
8441 bool IsStrict = Op->isStrictFPOpcode();
8442 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8443 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8444
8445 // TODO: Any other flags to propagate?
8446 SDNodeFlags Flags;
8447 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8448
8449 // For strict nodes, source is the second operand.
8450 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8451 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8452 MVT DestTy = Op.getSimpleValueType();
8453 assert(Src.getValueType().isFloatingPoint() &&
8454 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8455 DestTy == MVT::i64) &&
8456 "Invalid FP_TO_INT types");
8457 if (Src.getValueType() == MVT::f32) {
8458 if (IsStrict) {
8459 Src =
8460 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8461 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8462 Chain = Src.getValue(1);
8463 } else
8464 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8465 }
8466 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8467 DestTy = Subtarget.getScalarIntVT();
8468 unsigned Opc = ISD::DELETED_NODE;
8469 switch (DestTy.SimpleTy) {
8470 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8471 case MVT::i32:
8472 Opc = IsSigned ? PPCISD::FCTIWZ
8473 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8474 break;
8475 case MVT::i64:
8476 assert((IsSigned || Subtarget.hasFPCVT()) &&
8477 "i64 FP_TO_UINT is supported only with FPCVT");
8478 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8479 }
8480 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8481 SDValue Conv;
8482 if (IsStrict) {
8483 Opc = getPPCStrictOpcode(Opc);
8484 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8485 Flags);
8486 } else {
8487 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8488 }
8489 return Conv;
8490}
8491
8492void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8493 SelectionDAG &DAG,
8494 const SDLoc &dl) const {
8495 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8496 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8497 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8498 bool IsStrict = Op->isStrictFPOpcode();
8499
8500 // Convert the FP value to an int value through memory.
8501 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8502 (IsSigned || Subtarget.hasFPCVT());
8503 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8504 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8505 MachinePointerInfo MPI =
8506 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8507
8508 // Emit a store to the stack slot.
8509 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8510 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8511 if (i32Stack) {
8512 MachineFunction &MF = DAG.getMachineFunction();
8513 Alignment = Align(4);
8514 MachineMemOperand *MMO =
8515 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8516 SDValue Ops[] = { Chain, Tmp, FIPtr };
8517 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8518 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8519 } else
8520 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8521
8522 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8523 // add in a bias on big endian.
8524 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8525 !Subtarget.isLittleEndian()) {
8526 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8527 DAG.getConstant(4, dl, FIPtr.getValueType()));
8528 MPI = MPI.getWithOffset(4);
8529 }
8530
8531 RLI.Chain = Chain;
8532 RLI.Ptr = FIPtr;
8533 RLI.MPI = MPI;
8534 RLI.Alignment = Alignment;
8535}
8536
8537/// Custom lowers floating point to integer conversions to use
8538/// the direct move instructions available in ISA 2.07 to avoid the
8539/// need for load/store combinations.
8540SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8541 SelectionDAG &DAG,
8542 const SDLoc &dl) const {
8543 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8544 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8545 if (Op->isStrictFPOpcode())
8546 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8547 else
8548 return Mov;
8549}
8550
8551SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8552 const SDLoc &dl) const {
8553 bool IsStrict = Op->isStrictFPOpcode();
8554 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8555 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8556 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8557 EVT SrcVT = Src.getValueType();
8558 EVT DstVT = Op.getValueType();
8559
8560 // FP to INT conversions are legal for f128.
8561 if (SrcVT == MVT::f128)
8562 return Subtarget.hasP9Vector() ? Op : SDValue();
8563
8564 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8565 // PPC (the libcall is not available).
8566 if (SrcVT == MVT::ppcf128) {
8567 if (DstVT == MVT::i32) {
8568 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8569 // set other fast-math flags to FP operations in both strict and
8570 // non-strict cases. (FP_TO_SINT, FSUB)
8571 SDNodeFlags Flags;
8572 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8573
8574 if (IsSigned) {
8575 SDValue Lo, Hi;
8576 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8577
8578 // Add the two halves of the long double in round-to-zero mode, and use
8579 // a smaller FP_TO_SINT.
8580 if (IsStrict) {
8581 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8582 DAG.getVTList(MVT::f64, MVT::Other),
8583 {Op.getOperand(0), Lo, Hi}, Flags);
8584 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8585 DAG.getVTList(MVT::i32, MVT::Other),
8586 {Res.getValue(1), Res}, Flags);
8587 } else {
8588 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8589 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8590 }
8591 } else {
8592 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8593 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8594 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8595 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8596 if (IsStrict) {
8597 // Sel = Src < 0x80000000
8598 // FltOfs = select Sel, 0.0, 0x80000000
8599 // IntOfs = select Sel, 0, 0x80000000
8600 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8601 SDValue Chain = Op.getOperand(0);
8602 EVT SetCCVT =
8603 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8604 EVT DstSetCCVT =
8605 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8606 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8607 Chain, true);
8608 Chain = Sel.getValue(1);
8609
8610 SDValue FltOfs = DAG.getSelect(
8611 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8612 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8613
8614 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8615 DAG.getVTList(SrcVT, MVT::Other),
8616 {Chain, Src, FltOfs}, Flags);
8617 Chain = Val.getValue(1);
8618 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8619 DAG.getVTList(DstVT, MVT::Other),
8620 {Chain, Val}, Flags);
8621 Chain = SInt.getValue(1);
8622 SDValue IntOfs = DAG.getSelect(
8623 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8624 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8625 return DAG.getMergeValues({Result, Chain}, dl);
8626 } else {
8627 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8628 // FIXME: generated code sucks.
8629 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8630 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8631 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8632 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8633 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8634 }
8635 }
8636 }
8637
8638 return SDValue();
8639 }
8640
8641 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8642 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8643
8644 ReuseLoadInfo RLI;
8645 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8646
8647 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8648 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8649}
8650
8651// We're trying to insert a regular store, S, and then a load, L. If the
8652// incoming value, O, is a load, we might just be able to have our load use the
8653// address used by O. However, we don't know if anything else will store to
8654// that address before we can load from it. To prevent this situation, we need
8655// to insert our load, L, into the chain as a peer of O. To do this, we give L
8656// the same chain operand as O, we create a token factor from the chain results
8657// of O and L, and we replace all uses of O's chain result with that token
8658// factor (this last part is handled by makeEquivalentMemoryOrdering).
8659bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8660 ReuseLoadInfo &RLI,
8661 SelectionDAG &DAG,
8662 ISD::LoadExtType ET) const {
8663 // Conservatively skip reusing for constrained FP nodes.
8664 if (Op->isStrictFPOpcode())
8665 return false;
8666
8667 SDLoc dl(Op);
8668 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8669 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8670 if (ET == ISD::NON_EXTLOAD &&
8671 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8672 isOperationLegalOrCustom(Op.getOpcode(),
8673 Op.getOperand(0).getValueType())) {
8674
8675 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8676 return true;
8677 }
8678
8679 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8680 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8681 LD->isNonTemporal())
8682 return false;
8683 if (LD->getMemoryVT() != MemVT)
8684 return false;
8685
8686 // If the result of the load is an illegal type, then we can't build a
8687 // valid chain for reuse since the legalised loads and token factor node that
8688 // ties the legalised loads together uses a different output chain than the
8689 // illegal load.
8690 if (!isTypeLegal(LD->getValueType(0)))
8691 return false;
8692
8693 RLI.Ptr = LD->getBasePtr();
8694 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8695 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8696 "Non-pre-inc AM on PPC?");
8697 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8698 LD->getOffset());
8699 }
8700
8701 RLI.Chain = LD->getChain();
8702 RLI.MPI = LD->getPointerInfo();
8703 RLI.IsDereferenceable = LD->isDereferenceable();
8704 RLI.IsInvariant = LD->isInvariant();
8705 RLI.Alignment = LD->getAlign();
8706 RLI.AAInfo = LD->getAAInfo();
8707 RLI.Ranges = LD->getRanges();
8708
8709 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8710 return true;
8711}
8712
8713/// Analyze the profitability of a direct move:
8714/// prefer a float load to an int load plus a direct move
8715/// when there is no integer use of the int load.
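/// In other words, if every user of the loaded integer value is itself an
/// int-to-fp conversion, it is cheaper to load the value directly into a
/// floating-point register than to load it into a GPR and direct-move it.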
8716bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8717 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8718 if (Origin->getOpcode() != ISD::LOAD)
8719 return true;
8720
8721 // If there is no LXSIBZX/LXSIHZX, like Power8,
8722 // prefer direct move if the memory size is 1 or 2 bytes.
8723 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8724 if (!Subtarget.hasP9Vector() &&
8725 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8726 return true;
8727
8728 for (SDUse &Use : Origin->uses()) {
8729
8730 // Only look at the users of the loaded value.
8731 if (Use.getResNo() != 0)
8732 continue;
8733
8734 SDNode *User = Use.getUser();
8735 if (User->getOpcode() != ISD::SINT_TO_FP &&
8736 User->getOpcode() != ISD::UINT_TO_FP &&
8737 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8738 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8739 return true;
8740 }
8741
8742 return false;
8743}
8744
8745static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8746 const PPCSubtarget &Subtarget,
8747 SDValue Chain = SDValue()) {
8748 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8749 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8750 SDLoc dl(Op);
8751
8752 // TODO: Any other flags to propagate?
8753 SDNodeFlags Flags;
8754 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8755
8756 // If we have FCFIDS, then use it when converting to single-precision.
8757 // Otherwise, convert to double-precision and then round.
8758 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8759 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8760 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8761 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8762 if (Op->isStrictFPOpcode()) {
8763 if (!Chain)
8764 Chain = Op.getOperand(0);
8765 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8766 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8767 } else
8768 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8769}
8770
8771/// Custom lowers integer to floating point conversions to use
8772/// the direct move instructions available in ISA 2.07 to avoid the
8773/// need for load/store combinations.
8774SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8775 SelectionDAG &DAG,
8776 const SDLoc &dl) const {
8777 assert((Op.getValueType() == MVT::f32 ||
8778 Op.getValueType() == MVT::f64) &&
8779 "Invalid floating point type as target of conversion");
8780 assert(Subtarget.hasFPCVT() &&
8781 "Int to FP conversions with direct moves require FPCVT");
8782 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8783 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8784 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8785 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8786 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8787 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8788 return convertIntToFP(Op, Mov, DAG, Subtarget);
8789}
8790
8791static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8792
8793 EVT VecVT = Vec.getValueType();
8794 assert(VecVT.isVector() && "Expected a vector type.");
8795 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8796
8797 EVT EltVT = VecVT.getVectorElementType();
8798 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8799 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8800
8801 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8802 SmallVector<SDValue, 16> Ops(NumConcat);
8803 Ops[0] = Vec;
8804 SDValue UndefVec = DAG.getUNDEF(VecVT);
8805 for (unsigned i = 1; i < NumConcat; ++i)
8806 Ops[i] = UndefVec;
8807
8808 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8809}
8810
8811SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8812 const SDLoc &dl) const {
8813 bool IsStrict = Op->isStrictFPOpcode();
8814 unsigned Opc = Op.getOpcode();
8815 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8818 "Unexpected conversion type");
8819 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8820 "Supports conversions to v2f64/v4f32 only.");
8821
8822 // TODO: Any other flags to propagate?
8823 SDNodeFlags Flags;
8824 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8825
8826 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8827 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8828
8829 SDValue Wide = widenVec(DAG, Src, dl);
8830 EVT WideVT = Wide.getValueType();
8831 unsigned WideNumElts = WideVT.getVectorNumElements();
8832 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8833
8834 SmallVector<int, 16> ShuffV;
8835 for (unsigned i = 0; i < WideNumElts; ++i)
8836 ShuffV.push_back(i + WideNumElts);
8837
8838 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8839 int SaveElts = FourEltRes ? 4 : 2;
8840 if (Subtarget.isLittleEndian())
8841 for (int i = 0; i < SaveElts; i++)
8842 ShuffV[i * Stride] = i;
8843 else
8844 for (int i = 1; i <= SaveElts; i++)
8845 ShuffV[i * Stride - 1] = i - 1;
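// The shuffle places each narrow source element into the low (little-endian)
// or high (big-endian) part of one lane of the wider integer type; the other
// sub-lanes come from ShuffleSrc2, i.e. zeros for unsigned conversions and
// undef (later fixed up by SIGN_EXTEND_INREG) for signed ones.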
8846
8847 SDValue ShuffleSrc2 =
8848 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8849 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8850
8851 SDValue Extend;
8852 if (SignedConv) {
8853 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8854 EVT ExtVT = Src.getValueType();
8855 if (Subtarget.hasP9Altivec())
8856 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8857 IntermediateVT.getVectorNumElements());
8858
8859 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8860 DAG.getValueType(ExtVT));
8861 } else
8862 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8863
8864 if (IsStrict)
8865 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8866 {Op.getOperand(0), Extend}, Flags);
8867
8868 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8869}
8870
8871SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8872 SelectionDAG &DAG) const {
8873 SDLoc dl(Op);
8874 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8875 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8876 bool IsStrict = Op->isStrictFPOpcode();
8877 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8878 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8879
8880 // TODO: Any other flags to propagate?
8881 SDNodeFlags Flags;
8882 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8883
8884 EVT InVT = Src.getValueType();
8885 EVT OutVT = Op.getValueType();
8886 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8887 isOperationCustom(Op.getOpcode(), InVT))
8888 return LowerINT_TO_FPVector(Op, DAG, dl);
8889
8890 // Conversions to f128 are legal.
8891 if (Op.getValueType() == MVT::f128)
8892 return Subtarget.hasP9Vector() ? Op : SDValue();
8893
8894 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8895 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8896 return SDValue();
8897
8898 if (Src.getValueType() == MVT::i1) {
8899 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8900 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8901 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8902 if (IsStrict)
8903 return DAG.getMergeValues({Sel, Chain}, dl);
8904 else
8905 return Sel;
8906 }
8907
8908 // If we have direct moves, we can do the entire conversion and skip the
8909 // store/load; however, without FPCVT we can't do most conversions.
8910 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8911 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8912 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8913
8914 assert((IsSigned || Subtarget.hasFPCVT()) &&
8915 "UINT_TO_FP is supported only with FPCVT");
8916
8917 if (Src.getValueType() == MVT::i64) {
8918 SDValue SINT = Src;
8919 // When converting to single-precision, we actually need to convert
8920 // to double-precision first and then round to single-precision.
8921 // To avoid double-rounding effects during that operation, we have
8922 // to prepare the input operand. Bits that might be truncated when
8923 // converting to double-precision are replaced by a bit that won't
8924 // be lost at this stage, but is below the single-precision rounding
8925 // position.
8926 //
8927 // However, if afn is in effect, accept double
8928 // rounding to avoid the extra overhead.
8929 // FIXME: Currently INT_TO_FP can't support fast math flags because
8930 // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8931 // false.
8932 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8933 !Op->getFlags().hasApproximateFuncs()) {
8934
8935 // Twiddle input to make sure the low 11 bits are zero. (If this
8936 // is the case, we are guaranteed the value will fit into the 53 bit
8937 // mantissa of an IEEE double-precision value without rounding.)
8938 // If any of those low 11 bits were not zero originally, make sure
8939 // bit 12 (value 2048) is set instead, so that the final rounding
8940 // to single-precision gets the correct result.
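// For example, with SINT = 0x1003 the sequence gives (0x1003 & 2047) = 3,
// 3 + 2047 = 2050 (bit 11 set), OR with SINT = 0x1803, and masking with
// -2048 produces 0x1800: the low 11 bits are gone but bit 11 remembers that
// they were nonzero.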
8941 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8942 SINT, DAG.getConstant(2047, dl, MVT::i64));
8943 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8944 Round, DAG.getConstant(2047, dl, MVT::i64));
8945 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8946 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8947 DAG.getSignedConstant(-2048, dl, MVT::i64));
8948
8949 // However, we cannot use that value unconditionally: if the magnitude
8950 // of the input value is small, the bit-twiddling we did above might
8951 // end up visibly changing the output. Fortunately, in that case, we
8952 // don't need to twiddle bits since the original input will convert
8953 // exactly to double-precision floating-point already. Therefore,
8954 // construct a conditional to use the original value if the top 11
8955 // bits are all sign-bit copies, and use the rounded value computed
8956 // above otherwise.
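// After the arithmetic shift by 53, a value whose top 11 bits are all copies
// of the sign bit becomes 0 or -1; adding 1 gives 0 or 1, so the unsigned
// "greater than 1" test selects the rounded value only when significant bits
// would otherwise be lost.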
8957 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8958 SINT, DAG.getConstant(53, dl, MVT::i32));
8959 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8960 Cond, DAG.getConstant(1, dl, MVT::i64));
8961 Cond = DAG.getSetCC(
8962 dl,
8963 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8964 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8965
8966 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8967 }
8968
8969 ReuseLoadInfo RLI;
8970 SDValue Bits;
8971
8972 MachineFunction &MF = DAG.getMachineFunction();
8973 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8974 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8975 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8976 if (RLI.ResChain)
8977 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8978 } else if (Subtarget.hasLFIWAX() &&
8979 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8980 MachineMemOperand *MMO =
8981 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8982 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8983 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8984 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8985 DAG.getVTList(MVT::f64, MVT::Other),
8986 Ops, MVT::i32, MMO);
8987 if (RLI.ResChain)
8988 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8989 } else if (Subtarget.hasFPCVT() &&
8990 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8991 MachineMemOperand *MMO =
8992 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8993 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8994 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8995 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8996 DAG.getVTList(MVT::f64, MVT::Other),
8997 Ops, MVT::i32, MMO);
8998 if (RLI.ResChain)
8999 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
9000 } else if (((Subtarget.hasLFIWAX() &&
9001 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
9002 (Subtarget.hasFPCVT() &&
9003 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
9004 SINT.getOperand(0).getValueType() == MVT::i32) {
9005 MachineFrameInfo &MFI = MF.getFrameInfo();
9006 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9007
9008 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9009 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9010
9011 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
9012 MachinePointerInfo::getFixedStack(
9013 DAG.getMachineFunction(), FrameIdx));
9014 Chain = Store;
9015
9016 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9017 "Expected an i32 store");
9018
9019 RLI.Ptr = FIdx;
9020 RLI.Chain = Chain;
9021 RLI.MPI =
9022 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9023 RLI.Alignment = Align(4);
9024
9025 MachineMemOperand *MMO =
9026 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9027 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9028 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9029 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
9030 PPCISD::LFIWZX : PPCISD::LFIWAX,
9031 dl, DAG.getVTList(MVT::f64, MVT::Other),
9032 Ops, MVT::i32, MMO);
9033 Chain = Bits.getValue(1);
9034 } else
9035 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
9036
9037 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
9038 if (IsStrict)
9039 Chain = FP.getValue(1);
9040
9041 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9042 if (IsStrict)
9043 FP = DAG.getNode(
9044 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9045 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
9046 Flags);
9047 else
9048 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9049 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9050 }
9051 return FP;
9052 }
9053
9054 assert(Src.getValueType() == MVT::i32 &&
9055 "Unhandled INT_TO_FP type in custom expander!");
9056 // Since we only generate this in 64-bit mode, we can take advantage of
9057 // 64-bit registers. In particular, sign extend the input value into the
9058 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
9059 // then lfd it and fcfid it.
9060 MachineFunction &MF = DAG.getMachineFunction();
9061 MachineFrameInfo &MFI = MF.getFrameInfo();
9062 EVT PtrVT = getPointerTy(MF.getDataLayout());
9063
9064 SDValue Ld;
9065 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
9066 ReuseLoadInfo RLI;
9067 bool ReusingLoad;
9068 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
9069 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9070 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9071
9072 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9073 MachinePointerInfo::getFixedStack(
9074 DAG.getMachineFunction(), FrameIdx));
9075 Chain = Store;
9076
9077 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9078 "Expected an i32 store");
9079
9080 RLI.Ptr = FIdx;
9081 RLI.Chain = Chain;
9082 RLI.MPI =
9083 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9084 RLI.Alignment = Align(4);
9085 }
9086
9087 MachineMemOperand *MMO =
9088 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9089 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9090 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9091 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9092 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9093 MVT::i32, MMO);
9094 Chain = Ld.getValue(1);
9095 if (ReusingLoad && RLI.ResChain) {
9096 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
9097 }
9098 } else {
9099 assert(Subtarget.isPPC64() &&
9100 "i32->FP without LFIWAX supported only on PPC64");
9101
9102 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9103 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9104
9105 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9106
9107 // STD the extended value into the stack slot.
9108 SDValue Store = DAG.getStore(
9109 Chain, dl, Ext64, FIdx,
9110 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9111 Chain = Store;
9112
9113 // Load the value as a double.
9114 Ld = DAG.getLoad(
9115 MVT::f64, dl, Chain, FIdx,
9116 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9117 Chain = Ld.getValue(1);
9118 }
9119
9120 // FCFID it and return it.
9121 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9122 if (IsStrict)
9123 Chain = FP.getValue(1);
9124 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9125 if (IsStrict)
9126 FP = DAG.getNode(
9127 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9128 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9129 else
9130 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9131 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9132 }
9133 return FP;
9134}
9135
9136SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9137 SelectionDAG &DAG) const {
9138 SDLoc Dl(Op);
9139 MachineFunction &MF = DAG.getMachineFunction();
9140 EVT PtrVT = getPointerTy(MF.getDataLayout());
9141 SDValue Chain = Op.getOperand(0);
9142
9143 // If the requested mode is a constant, just use the simpler mtfsb/mffscrni.
9144 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9145 uint64_t Mode = CVal->getZExtValue();
9146 assert(Mode < 4 && "Unsupported rounding mode!");
9147 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9148 if (Subtarget.isISA3_0())
9149 return SDValue(
9150 DAG.getMachineNode(
9151 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9152 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9153 1);
9154 SDNode *SetHi = DAG.getMachineNode(
9155 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9156 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9157 SDNode *SetLo = DAG.getMachineNode(
9158 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9159 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9160 return SDValue(SetLo, 0);
9161 }
9162
9163 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
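// LLVM encodes {0: toward zero, 1: nearest, 2: +inf, 3: -inf} while the
// FPSCR RN field uses {0: nearest, 1: toward zero, 2: +inf, 3: -inf}; the
// expression swaps the encodings 0 and 1 and leaves 2 and 3 unchanged.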
9164 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9165 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9166 DAG.getConstant(3, Dl, MVT::i32));
9167 SDValue DstFlag = DAG.getNode(
9168 ISD::XOR, Dl, MVT::i32, SrcFlag,
9169 DAG.getNode(ISD::AND, Dl, MVT::i32,
9170 DAG.getNOT(Dl,
9171 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9172 MVT::i32),
9173 One));
9174 // For Power9, there is a faster mffscrn, and we don't need to read the FPSCR.
9175 SDValue MFFS;
9176 if (!Subtarget.isISA3_0()) {
9177 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9178 Chain = MFFS.getValue(1);
9179 }
9180 SDValue NewFPSCR;
9181 if (Subtarget.isPPC64()) {
9182 if (Subtarget.isISA3_0()) {
9183 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9184 } else {
9185 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9186 SDNode *InsertRN = DAG.getMachineNode(
9187 PPC::RLDIMI, Dl, MVT::i64,
9188 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9189 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9190 DAG.getTargetConstant(0, Dl, MVT::i32),
9191 DAG.getTargetConstant(62, Dl, MVT::i32)});
9192 NewFPSCR = SDValue(InsertRN, 0);
9193 }
9194 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9195 } else {
9196 // In 32-bit mode, store f64, load and update the lower half.
9197 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9198 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9199 SDValue Addr = Subtarget.isLittleEndian()
9200 ? StackSlot
9201 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9202 DAG.getConstant(4, Dl, PtrVT));
9203 if (Subtarget.isISA3_0()) {
9204 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9205 } else {
9206 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9207 SDValue Tmp =
9208 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9209 Chain = Tmp.getValue(1);
9210 Tmp = SDValue(DAG.getMachineNode(
9211 PPC::RLWIMI, Dl, MVT::i32,
9212 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9213 DAG.getTargetConstant(30, Dl, MVT::i32),
9214 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9215 0);
9216 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9217 }
9218 NewFPSCR =
9219 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9220 Chain = NewFPSCR.getValue(1);
9221 }
9222 if (Subtarget.isISA3_0())
9223 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9224 {NewFPSCR, Chain}),
9225 1);
9226 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9227 SDNode *MTFSF = DAG.getMachineNode(
9228 PPC::MTFSF, Dl, MVT::Other,
9229 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9230 return SDValue(MTFSF, 0);
9231}
9232
9233SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9234 SelectionDAG &DAG) const {
9235 SDLoc dl(Op);
9236 /*
9237 The rounding mode is in bits 30:31 of FPSCR, and has the following
9238 settings:
9239 00 Round to nearest
9240 01 Round to 0
9241 10 Round to +inf
9242 11 Round to -inf
9243
9244 GET_ROUNDING, on the other hand, expects the following:
9245 -1 Undefined
9246 0 Round to 0
9247 1 Round to nearest
9248 2 Round to +inf
9249 3 Round to -inf
9250
9251 To perform the conversion, we do:
9252 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9253 */
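// For example, FPSCR RN = 01 (round to 0) gives (1 & 0x3) ^ ((~1 & 0x3) >> 1)
// = 1 ^ 1 = 0, and RN = 00 (round to nearest) gives 0 ^ 1 = 1, matching the
// GET_ROUNDING encoding above.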
9254
9255 MachineFunction &MF = DAG.getMachineFunction();
9256 EVT VT = Op.getValueType();
9257 EVT PtrVT = getPointerTy(MF.getDataLayout());
9258
9259 // Save FP Control Word to register
9260 SDValue Chain = Op.getOperand(0);
9261 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9262 Chain = MFFS.getValue(1);
9263
9264 SDValue CWD;
9265 if (isTypeLegal(MVT::i64)) {
9266 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9267 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9268 } else {
9269 // Save FP register to stack slot
9270 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9271 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9272 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9273
9274 // Load FP Control Word from low 32 bits of stack slot.
9276 "Stack slot adjustment is valid only on big endian subtargets!");
9277 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9278 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9279 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9280 Chain = CWD.getValue(1);
9281 }
9282
9283 // Transform as necessary
9284 SDValue CWD1 =
9285 DAG.getNode(ISD::AND, dl, MVT::i32,
9286 CWD, DAG.getConstant(3, dl, MVT::i32));
9287 SDValue CWD2 =
9288 DAG.getNode(ISD::SRL, dl, MVT::i32,
9289 DAG.getNode(ISD::AND, dl, MVT::i32,
9290 DAG.getNode(ISD::XOR, dl, MVT::i32,
9291 CWD, DAG.getConstant(3, dl, MVT::i32)),
9292 DAG.getConstant(3, dl, MVT::i32)),
9293 DAG.getConstant(1, dl, MVT::i32));
9294
9295 SDValue RetVal =
9296 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9297
9298 RetVal =
9299 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9300 dl, VT, RetVal);
9301
9302 return DAG.getMergeValues({RetVal, Chain}, dl);
9303}
9304
9305SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9306 EVT VT = Op.getValueType();
9307 uint64_t BitWidth = VT.getSizeInBits();
9308 SDLoc dl(Op);
9309 assert(Op.getNumOperands() == 3 &&
9310 VT == Op.getOperand(1).getValueType() &&
9311 "Unexpected SHL!");
9312
9313 // Expand into a bunch of logical ops. Note that these ops
9314 // depend on the PPC behavior for oversized shift amounts.
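// Concretely: OutLo = Lo << Amt, and OutHi = (Hi << Amt) | (Lo >> (BW - Amt))
// | (Lo << (Amt - BW)). For Amt < BW the third term is zero and for
// Amt >= BW the second is, because the PPC shift nodes yield zero for
// amounts in [BW, 2*BW-1] rather than being undefined.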
9315 SDValue Lo = Op.getOperand(0);
9316 SDValue Hi = Op.getOperand(1);
9317 SDValue Amt = Op.getOperand(2);
9318 EVT AmtVT = Amt.getValueType();
9319
9320 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9321 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9322 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9323 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9324 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9325 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9326 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9327 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9328 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9329 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9330 SDValue OutOps[] = { OutLo, OutHi };
9331 return DAG.getMergeValues(OutOps, dl);
9332}
9333
9334SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9335 EVT VT = Op.getValueType();
9336 SDLoc dl(Op);
9337 uint64_t BitWidth = VT.getSizeInBits();
9338 assert(Op.getNumOperands() == 3 &&
9339 VT == Op.getOperand(1).getValueType() &&
9340 "Unexpected SRL!");
9341
9342 // Expand into a bunch of logical ops. Note that these ops
9343 // depend on the PPC behavior for oversized shift amounts.
9344 SDValue Lo = Op.getOperand(0);
9345 SDValue Hi = Op.getOperand(1);
9346 SDValue Amt = Op.getOperand(2);
9347 EVT AmtVT = Amt.getValueType();
9348
9349 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9350 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9351 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9352 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9353 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9354 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9355 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9356 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9357 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9358 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9359 SDValue OutOps[] = { OutLo, OutHi };
9360 return DAG.getMergeValues(OutOps, dl);
9361}
9362
9363SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9364 SDLoc dl(Op);
9365 EVT VT = Op.getValueType();
9366 uint64_t BitWidth = VT.getSizeInBits();
9367 assert(Op.getNumOperands() == 3 &&
9368 VT == Op.getOperand(1).getValueType() &&
9369 "Unexpected SRA!");
9370
9371 // Expand into a bunch of logical ops, followed by a select_cc.
9372 SDValue Lo = Op.getOperand(0);
9373 SDValue Hi = Op.getOperand(1);
9374 SDValue Amt = Op.getOperand(2);
9375 EVT AmtVT = Amt.getValueType();
9376
9377 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9378 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9379 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9380 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9381 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9382 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9383 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9384 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9385 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9386 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9387 Tmp4, Tmp6, ISD::SETLE);
9388 SDValue OutOps[] = { OutLo, OutHi };
9389 return DAG.getMergeValues(OutOps, dl);
9390}
9391
9392SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9393 SelectionDAG &DAG) const {
9394 SDLoc dl(Op);
9395 EVT VT = Op.getValueType();
9396 unsigned BitWidth = VT.getSizeInBits();
9397
9398 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9399 SDValue X = Op.getOperand(0);
9400 SDValue Y = Op.getOperand(1);
9401 SDValue Z = Op.getOperand(2);
9402 EVT AmtVT = Z.getValueType();
9403
9404 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9405 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9406 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9407 // on PowerPC shift by BW being well defined.
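  // For illustration: a 64-bit fshl with Z = 70 masks Z to 6, so the result is
  // (X << 6) | (Y >> 58); when Z masks to 0, SubZ = 64 and the PPC shift by 64
  // yields 0, so the result collapses to X without the extra adjustment the
  // generic expansion needs to avoid a shift by BW.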
9408 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9409 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9410 SDValue SubZ =
9411 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9412 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9413 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9414 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9415}
9416
9417//===----------------------------------------------------------------------===//
9418// Vector related lowering.
9419//
9420
9421/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9422/// element size of SplatSize. Cast the result to VT.
9423static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9424 SelectionDAG &DAG, const SDLoc &dl) {
9425 static const MVT VTys[] = { // canonical VT to use for each size.
9426 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9427 };
9428
9429 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9430
9431  // For a splat with all ones, turn it into vspltisb 0xFF to canonicalize.
9432 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9433 SplatSize = 1;
9434 Val = 0xFF;
9435 }
9436
9437 EVT CanonicalVT = VTys[SplatSize-1];
9438
9439 // Build a canonical splat for this value.
9440 // Explicitly truncate APInt here, as this API is used with a mix of
9441 // signed and unsigned values.
9442 return DAG.getBitcast(
9443 ReqVT,
9444 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9445}
9446
9447/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9448/// specified intrinsic ID.
9449 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9450                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
9451 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9452 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9453 DAG.getConstant(IID, dl, MVT::i32), Op);
9454}
9455
9456/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9457/// specified intrinsic ID.
9458 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9459                                 SelectionDAG &DAG, const SDLoc &dl,
9460 EVT DestVT = MVT::Other) {
9461 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9462 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9463 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9464}
9465
9466/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9467/// specified intrinsic ID.
9468static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9469 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9470 EVT DestVT = MVT::Other) {
9471 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9472 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9473 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9474}
9475
9476/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9477/// amount. The result has the specified value type.
9478static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9479 SelectionDAG &DAG, const SDLoc &dl) {
9480 // Force LHS/RHS to be the right type.
9481 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9482 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9483
9484 int Ops[16];
9485 for (unsigned i = 0; i != 16; ++i)
9486 Ops[i] = i + Amt;
9487 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9488 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9489}
9490
9491/// Do we have an efficient pattern in a .td file for this node?
9492///
9493/// \param V - pointer to the BuildVectorSDNode being matched
9494/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9495///
9496/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9497/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9498/// the opposite is true (expansion is beneficial) are:
9499/// - The node builds a vector out of integers that are not 32 or 64-bits
9500/// - The node builds a vector out of constants
9501/// - The node is a "load-and-splat"
9502/// In all other cases, we will choose to keep the BUILD_VECTOR.
9503 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9504                                             bool HasDirectMove,
9505 bool HasP8Vector) {
9506 EVT VecVT = V->getValueType(0);
9507 bool RightType = VecVT == MVT::v2f64 ||
9508 (HasP8Vector && VecVT == MVT::v4f32) ||
9509 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9510 if (!RightType)
9511 return false;
9512
9513 bool IsSplat = true;
9514 bool IsLoad = false;
9515 SDValue Op0 = V->getOperand(0);
9516
9517 // This function is called in a block that confirms the node is not a constant
9518 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9519 // different constants.
9520 if (V->isConstant())
9521 return false;
9522 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9523 if (V->getOperand(i).isUndef())
9524 return false;
9525 // We want to expand nodes that represent load-and-splat even if the
9526 // loaded value is a floating point truncation or conversion to int.
9527 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9528 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9529 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9530 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9531 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9532 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9533 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9534 IsLoad = true;
9535 // If the operands are different or the input is not a load and has more
9536 // uses than just this BV node, then it isn't a splat.
9537 if (V->getOperand(i) != Op0 ||
9538 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9539 IsSplat = false;
9540 }
9541 return !(IsSplat && IsLoad);
9542}
9543
9544// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9545SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9546
9547 SDLoc dl(Op);
9548 SDValue Op0 = Op->getOperand(0);
9549
9550 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9551 (Op.getValueType() != MVT::f128))
9552 return SDValue();
9553
9554 SDValue Lo = Op0.getOperand(0);
9555 SDValue Hi = Op0.getOperand(1);
9556 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9557 return SDValue();
9558
9559 if (!Subtarget.isLittleEndian())
9560 std::swap(Lo, Hi);
9561
9562 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9563}
9564
9565static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9566 const SDValue *InputLoad = &Op;
9567 while (InputLoad->getOpcode() == ISD::BITCAST)
9568 InputLoad = &InputLoad->getOperand(0);
9569   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9570       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9571 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9572 InputLoad = &InputLoad->getOperand(0);
9573 }
9574 if (InputLoad->getOpcode() != ISD::LOAD)
9575 return nullptr;
9576 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9577 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9578}
9579
9580// Convert the argument APFloat to a single precision APFloat if there is no
9581// loss in information during the conversion to single precision APFloat and the
9582// resulting number is not a denormal number. Return true if successful.
9583 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9584   APFloat APFloatToConvert = ArgAPFloat;
9585 bool LosesInfo = true;
9586   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9587                            &LosesInfo);
9588 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9589 if (Success)
9590 ArgAPFloat = APFloatToConvert;
9591 return Success;
9592}
9593
9594// Bitcast the argument APInt to a double and convert it to a single precision
9595// APFloat, bitcast the APFloat to an APInt and assign it to the original
9596// argument if there is no loss in information during the conversion from
9597// double to single precision APFloat and the resulting number is not a denormal
9598// number. Return true if successful.
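// For illustration: the 64-bit pattern 0x4000000000000000 (the double 2.0)
// converts to single precision with no loss and is rewritten to 0x40000000
// (the float 2.0); a value such as 1e-310 loses information or becomes a
// denormal, so it is rejected and the argument is left unchanged.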
9599 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9600   double DpValue = ArgAPInt.bitsToDouble();
9601 APFloat APFloatDp(DpValue);
9602 bool Success = convertToNonDenormSingle(APFloatDp);
9603 if (Success)
9604 ArgAPInt = APFloatDp.bitcastToAPInt();
9605 return Success;
9606}
9607
9608 // Nondestructive check for convertToNonDenormSingle.
9610 // Only convert if it loses info, since XXSPLTIDP should
9611 // handle the other case.
9612 APFloat APFloatToConvert = ArgAPFloat;
9613 bool LosesInfo = true;
9614   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9615                            &LosesInfo);
9616
9617 return (!LosesInfo && !APFloatToConvert.isDenormal());
9618}
9619
9620static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9621 unsigned &Opcode) {
9622 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9623 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9624 return false;
9625
9626 EVT Ty = Op->getValueType(0);
9627 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9628 // as we cannot handle extending loads for these types.
9629 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9630 ISD::isNON_EXTLoad(InputNode))
9631 return true;
9632
9633 EVT MemVT = InputNode->getMemoryVT();
9634 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9635 // memory VT is the same vector element VT type.
9636 // The loads feeding into the v8i16 and v16i8 types will be extending because
9637 // scalar i8/i16 are not legal types.
9638 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9639 (MemVT == Ty.getVectorElementType()))
9640 return true;
9641
9642 if (Ty == MVT::v2i64) {
9643 // Check the extend type, when the input type is i32, and the output vector
9644 // type is v2i64.
9645 if (MemVT == MVT::i32) {
9646 if (ISD::isZEXTLoad(InputNode))
9647 Opcode = PPCISD::ZEXT_LD_SPLAT;
9648 if (ISD::isSEXTLoad(InputNode))
9649 Opcode = PPCISD::SEXT_LD_SPLAT;
9650 }
9651 return true;
9652 }
9653 return false;
9654}
9655
9656 static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9657                             bool IsLittleEndian) {
9658 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9659
9660 BitMask.clearAllBits();
9661 EVT VT = BVN.getValueType(0);
9662 unsigned VTSize = VT.getSizeInBits();
9663 APInt ConstValue(VTSize, 0);
9664
9665 unsigned EltWidth = VT.getScalarSizeInBits();
9666
9667 unsigned BitPos = 0;
9668 for (auto OpVal : BVN.op_values()) {
9669 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9670
9671 if (!CN)
9672 return false;
9673 // The elements in a vector register are ordered in reverse byte order
9674 // between little-endian and big-endian modes.
9675 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9676 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9677 BitPos += EltWidth;
9678 }
9679
9680 for (unsigned J = 0; J < 16; ++J) {
9681 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9682 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9683 return false;
9684 if (ExtractValue == 0xFF)
9685 BitMask.setBit(J);
9686 }
9687 return true;
9688}
9689
9690// If this is a case we can't handle, return null and let the default
9691// expansion code take care of it. If we CAN select this case, and if it
9692// selects to a single instruction, return Op. Otherwise, if we can codegen
9693// this case more efficiently than a constant pool load, lower it to the
9694// sequence of ops that should be used.
9695SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9696 SelectionDAG &DAG) const {
9697 SDLoc dl(Op);
9698 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9699 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9700
9701 if (Subtarget.hasP10Vector()) {
9702 APInt BitMask(32, 0);
9703 // If the value of the vector is all zeros or all ones,
9704 // we do not convert it to MTVSRBMI.
9705 // The xxleqv instruction sets a vector with all ones.
9706 // The xxlxor instruction sets a vector with all zeros.
9707 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9708 BitMask != 0 && BitMask != 0xffff) {
9709 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9710 MachineSDNode *MSDNode =
9711 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9712 SDValue SDV = SDValue(MSDNode, 0);
9713 EVT DVT = BVN->getValueType(0);
9714 EVT SVT = SDV.getValueType();
9715 if (SVT != DVT) {
9716 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9717 }
9718 return SDV;
9719 }
9720 // Recognize build vector patterns to emit VSX vector instructions
9721 // instead of loading value from memory.
9722 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9723 return VecPat;
9724 }
9725 // Check if this is a splat of a constant value.
9726 APInt APSplatBits, APSplatUndef;
9727 unsigned SplatBitSize;
9728 bool HasAnyUndefs;
9729 bool BVNIsConstantSplat =
9730 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9731 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9732
9733 // If it is a splat of a double, check if we can shrink it to a 32 bit
9734 // non-denormal float which when converted back to double gives us the same
9735 // double. This is to exploit the XXSPLTIDP instruction.
9736 // If we lose precision, we use XXSPLTI32DX.
9737 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9738 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9739 // Check the type first to short-circuit so we don't modify APSplatBits if
9740 // this block isn't executed.
9741 if ((Op->getValueType(0) == MVT::v2f64) &&
9742 convertToNonDenormSingle(APSplatBits)) {
9743 SDValue SplatNode = DAG.getNode(
9744 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9745 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9746 return DAG.getBitcast(Op.getValueType(), SplatNode);
9747 } else {
9748 // We may lose precision, so we have to use XXSPLTI32DX.
9749
9750 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9751 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9752 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9753
9754 if (!Hi || !Lo)
9755        // If either half is 0, then we should generate XXLXOR to set it to 0.
9756 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9757
9758 if (Hi)
9759 SplatNode = DAG.getNode(
9760 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9761 DAG.getTargetConstant(0, dl, MVT::i32),
9762 DAG.getTargetConstant(Hi, dl, MVT::i32));
9763
9764 if (Lo)
9765 SplatNode =
9766 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9767 DAG.getTargetConstant(1, dl, MVT::i32),
9768 DAG.getTargetConstant(Lo, dl, MVT::i32));
9769
9770 return DAG.getBitcast(Op.getValueType(), SplatNode);
9771 }
9772 }
9773
9774 bool IsSplat64 = false;
9775 uint64_t SplatBits = 0;
9776 int32_t SextVal = 0;
9777 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9778 SplatBits = APSplatBits.getZExtValue();
9779 if (SplatBitSize <= 32) {
9780 SextVal = SignExtend32(SplatBits, SplatBitSize);
9781 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9782 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9783 bool P9Vector = Subtarget.hasP9Vector();
9784 int32_t Hi = P9Vector ? 127 : 15;
9785 int32_t Lo = P9Vector ? -128 : -16;
9786 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9787 SextVal = static_cast<int32_t>(SplatBits);
9788 }
9789 }
9790
9791 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9792 unsigned NewOpcode = PPCISD::LD_SPLAT;
9793
9794 // Handle load-and-splat patterns as we have instructions that will do this
9795 // in one go.
9796 if (DAG.isSplatValue(Op, true) &&
9797 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9798 const SDValue *InputLoad = &Op.getOperand(0);
9799 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9800
9801 // If the input load is an extending load, it will be an i32 -> i64
9802 // extending load and isValidSplatLoad() will update NewOpcode.
9803 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9804 unsigned ElementSize =
9805 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9806
9807 assert(((ElementSize == 2 * MemorySize)
9808 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9809 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9810 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9811 "Unmatched element size and opcode!\n");
9812
9813 // Checking for a single use of this load, we have to check for vector
9814 // width (128 bits) / ElementSize uses (since each operand of the
9815      // BUILD_VECTOR is a separate use of the value).
9816 unsigned NumUsesOfInputLD = 128 / ElementSize;
9817 for (SDValue BVInOp : Op->ops())
9818 if (BVInOp.isUndef())
9819 NumUsesOfInputLD--;
9820
9821      // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9822      // The cases below should also apply to "lfiwzx/lfiwax + LE target + index
9823      // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9824      // 15", but isValidSplatLoad() currently only returns true when
9825      // the data at index 0 is not nullptr, so we will not get into trouble for
9826      // these cases.
9827 //
9828 // case 1 - lfiwzx/lfiwax
9829 // 1.1: load result is i32 and is sign/zero extend to i64;
9830 // 1.2: build a v2i64 vector type with above loaded value;
9831 // 1.3: the vector has only one value at index 0, others are all undef;
9832 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9833 if (NumUsesOfInputLD == 1 &&
9834 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9835 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9836 Subtarget.hasLFIWAX()))
9837 return SDValue();
9838
9839 // case 2 - lxvr[hb]x
9840 // 2.1: load result is at most i16;
9841 // 2.2: build a vector with above loaded value;
9842 // 2.3: the vector has only one value at index 0, others are all undef;
9843 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9844 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9845 Subtarget.isISA3_1() && ElementSize <= 16)
9846 return SDValue();
9847
9848 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9849 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9850 Subtarget.hasVSX()) {
9851 SDValue Ops[] = {
9852 LD->getChain(), // Chain
9853 LD->getBasePtr(), // Ptr
9854 DAG.getValueType(Op.getValueType()) // VT
9855 };
9856 SDValue LdSplt = DAG.getMemIntrinsicNode(
9857 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9858 LD->getMemoryVT(), LD->getMemOperand());
9859 // Replace all uses of the output chain of the original load with the
9860 // output chain of the new load.
9861 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9862 LdSplt.getValue(1));
9863 return LdSplt;
9864 }
9865 }
9866
9867    // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9868 // 32-bits can be lowered to VSX instructions under certain conditions.
9869 // Without VSX, there is no pattern more efficient than expanding the node.
9870 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9871 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9872 Subtarget.hasP8Vector()))
9873 return Op;
9874 return SDValue();
9875 }
9876
9877 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9878 unsigned SplatSize = SplatBitSize / 8;
9879
9880 // First, handle single instruction cases.
9881
9882 // All zeros?
9883 if (SplatBits == 0) {
9884 // Canonicalize all zero vectors to be v4i32.
9885 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9886 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9887 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9888 }
9889 return Op;
9890 }
9891
9892 // We have XXSPLTIW for constant splats four bytes wide.
9893 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9894 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9895 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9896 // turned into a 4-byte splat of 0xABABABAB.
9897 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9898 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9899 Op.getValueType(), DAG, dl);
9900
9901 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9902 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9903 dl);
9904
9905 // We have XXSPLTIB for constant splats one byte wide.
9906 if (Subtarget.hasP9Vector() && SplatSize == 1)
9907 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9908 dl);
9909
9910 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9911 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9912 if (SextVal >= -16 && SextVal <= 15) {
9913    // SplatSize may be 1, 2, 4, or 8. For size 8, splat as size 4 and then
9914    // extend the splatted words to doublewords.
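    // For illustration: a v2i64 splat of -5 is built as a v4i32 splat of -5
    // (vspltisw -5) and then widened with vupklsw, which sign-extends the
    // words to doublewords and reproduces -5 in each 64-bit element.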
9915 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9916 SDValue Res =
9917 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9918 if (SplatSize != 8)
9919 return Res;
9920 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9921 }
9922
9923 // Two instruction sequences.
9924
9925 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9926 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9927     SmallVector<SDValue, 16> Ops(16, C);
9928     SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9929 unsigned IID;
9930 EVT VT;
9931 switch (SplatSize) {
9932 default:
9933 llvm_unreachable("Unexpected type for vector constant.");
9934 case 2:
9935 IID = Intrinsic::ppc_altivec_vupklsb;
9936 VT = MVT::v8i16;
9937 break;
9938 case 4:
9939 IID = Intrinsic::ppc_altivec_vextsb2w;
9940 VT = MVT::v4i32;
9941 break;
9942 case 8:
9943 IID = Intrinsic::ppc_altivec_vextsb2d;
9944 VT = MVT::v2i64;
9945 break;
9946 }
9947 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9948 return DAG.getBitcast(Op->getValueType(0), Extend);
9949 }
9950 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9951
9952 // If this value is in the range [-32,30] and is even, use:
9953 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9954 // If this value is in the range [17,31] and is odd, use:
9955 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9956 // If this value is in the range [-31,-17] and is odd, use:
9957 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9958 // Note the last two are three-instruction sequences.
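  // For illustration: SextVal = 30 becomes vsplti(15) + vsplti(15);
  // SextVal = 27 becomes vsplti(11) - vsplti(-16), i.e. 11 - (-16) = 27; and
  // SextVal = -27 becomes vsplti(-11) + vsplti(-16). The VADD_SPLAT pseudo
  // below defers the expansion so constant folding cannot undo it.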
9959 if (SextVal >= -32 && SextVal <= 31) {
9960 // To avoid having these optimizations undone by constant folding,
9961 // we convert to a pseudo that will be expanded later into one of
9962 // the above forms.
9963 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9964 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9965 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9966 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9967 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9968 if (VT == Op.getValueType())
9969 return RetVal;
9970 else
9971 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9972 }
9973
9974 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9975 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9976 // for fneg/fabs.
9977 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9978 // Make -1 and vspltisw -1:
9979 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9980
9981 // Make the VSLW intrinsic, computing 0x8000_0000.
9982 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9983 OnesV, DAG, dl);
9984
9985 // xor by OnesV to invert it.
9986 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9987 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9988 }
9989
9990 // Check to see if this is a wide variety of vsplti*, binop self cases.
9991 static const signed char SplatCsts[] = {
9992 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9993 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9994 };
9995
9996 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9997 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9998    // cases which are ambiguous (e.g. formation of 0x8000_0000).
9999 int i = SplatCsts[idx];
10000
10001 // Figure out what shift amount will be used by altivec if shifted by i in
10002 // this splat size.
10003 unsigned TypeShiftAmt = i & (SplatBitSize-1);
10004
10005 // vsplti + shl self.
10006 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
10007 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
10008 static const unsigned IIDs[] = { // Intrinsic to use for each size.
10009 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
10010 Intrinsic::ppc_altivec_vslw
10011 };
10012 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
10013 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
10014 }
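    // For illustration of the "shl self" case above: a v4i32 splat of 160 is
    // i = 5 shifted left by TypeShiftAmt = 5, so it can be emitted as
    // vspltisw 5 followed by vslw of that result by itself (each word is
    // shifted by 5 & 0x1f = 5).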
10015
10016 // vsplti + srl self.
10017 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
10018 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
10019 static const unsigned IIDs[] = { // Intrinsic to use for each size.
10020 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
10021 Intrinsic::ppc_altivec_vsrw
10022 };
10023 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
10024 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
10025 }
10026
10027 // vsplti + rol self.
10028 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
10029 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
10030 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
10031 static const unsigned IIDs[] = { // Intrinsic to use for each size.
10032 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
10033 Intrinsic::ppc_altivec_vrlw
10034 };
10035 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
10036 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
10037 }
10038
10039 // t = vsplti c, result = vsldoi t, t, 1
10040 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
10041 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10042 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
10043 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10044 }
10045 // t = vsplti c, result = vsldoi t, t, 2
10046 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
10047 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10048 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
10049 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10050 }
10051 // t = vsplti c, result = vsldoi t, t, 3
10052 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
10053 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10054 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
10055 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10056 }
10057 }
10058
10059 return SDValue();
10060}
10061
10062/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
10063/// the specified operations to build the shuffle.
10064 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
10065                                       SDValue RHS, SelectionDAG &DAG,
10066 const SDLoc &dl) {
10067 unsigned OpNum = (PFEntry >> 26) & 0x0F;
10068 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
10069 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
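  // A rough note on the encoding decoded above: bits 31-30 of a table entry
  // hold the cost, bits 29-26 the operation, and the two 13-bit fields hold
  // perfect-shuffle table indices for the left and right sub-shuffles. The
  // OP_COPY leaves use base-9 encodings of <0,1,2,3> (take LHS) and
  // <4,5,6,7> (take RHS), which is what the LHSID checks below test for.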
10070
10071 enum {
10072 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
10073 OP_VMRGHW,
10074 OP_VMRGLW,
10075 OP_VSPLTISW0,
10076 OP_VSPLTISW1,
10077 OP_VSPLTISW2,
10078 OP_VSPLTISW3,
10079 OP_VSLDOI4,
10080 OP_VSLDOI8,
10081 OP_VSLDOI12
10082 };
10083
10084 if (OpNum == OP_COPY) {
10085 if (LHSID == (1*9+2)*9+3) return LHS;
10086 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
10087 return RHS;
10088 }
10089
10090 SDValue OpLHS, OpRHS;
10091 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
10092 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
10093
10094 int ShufIdxs[16];
10095 switch (OpNum) {
10096 default: llvm_unreachable("Unknown i32 permute!");
10097 case OP_VMRGHW:
10098 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
10099 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
10100 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
10101 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
10102 break;
10103 case OP_VMRGLW:
10104 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
10105 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
10106 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
10107 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
10108 break;
10109 case OP_VSPLTISW0:
10110 for (unsigned i = 0; i != 16; ++i)
10111 ShufIdxs[i] = (i&3)+0;
10112 break;
10113 case OP_VSPLTISW1:
10114 for (unsigned i = 0; i != 16; ++i)
10115 ShufIdxs[i] = (i&3)+4;
10116 break;
10117 case OP_VSPLTISW2:
10118 for (unsigned i = 0; i != 16; ++i)
10119 ShufIdxs[i] = (i&3)+8;
10120 break;
10121 case OP_VSPLTISW3:
10122 for (unsigned i = 0; i != 16; ++i)
10123 ShufIdxs[i] = (i&3)+12;
10124 break;
10125 case OP_VSLDOI4:
10126 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
10127 case OP_VSLDOI8:
10128 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
10129 case OP_VSLDOI12:
10130 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
10131 }
10132 EVT VT = OpLHS.getValueType();
10133 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
10134 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
10135 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
10136 return DAG.getNode(ISD::BITCAST, dl, VT, T);
10137}
10138
10139/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10140/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10141/// SDValue.
10142SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10143 SelectionDAG &DAG) const {
10144 const unsigned BytesInVector = 16;
10145 bool IsLE = Subtarget.isLittleEndian();
10146 SDLoc dl(N);
10147 SDValue V1 = N->getOperand(0);
10148 SDValue V2 = N->getOperand(1);
10149 unsigned ShiftElts = 0, InsertAtByte = 0;
10150 bool Swap = false;
10151
10152 // Shifts required to get the byte we want at element 7.
10153 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10154 0, 15, 14, 13, 12, 11, 10, 9};
10155 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10156 1, 2, 3, 4, 5, 6, 7, 8};
10157
10158 ArrayRef<int> Mask = N->getMask();
10159 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10160
10161 // For each mask element, find out if we're just inserting something
10162 // from V2 into V1 or vice versa.
10163 // Possible permutations inserting an element from V2 into V1:
10164 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10165 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10166 // ...
10167 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10168 // Inserting from V1 into V2 will be similar, except mask range will be
10169 // [16,31].
10170
10171 bool FoundCandidate = false;
10172 // If both vector operands for the shuffle are the same vector, the mask
10173 // will contain only elements from the first one and the second one will be
10174 // undef.
10175 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10176   // Go through the mask of bytes to find an element that's being moved
10177 // from one vector to the other.
10178 for (unsigned i = 0; i < BytesInVector; ++i) {
10179 unsigned CurrentElement = Mask[i];
10180 // If 2nd operand is undefined, we should only look for element 7 in the
10181 // Mask.
10182 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10183 continue;
10184
10185 bool OtherElementsInOrder = true;
10186 // Examine the other elements in the Mask to see if they're in original
10187 // order.
10188 for (unsigned j = 0; j < BytesInVector; ++j) {
10189 if (j == i)
10190 continue;
10191       // If CurrentElement is from V1 [0,15], then we expect the rest of the
10192       // Mask to be from V2 [16,31] and vice versa, unless the 2nd operand is
10193       // undefined, in which case we always assume we're picking from the 1st operand.
10194 int MaskOffset =
10195 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10196 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10197 OtherElementsInOrder = false;
10198 break;
10199 }
10200 }
10201 // If other elements are in original order, we record the number of shifts
10202 // we need to get the element we want into element 7. Also record which byte
10203 // in the vector we should insert into.
10204 if (OtherElementsInOrder) {
10205 // If 2nd operand is undefined, we assume no shifts and no swapping.
10206 if (V2.isUndef()) {
10207 ShiftElts = 0;
10208 Swap = false;
10209 } else {
10210         // Only need the last 4 bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10211 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10212 : BigEndianShifts[CurrentElement & 0xF];
10213 Swap = CurrentElement < BytesInVector;
10214 }
10215 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10216 FoundCandidate = true;
10217 break;
10218 }
10219 }
10220
10221 if (!FoundCandidate)
10222 return SDValue();
10223
10224 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10225 // optionally with VECSHL if shift is required.
10226 if (Swap)
10227 std::swap(V1, V2);
10228 if (V2.isUndef())
10229 V2 = V1;
10230 if (ShiftElts) {
10231 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10232 DAG.getConstant(ShiftElts, dl, MVT::i32));
10233 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10234 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10235 }
10236 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10237 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10238}
10239
10240/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10241/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10242/// SDValue.
10243SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10244 SelectionDAG &DAG) const {
10245 const unsigned NumHalfWords = 8;
10246 const unsigned BytesInVector = NumHalfWords * 2;
10247 // Check that the shuffle is on half-words.
10248 if (!isNByteElemShuffleMask(N, 2, 1))
10249 return SDValue();
10250
10251 bool IsLE = Subtarget.isLittleEndian();
10252 SDLoc dl(N);
10253 SDValue V1 = N->getOperand(0);
10254 SDValue V2 = N->getOperand(1);
10255 unsigned ShiftElts = 0, InsertAtByte = 0;
10256 bool Swap = false;
10257
10258 // Shifts required to get the half-word we want at element 3.
10259 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10260 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10261
10262 uint32_t Mask = 0;
10263 uint32_t OriginalOrderLow = 0x1234567;
10264 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10265 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10266 // 32-bit space, only need 4-bit nibbles per element.
10267 for (unsigned i = 0; i < NumHalfWords; ++i) {
10268 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10269 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10270 }
10271
10272 // For each mask element, find out if we're just inserting something
10273 // from V2 into V1 or vice versa. Possible permutations inserting an element
10274 // from V2 into V1:
10275 // X, 1, 2, 3, 4, 5, 6, 7
10276 // 0, X, 2, 3, 4, 5, 6, 7
10277 // 0, 1, X, 3, 4, 5, 6, 7
10278 // 0, 1, 2, X, 4, 5, 6, 7
10279 // 0, 1, 2, 3, X, 5, 6, 7
10280 // 0, 1, 2, 3, 4, X, 6, 7
10281 // 0, 1, 2, 3, 4, 5, X, 7
10282 // 0, 1, 2, 3, 4, 5, 6, X
10283 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10284
10285 bool FoundCandidate = false;
10286 // Go through the mask of half-words to find an element that's being moved
10287 // from one vector to the other.
10288 for (unsigned i = 0; i < NumHalfWords; ++i) {
10289 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10290 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10291 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10292 uint32_t TargetOrder = 0x0;
10293
10294 // If both vector operands for the shuffle are the same vector, the mask
10295 // will contain only elements from the first one and the second one will be
10296 // undef.
10297 if (V2.isUndef()) {
10298 ShiftElts = 0;
10299 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10300 TargetOrder = OriginalOrderLow;
10301 Swap = false;
10302 // Skip if not the correct element or mask of other elements don't equal
10303 // to our expected order.
10304 if (MaskOneElt == VINSERTHSrcElem &&
10305 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10306 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10307 FoundCandidate = true;
10308 break;
10309 }
10310 } else { // If both operands are defined.
10311 // Target order is [8,15] if the current mask is between [0,7].
10312 TargetOrder =
10313 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10314 // Skip if mask of other elements don't equal our expected order.
10315 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10316 // We only need the last 3 bits for the number of shifts.
10317 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10318 : BigEndianShifts[MaskOneElt & 0x7];
10319 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10320 Swap = MaskOneElt < NumHalfWords;
10321 FoundCandidate = true;
10322 break;
10323 }
10324 }
10325 }
10326
10327 if (!FoundCandidate)
10328 return SDValue();
10329
10330 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10331 // optionally with VECSHL if shift is required.
10332 if (Swap)
10333 std::swap(V1, V2);
10334 if (V2.isUndef())
10335 V2 = V1;
10336 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10337 if (ShiftElts) {
10338 // Double ShiftElts because we're left shifting on v16i8 type.
10339 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10340 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10341 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10342 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10343 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10344 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10345 }
10346 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10347 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10348 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10349 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10350}
10351
10352/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10353/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10354/// return the default SDValue.
10355SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10356 SelectionDAG &DAG) const {
10357 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10358 // to v16i8. Peek through the bitcasts to get the actual operands.
10359   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10360   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10361
10362 auto ShuffleMask = SVN->getMask();
10363 SDValue VecShuffle(SVN, 0);
10364 SDLoc DL(SVN);
10365
10366 // Check that we have a four byte shuffle.
10367 if (!isNByteElemShuffleMask(SVN, 4, 1))
10368 return SDValue();
10369
10370 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10371 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10372 std::swap(LHS, RHS);
10373     VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10374     ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10375 if (!CommutedSV)
10376 return SDValue();
10377 ShuffleMask = CommutedSV->getMask();
10378 }
10379
10380 // Ensure that the RHS is a vector of constants.
10381 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10382 if (!BVN)
10383 return SDValue();
10384
10385 // Check if RHS is a splat of 4-bytes (or smaller).
10386 APInt APSplatValue, APSplatUndef;
10387 unsigned SplatBitSize;
10388 bool HasAnyUndefs;
10389 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10390 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10391 SplatBitSize > 32)
10392 return SDValue();
10393
10394 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10395 // The instruction splats a constant C into two words of the source vector
10396 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10397 // Thus we check that the shuffle mask is the equivalent of
10398 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10399 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10400 // within each word are consecutive, so we only need to check the first byte.
10401 SDValue Index;
10402 bool IsLE = Subtarget.isLittleEndian();
10403 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10404 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10405 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10406 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10407 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10408 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10409 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10410 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10411 else
10412 return SDValue();
10413
10414 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10415 // for XXSPLTI32DX.
10416 unsigned SplatVal = APSplatValue.getZExtValue();
10417 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10418 SplatVal |= (SplatVal << SplatBitSize);
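  // For illustration: an 8-bit splat value of 0xAB becomes 0xABAB after the
  // first iteration and 0xABABABAB after the second, the 32-bit immediate
  // form XXSPLTI32DX expects.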
10419
10420 SDValue SplatNode = DAG.getNode(
10421 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10422 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10423 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10424}
10425
10426/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10427/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10428/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10429 /// i.e. (or (shl x, C1), (srl x, 128-C1)).
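// For illustration: a v1i128 rotate left by 16 becomes a v16i8 shuffle whose
// mask is rotated by 16 / 8 = 2 byte positions (<2, 3, ..., 15, 0, 1>), while
// a rotate by 12 is not a byte multiple and falls through to
// (or (shl x, 12), (srl x, 116)) on the i128 scalar.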
10430SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10431 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10432 assert(Op.getValueType() == MVT::v1i128 &&
10433 "Only set v1i128 as custom, other type shouldn't reach here!");
10434 SDLoc dl(Op);
10435 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10436 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10437 unsigned SHLAmt = N1.getConstantOperandVal(0);
10438 if (SHLAmt % 8 == 0) {
10439 std::array<int, 16> Mask;
10440 std::iota(Mask.begin(), Mask.end(), 0);
10441 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10442 if (SDValue Shuffle =
10443 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10444 DAG.getUNDEF(MVT::v16i8), Mask))
10445 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10446 }
10447 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10448 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10449 DAG.getConstant(SHLAmt, dl, MVT::i32));
10450 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10451 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10452 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10453 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10454}
10455
10456/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10457/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10458/// return the code it can be lowered into. Worst case, it can always be
10459/// lowered into a vperm.
10460SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10461 SelectionDAG &DAG) const {
10462 SDLoc dl(Op);
10463 SDValue V1 = Op.getOperand(0);
10464 SDValue V2 = Op.getOperand(1);
10465 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10466
10467 // Any nodes that were combined in the target-independent combiner prior
10468 // to vector legalization will not be sent to the target combine. Try to
10469 // combine it here.
10470 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10471 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10472 return NewShuffle;
10473 Op = NewShuffle;
10474     SVOp = cast<ShuffleVectorSDNode>(Op);
10475     V1 = Op.getOperand(0);
10476 V2 = Op.getOperand(1);
10477 }
10478 EVT VT = Op.getValueType();
10479 bool isLittleEndian = Subtarget.isLittleEndian();
10480
10481 unsigned ShiftElts, InsertAtByte;
10482 bool Swap = false;
10483
10484 // If this is a load-and-splat, we can do that with a single instruction
10485 // in some cases. However if the load has multiple uses, we don't want to
10486 // combine it because that will just produce multiple loads.
10487 bool IsPermutedLoad = false;
10488 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10489 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10490 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10491 InputLoad->hasOneUse()) {
10492 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10493 int SplatIdx =
10494 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10495
10496 // The splat index for permuted loads will be in the left half of the vector
10497 // which is strictly wider than the loaded value by 8 bytes. So we need to
10498 // adjust the splat index to point to the correct address in memory.
10499 if (IsPermutedLoad) {
10500 assert((isLittleEndian || IsFourByte) &&
10501 "Unexpected size for permuted load on big endian target");
10502 SplatIdx += IsFourByte ? 2 : 1;
10503 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10504 "Splat of a value outside of the loaded memory");
10505 }
10506
10507 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10508 // For 4-byte load-and-splat, we need Power9.
10509 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10510 uint64_t Offset = 0;
10511 if (IsFourByte)
10512 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10513 else
10514 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10515
10516 // If the width of the load is the same as the width of the splat,
10517 // loading with an offset would load the wrong memory.
10518 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10519 Offset = 0;
10520
10521 SDValue BasePtr = LD->getBasePtr();
10522 if (Offset != 0)
10523         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10524                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
10525 SDValue Ops[] = {
10526 LD->getChain(), // Chain
10527 BasePtr, // BasePtr
10528 DAG.getValueType(Op.getValueType()) // VT
10529 };
10530 SDVTList VTL =
10531 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10532 SDValue LdSplt =
10533           DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10534                                   Ops, LD->getMemoryVT(), LD->getMemOperand());
10535 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10536 if (LdSplt.getValueType() != SVOp->getValueType(0))
10537 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10538 return LdSplt;
10539 }
10540 }
10541
10542 // All v2i64 and v2f64 shuffles are legal
10543 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10544 return Op;
10545
10546 if (Subtarget.hasP9Vector() &&
10547 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10548 isLittleEndian)) {
10549 if (V2.isUndef())
10550 V2 = V1;
10551 else if (Swap)
10552 std::swap(V1, V2);
10553 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10554 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10555 if (ShiftElts) {
10556 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10557 DAG.getConstant(ShiftElts, dl, MVT::i32));
10558 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10559 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10560 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10561 }
10562 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10563 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10564 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10565 }
10566
10567 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10568 SDValue SplatInsertNode;
10569 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10570 return SplatInsertNode;
10571 }
10572
10573 if (Subtarget.hasP9Altivec()) {
10574 SDValue NewISDNode;
10575 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10576 return NewISDNode;
10577
10578 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10579 return NewISDNode;
10580 }
10581
10582 if (Subtarget.hasVSX() &&
10583 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10584 if (Swap)
10585 std::swap(V1, V2);
10586 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10587 SDValue Conv2 =
10588 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10589
10590 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10591 DAG.getConstant(ShiftElts, dl, MVT::i32));
10592 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10593 }
10594
10595 if (Subtarget.hasVSX() &&
10596 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10597 if (Swap)
10598 std::swap(V1, V2);
10599 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10600 SDValue Conv2 =
10601 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10602
10603 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10604 DAG.getConstant(ShiftElts, dl, MVT::i32));
10605 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10606 }
10607
10608 if (Subtarget.hasP9Vector()) {
10609 if (PPC::isXXBRHShuffleMask(SVOp)) {
10610 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10611 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10612 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10613 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10614 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10615 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10616 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10617 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10618 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10619 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10620 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10621 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10622 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10623 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10624 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10625 }
10626 }
10627
10628 if (Subtarget.hasVSX()) {
10629 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10630 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10631
10632 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10633 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10634 DAG.getConstant(SplatIdx, dl, MVT::i32));
10635 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10636 }
10637
10638 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10639 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10640 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10641 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10642 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10643 }
10644 }
10645
10646 // Cases that are handled by instructions that take permute immediates
10647 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10648 // selected by the instruction selector.
10649 if (V2.isUndef()) {
10650 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10651 PPC::isSplatShuffleMask(SVOp, 2) ||
10652 PPC::isSplatShuffleMask(SVOp, 4) ||
10653 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10654 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10655 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10656 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10657 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10658 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10659 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10660 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10661 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10662 (Subtarget.hasP8Altivec() && (
10663 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10664 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10665 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10666 return Op;
10667 }
10668 }
10669
10670 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10671 // and produce a fixed permutation. If any of these match, do not lower to
10672 // VPERM.
10673 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10674 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10675 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10676 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10677 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10678 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10679 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10680 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10681 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10682 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10683 (Subtarget.hasP8Altivec() && (
10684 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10685 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10686 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10687 return Op;
10688
10689 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10690 // perfect shuffle table to emit an optimal matching sequence.
10691 ArrayRef<int> PermMask = SVOp->getMask();
10692
10693 if (!DisablePerfectShuffle && !isLittleEndian) {
10694 unsigned PFIndexes[4];
10695 bool isFourElementShuffle = true;
10696 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10697 ++i) { // Element number
10698 unsigned EltNo = 8; // Start out undef.
10699 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10700 if (PermMask[i * 4 + j] < 0)
10701 continue; // Undef, ignore it.
10702
10703 unsigned ByteSource = PermMask[i * 4 + j];
10704 if ((ByteSource & 3) != j) {
10705 isFourElementShuffle = false;
10706 break;
10707 }
10708
10709 if (EltNo == 8) {
10710 EltNo = ByteSource / 4;
10711 } else if (EltNo != ByteSource / 4) {
10712 isFourElementShuffle = false;
10713 break;
10714 }
10715 }
10716 PFIndexes[i] = EltNo;
10717 }
10718
10719 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10720     // perfect shuffle table to determine if it is cost effective to do this as
10721 // discrete instructions, or whether we should use a vperm.
10722 // For now, we skip this for little endian until such time as we have a
10723 // little-endian perfect shuffle table.
10724 if (isFourElementShuffle) {
10725 // Compute the index in the perfect shuffle table.
10726 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10727 PFIndexes[2] * 9 + PFIndexes[3];
10728
10729 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10730 unsigned Cost = (PFEntry >> 30);
10731
10732 // Determining when to avoid vperm is tricky. Many things affect the cost
10733 // of vperm, particularly how many times the perm mask needs to be
10734 // computed. For example, if the perm mask can be hoisted out of a loop or
10735 // is already used (perhaps because there are multiple permutes with the
10736 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10737 // permute mask out of the loop requires an extra register.
10738 //
10739 // As a compromise, we only emit discrete instructions if the shuffle can
10740 // be generated in 3 or fewer operations. When we have loop information
10741 // available, if this block is within a loop, we should avoid using vperm
10742 // for 3-operation perms and use a constant pool load instead.
10743 if (Cost < 3)
10744 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10745 }
10746 }
10747
10748 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10749 // vector that will get spilled to the constant pool.
10750 if (V2.isUndef()) V2 = V1;
10751
10752 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10753}
10754
10755SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10756 ArrayRef<int> PermMask, EVT VT,
10757 SDValue V1, SDValue V2) const {
10758 unsigned Opcode = PPCISD::VPERM;
10759 EVT ValType = V1.getValueType();
10760 SDLoc dl(Op);
10761 bool NeedSwap = false;
10762 bool isLittleEndian = Subtarget.isLittleEndian();
10763 bool isPPC64 = Subtarget.isPPC64();
10764
10765 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10766 (V1->hasOneUse() || V2->hasOneUse())) {
10767 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - using "
10768 "XXPERM instead\n");
10769 Opcode = PPCISD::XXPERM;
10770
10771 // The second input to XXPERM is also an output, so if it has multiple
10772 // uses a copy would be required; prefer the single-use operand as the
10773 // second input to avoid that copy.
10774 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10775 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10776 std::swap(V1, V2);
10777 NeedSwap = !NeedSwap;
10778 }
10779 }
10780
10781 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10782 // that it is in input element units, not in bytes. Convert now.
10783
10784 // For little endian, the order of the input vectors is reversed, and
10785 // the permutation mask is complemented with respect to 31. This is
10786 // necessary to produce proper semantics with the big-endian-based vperm
10787 // instruction.
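// For instance, a source byte index of 0 (the first byte of V1 in the
// big-endian concatenation) becomes control byte 31 - 0 = 31, and index 31
// becomes 0; together with the std::swap of V1 and V2 performed further
// down, this presents the operands in the order that the big-endian vperm
// byte numbering expects.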
10788 EVT EltVT = V1.getValueType().getVectorElementType();
10789 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10790
10791 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10792 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10793
10794 /*
10795 Vectors will be appended like so: [ V1 | v2 ]
10796 XXSWAPD on V1:
10797 [ A | B | C | D ] -> [ C | D | A | B ]
10798 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10799 i.e. index of A, B += 8, and index of C, D -= 8.
10800 XXSWAPD on V2:
10801 [ E | F | G | H ] -> [ G | H | E | F ]
10802 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10803 i.e. index of E, F += 8, index of G, H -= 8
10804 Swap V1 and V2:
10805 [ V1 | V2 ] -> [ V2 | V1 ]
10806 0-15 16-31 0-15 16-31
10807 i.e. index of V1 += 16, index of V2 -= 16
10808 */
10809
10810 SmallVector<SDValue, 16> ResultMask;
10811 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10812 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10813
10814 if (V1HasXXSWAPD) {
10815 if (SrcElt < 8)
10816 SrcElt += 8;
10817 else if (SrcElt < 16)
10818 SrcElt -= 8;
10819 }
10820 if (V2HasXXSWAPD) {
10821 if (SrcElt > 23)
10822 SrcElt -= 8;
10823 else if (SrcElt > 15)
10824 SrcElt += 8;
10825 }
10826 if (NeedSwap) {
10827 if (SrcElt < 16)
10828 SrcElt += 16;
10829 else
10830 SrcElt -= 16;
10831 }
10832 for (unsigned j = 0; j != BytesPerElement; ++j)
10833 if (isLittleEndian)
10834 ResultMask.push_back(
10835 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10836 else
10837 ResultMask.push_back(
10838 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10839 }
10840
10841 if (V1HasXXSWAPD) {
10842 dl = SDLoc(V1->getOperand(0));
10843 V1 = V1->getOperand(0)->getOperand(1);
10844 }
10845 if (V2HasXXSWAPD) {
10846 dl = SDLoc(V2->getOperand(0));
10847 V2 = V2->getOperand(0)->getOperand(1);
10848 }
10849
10850 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10851 if (ValType != MVT::v2f64)
10852 V1 = DAG.getBitcast(MVT::v2f64, V1);
10853 if (V2.getValueType() != MVT::v2f64)
10854 V2 = DAG.getBitcast(MVT::v2f64, V2);
10855 }
10856
10857 ShufflesHandledWithVPERM++;
10858 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10859 LLVM_DEBUG({
10860 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10861 if (Opcode == PPCISD::XXPERM) {
10862 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10863 } else {
10864 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10865 }
10866 SVOp->dump();
10867 dbgs() << "With the following permute control vector:\n";
10868 VPermMask.dump();
10869 });
10870
10871 if (Opcode == PPCISD::XXPERM)
10872 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10873
10874 // On little endian only the operands still need to be swapped here;
10875 // the permute mask above was already computed for the swapped order.
10876 if (isLittleEndian)
10877 std::swap(V1, V2);
10878
10879 SDValue VPERMNode =
10880 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10881
10882 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10883 return VPERMNode;
10884}
10885
10886/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10887 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10888/// information about the intrinsic.
10889static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10890 bool &isDot, const PPCSubtarget &Subtarget) {
10891 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10892 CompareOpc = -1;
10893 isDot = false;
10894 switch (IntrinsicID) {
10895 default:
10896 return false;
10897 // Comparison predicates.
10898 case Intrinsic::ppc_altivec_vcmpbfp_p:
10899 CompareOpc = 966;
10900 isDot = true;
10901 break;
10902 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10903 CompareOpc = 198;
10904 isDot = true;
10905 break;
10906 case Intrinsic::ppc_altivec_vcmpequb_p:
10907 CompareOpc = 6;
10908 isDot = true;
10909 break;
10910 case Intrinsic::ppc_altivec_vcmpequh_p:
10911 CompareOpc = 70;
10912 isDot = true;
10913 break;
10914 case Intrinsic::ppc_altivec_vcmpequw_p:
10915 CompareOpc = 134;
10916 isDot = true;
10917 break;
10918 case Intrinsic::ppc_altivec_vcmpequd_p:
10919 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10920 CompareOpc = 199;
10921 isDot = true;
10922 } else
10923 return false;
10924 break;
10925 case Intrinsic::ppc_altivec_vcmpneb_p:
10926 case Intrinsic::ppc_altivec_vcmpneh_p:
10927 case Intrinsic::ppc_altivec_vcmpnew_p:
10928 case Intrinsic::ppc_altivec_vcmpnezb_p:
10929 case Intrinsic::ppc_altivec_vcmpnezh_p:
10930 case Intrinsic::ppc_altivec_vcmpnezw_p:
10931 if (Subtarget.hasP9Altivec()) {
10932 switch (IntrinsicID) {
10933 default:
10934 llvm_unreachable("Unknown comparison intrinsic.");
10935 case Intrinsic::ppc_altivec_vcmpneb_p:
10936 CompareOpc = 7;
10937 break;
10938 case Intrinsic::ppc_altivec_vcmpneh_p:
10939 CompareOpc = 71;
10940 break;
10941 case Intrinsic::ppc_altivec_vcmpnew_p:
10942 CompareOpc = 135;
10943 break;
10944 case Intrinsic::ppc_altivec_vcmpnezb_p:
10945 CompareOpc = 263;
10946 break;
10947 case Intrinsic::ppc_altivec_vcmpnezh_p:
10948 CompareOpc = 327;
10949 break;
10950 case Intrinsic::ppc_altivec_vcmpnezw_p:
10951 CompareOpc = 391;
10952 break;
10953 }
10954 isDot = true;
10955 } else
10956 return false;
10957 break;
10958 case Intrinsic::ppc_altivec_vcmpgefp_p:
10959 CompareOpc = 454;
10960 isDot = true;
10961 break;
10962 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10963 CompareOpc = 710;
10964 isDot = true;
10965 break;
10966 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10967 CompareOpc = 774;
10968 isDot = true;
10969 break;
10970 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10971 CompareOpc = 838;
10972 isDot = true;
10973 break;
10974 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10975 CompareOpc = 902;
10976 isDot = true;
10977 break;
10978 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10979 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10980 CompareOpc = 967;
10981 isDot = true;
10982 } else
10983 return false;
10984 break;
10985 case Intrinsic::ppc_altivec_vcmpgtub_p:
10986 CompareOpc = 518;
10987 isDot = true;
10988 break;
10989 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10990 CompareOpc = 582;
10991 isDot = true;
10992 break;
10993 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10994 CompareOpc = 646;
10995 isDot = true;
10996 break;
10997 case Intrinsic::ppc_altivec_vcmpgtud_p:
10998 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10999 CompareOpc = 711;
11000 isDot = true;
11001 } else
11002 return false;
11003 break;
11004
11005 case Intrinsic::ppc_altivec_vcmpequq:
11006 case Intrinsic::ppc_altivec_vcmpgtsq:
11007 case Intrinsic::ppc_altivec_vcmpgtuq:
11008 if (!Subtarget.isISA3_1())
11009 return false;
11010 switch (IntrinsicID) {
11011 default:
11012 llvm_unreachable("Unknown comparison intrinsic.");
11013 case Intrinsic::ppc_altivec_vcmpequq:
11014 CompareOpc = 455;
11015 break;
11016 case Intrinsic::ppc_altivec_vcmpgtsq:
11017 CompareOpc = 903;
11018 break;
11019 case Intrinsic::ppc_altivec_vcmpgtuq:
11020 CompareOpc = 647;
11021 break;
11022 }
11023 break;
11024
11025 // VSX predicate comparisons use the same infrastructure
11026 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
11027 case Intrinsic::ppc_vsx_xvcmpgedp_p:
11028 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11029 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11030 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11031 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11032 if (Subtarget.hasVSX()) {
11033 switch (IntrinsicID) {
11034 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
11035 CompareOpc = 99;
11036 break;
11037 case Intrinsic::ppc_vsx_xvcmpgedp_p:
11038 CompareOpc = 115;
11039 break;
11040 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11041 CompareOpc = 107;
11042 break;
11043 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11044 CompareOpc = 67;
11045 break;
11046 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11047 CompareOpc = 83;
11048 break;
11049 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11050 CompareOpc = 75;
11051 break;
11052 }
11053 isDot = true;
11054 } else
11055 return false;
11056 break;
11057
11058 // Normal Comparisons.
11059 case Intrinsic::ppc_altivec_vcmpbfp:
11060 CompareOpc = 966;
11061 break;
11062 case Intrinsic::ppc_altivec_vcmpeqfp:
11063 CompareOpc = 198;
11064 break;
11065 case Intrinsic::ppc_altivec_vcmpequb:
11066 CompareOpc = 6;
11067 break;
11068 case Intrinsic::ppc_altivec_vcmpequh:
11069 CompareOpc = 70;
11070 break;
11071 case Intrinsic::ppc_altivec_vcmpequw:
11072 CompareOpc = 134;
11073 break;
11074 case Intrinsic::ppc_altivec_vcmpequd:
11075 if (Subtarget.hasP8Altivec())
11076 CompareOpc = 199;
11077 else
11078 return false;
11079 break;
11080 case Intrinsic::ppc_altivec_vcmpneb:
11081 case Intrinsic::ppc_altivec_vcmpneh:
11082 case Intrinsic::ppc_altivec_vcmpnew:
11083 case Intrinsic::ppc_altivec_vcmpnezb:
11084 case Intrinsic::ppc_altivec_vcmpnezh:
11085 case Intrinsic::ppc_altivec_vcmpnezw:
11086 if (Subtarget.hasP9Altivec())
11087 switch (IntrinsicID) {
11088 default:
11089 llvm_unreachable("Unknown comparison intrinsic.");
11090 case Intrinsic::ppc_altivec_vcmpneb:
11091 CompareOpc = 7;
11092 break;
11093 case Intrinsic::ppc_altivec_vcmpneh:
11094 CompareOpc = 71;
11095 break;
11096 case Intrinsic::ppc_altivec_vcmpnew:
11097 CompareOpc = 135;
11098 break;
11099 case Intrinsic::ppc_altivec_vcmpnezb:
11100 CompareOpc = 263;
11101 break;
11102 case Intrinsic::ppc_altivec_vcmpnezh:
11103 CompareOpc = 327;
11104 break;
11105 case Intrinsic::ppc_altivec_vcmpnezw:
11106 CompareOpc = 391;
11107 break;
11108 }
11109 else
11110 return false;
11111 break;
11112 case Intrinsic::ppc_altivec_vcmpgefp:
11113 CompareOpc = 454;
11114 break;
11115 case Intrinsic::ppc_altivec_vcmpgtfp:
11116 CompareOpc = 710;
11117 break;
11118 case Intrinsic::ppc_altivec_vcmpgtsb:
11119 CompareOpc = 774;
11120 break;
11121 case Intrinsic::ppc_altivec_vcmpgtsh:
11122 CompareOpc = 838;
11123 break;
11124 case Intrinsic::ppc_altivec_vcmpgtsw:
11125 CompareOpc = 902;
11126 break;
11127 case Intrinsic::ppc_altivec_vcmpgtsd:
11128 if (Subtarget.hasP8Altivec())
11129 CompareOpc = 967;
11130 else
11131 return false;
11132 break;
11133 case Intrinsic::ppc_altivec_vcmpgtub:
11134 CompareOpc = 518;
11135 break;
11136 case Intrinsic::ppc_altivec_vcmpgtuh:
11137 CompareOpc = 582;
11138 break;
11139 case Intrinsic::ppc_altivec_vcmpgtuw:
11140 CompareOpc = 646;
11141 break;
11142 case Intrinsic::ppc_altivec_vcmpgtud:
11143 if (Subtarget.hasP8Altivec())
11144 CompareOpc = 711;
11145 else
11146 return false;
11147 break;
11148 case Intrinsic::ppc_altivec_vcmpequq_p:
11149 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11150 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11151 if (!Subtarget.isISA3_1())
11152 return false;
11153 switch (IntrinsicID) {
11154 default:
11155 llvm_unreachable("Unknown comparison intrinsic.");
11156 case Intrinsic::ppc_altivec_vcmpequq_p:
11157 CompareOpc = 455;
11158 break;
11159 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11160 CompareOpc = 903;
11161 break;
11162 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11163 CompareOpc = 647;
11164 break;
11165 }
11166 isDot = true;
11167 break;
11168 }
11169 return true;
11170}
11171
11172/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11173/// lower, do it, otherwise return null.
11174SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11175 SelectionDAG &DAG) const {
11176 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11177
11178 SDLoc dl(Op);
11179
11180 switch (IntrinsicID) {
11181 case Intrinsic::thread_pointer:
11182 // Reads the thread pointer register, used for __builtin_thread_pointer.
11183 if (Subtarget.isPPC64())
11184 return DAG.getRegister(PPC::X13, MVT::i64);
11185 return DAG.getRegister(PPC::R2, MVT::i32);
11186
11187 case Intrinsic::ppc_rldimi: {
11188 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11189 SDValue Src = Op.getOperand(1);
11190 APInt Mask = Op.getConstantOperandAPInt(4);
11191 if (Mask.isZero())
11192 return Op.getOperand(2);
11193 if (Mask.isAllOnes())
11194 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11195 uint64_t SH = Op.getConstantOperandVal(3);
11196 unsigned MB = 0, ME = 0;
11197 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11198 report_fatal_error("invalid rldimi mask!");
11199 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
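// Worked illustration (numbers picked for this comment): with SH = 8 and a
// mask covering MB = 40 .. ME = 47, the emitted instruction rotates by
// 63 - ME = 16, so Src is pre-rotated by ME + SH + 1 = 56; the combined
// rotation 56 + 16 = 72 == 8 (mod 64) matches the requested SH while the
// insert range stays MB .. ME.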
11200 if (ME < 63 - SH) {
11201 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11202 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11203 } else if (ME > 63 - SH) {
11204 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11205 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11206 }
11207 return SDValue(
11208 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11209 {Op.getOperand(2), Src,
11210 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11211 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11212 0);
11213 }
11214
11215 case Intrinsic::ppc_rlwimi: {
11216 APInt Mask = Op.getConstantOperandAPInt(4);
11217 if (Mask.isZero())
11218 return Op.getOperand(2);
11219 if (Mask.isAllOnes())
11220 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11221 Op.getOperand(3));
11222 unsigned MB = 0, ME = 0;
11223 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11224 report_fatal_error("invalid rlwimi mask!");
11225 return SDValue(DAG.getMachineNode(
11226 PPC::RLWIMI, dl, MVT::i32,
11227 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11228 DAG.getTargetConstant(MB, dl, MVT::i32),
11229 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11230 0);
11231 }
11232
11233 case Intrinsic::ppc_rlwnm: {
11234 if (Op.getConstantOperandVal(3) == 0)
11235 return DAG.getConstant(0, dl, MVT::i32);
11236 unsigned MB = 0, ME = 0;
11237 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11238 report_fatal_error("invalid rlwnm mask!");
11239 return SDValue(
11240 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11241 {Op.getOperand(1), Op.getOperand(2),
11242 DAG.getTargetConstant(MB, dl, MVT::i32),
11243 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11244 0);
11245 }
11246
11247 case Intrinsic::ppc_mma_disassemble_acc: {
11248 if (Subtarget.isISAFuture()) {
11249 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11250 SDValue WideVec =
11251 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11252 Op.getOperand(1)),
11253 0);
11254 SmallVector<SDValue, 4> RetOps;
11255 SDValue Value = SDValue(WideVec.getNode(), 0);
11256 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11257
11258 SDValue Extract;
11259 Extract = DAG.getNode(
11260 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11261 Subtarget.isLittleEndian() ? Value2 : Value,
11262 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11263 dl, getPointerTy(DAG.getDataLayout())));
11264 RetOps.push_back(Extract);
11265 Extract = DAG.getNode(
11266 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11267 Subtarget.isLittleEndian() ? Value2 : Value,
11268 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11269 dl, getPointerTy(DAG.getDataLayout())));
11270 RetOps.push_back(Extract);
11271 Extract = DAG.getNode(
11272 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11273 Subtarget.isLittleEndian() ? Value : Value2,
11274 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11275 dl, getPointerTy(DAG.getDataLayout())));
11276 RetOps.push_back(Extract);
11277 Extract = DAG.getNode(
11278 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11279 Subtarget.isLittleEndian() ? Value : Value2,
11280 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11281 dl, getPointerTy(DAG.getDataLayout())));
11282 RetOps.push_back(Extract);
11283 return DAG.getMergeValues(RetOps, dl);
11284 }
11285 [[fallthrough]];
11286 }
11287 case Intrinsic::ppc_vsx_disassemble_pair: {
11288 int NumVecs = 2;
11289 SDValue WideVec = Op.getOperand(1);
11290 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11291 NumVecs = 4;
11292 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11293 }
11294 SmallVector<SDValue, 4> RetOps;
11295 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11296 SDValue Extract = DAG.getNode(
11297 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11298 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11299 : VecNo,
11300 dl, getPointerTy(DAG.getDataLayout())));
11301 RetOps.push_back(Extract);
11302 }
11303 return DAG.getMergeValues(RetOps, dl);
11304 }
11305
11306 case Intrinsic::ppc_mma_build_dmr: {
11307 SmallVector<SDValue, 4> Pairs;
11308 SmallVector<SDValue, 8> Chains;
11309 for (int i = 1; i < 9; i += 2) {
11310 SDValue Hi = Op.getOperand(i);
11311 SDValue Lo = Op.getOperand(i + 1);
11312 if (Hi->getOpcode() == ISD::LOAD)
11313 Chains.push_back(Hi.getValue(1));
11314 if (Lo->getOpcode() == ISD::LOAD)
11315 Chains.push_back(Lo.getValue(1));
11316 Pairs.push_back(
11317 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11318 }
11319 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11320 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11321 return DAG.getMergeValues({Value, TF}, dl);
11322 }
11323
11324 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11325 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11326 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11327 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11328 "Specify P of 0 or 1 for lower or upper 512 bytes");
11329 unsigned HiLo = Idx->getSExtValue();
11330 unsigned Opcode;
11331 unsigned Subx;
11332 if (HiLo == 0) {
11333 Opcode = PPC::DMXXEXTFDMR512;
11334 Subx = PPC::sub_wacc_lo;
11335 } else {
11336 Opcode = PPC::DMXXEXTFDMR512_HI;
11337 Subx = PPC::sub_wacc_hi;
11338 }
11339 SDValue Subreg(
11340 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11341 Op.getOperand(1),
11342 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11343 0);
11344 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11345 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11346 }
11347
11348 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11349 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11350 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11351 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11352 "Specify a dmr row pair 0-3");
11353 unsigned IdxVal = Idx->getSExtValue();
11354 unsigned Subx;
11355 switch (IdxVal) {
11356 case 0:
11357 Subx = PPC::sub_dmrrowp0;
11358 break;
11359 case 1:
11360 Subx = PPC::sub_dmrrowp1;
11361 break;
11362 case 2:
11363 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11364 break;
11365 case 3:
11366 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11367 break;
11368 }
11369 SDValue Subreg(
11370 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11371 Op.getOperand(1),
11372 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11373 0);
11374 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11375 return SDValue(
11376 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11377 0);
11378 }
11379
11380 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11381 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11382 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11383 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11384 "Specify P of 0 or 1 for lower or upper 512 bytes");
11385 unsigned HiLo = Idx->getSExtValue();
11386 unsigned Opcode;
11387 unsigned Subx;
11388 if (HiLo == 0) {
11389 Opcode = PPC::DMXXINSTDMR512;
11390 Subx = PPC::sub_wacc_lo;
11391 } else {
11392 Opcode = PPC::DMXXINSTDMR512_HI;
11393 Subx = PPC::sub_wacc_hi;
11394 }
11395 SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
11396 SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
11397 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11398 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11399 Op.getOperand(1), Wacc, SubReg),
11400 0);
11401 }
11402
11403 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11404 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11405 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11406 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11407 "Specify a dmr row pair 0-3");
11408 unsigned IdxVal = Idx->getSExtValue();
11409 unsigned Subx;
11410 switch (IdxVal) {
11411 case 0:
11412 Subx = PPC::sub_dmrrowp0;
11413 break;
11414 case 1:
11415 Subx = PPC::sub_dmrrowp1;
11416 break;
11417 case 2:
11418 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11419 break;
11420 case 3:
11421 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11422 break;
11423 }
11424 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11425 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11426 SDValue Ops[] = {Op.getOperand(2), P};
11427 SDValue DMRRowp = SDValue(
11428 DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
11429 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11430 Op.getOperand(1), DMRRowp, SubReg),
11431 0);
11432 }
11433
11434 case Intrinsic::ppc_mma_xxmfacc:
11435 case Intrinsic::ppc_mma_xxmtacc: {
11436 // Allow pre-isa-future subtargets to lower as normal.
11437 if (!Subtarget.isISAFuture())
11438 return SDValue();
11439 // The intrinsics for xxmtacc and xxmfacc take one argument of
11440 // type v512i1. On ISA Future CPUs the corresponding wacc instructions
11441 // dmxx[inst|extf]dmr512 are always generated for type v512i1, so there
11442 // is no need to produce xxm[t|f]acc.
11443 SDValue WideVec = Op.getOperand(1);
11444 DAG.ReplaceAllUsesWith(Op, WideVec);
11445 return SDValue();
11446 }
11447
11448 case Intrinsic::ppc_unpack_longdouble: {
11449 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11450 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11451 "Argument of long double unpack must be 0 or 1!");
11452 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11453 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11454 Idx->getValueType(0)));
11455 }
11456
11457 case Intrinsic::ppc_compare_exp_lt:
11458 case Intrinsic::ppc_compare_exp_gt:
11459 case Intrinsic::ppc_compare_exp_eq:
11460 case Intrinsic::ppc_compare_exp_uo: {
11461 unsigned Pred;
11462 switch (IntrinsicID) {
11463 case Intrinsic::ppc_compare_exp_lt:
11464 Pred = PPC::PRED_LT;
11465 break;
11466 case Intrinsic::ppc_compare_exp_gt:
11467 Pred = PPC::PRED_GT;
11468 break;
11469 case Intrinsic::ppc_compare_exp_eq:
11470 Pred = PPC::PRED_EQ;
11471 break;
11472 case Intrinsic::ppc_compare_exp_uo:
11473 Pred = PPC::PRED_UN;
11474 break;
11475 }
11476 return SDValue(
11477 DAG.getMachineNode(
11478 PPC::SELECT_CC_I4, dl, MVT::i32,
11479 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11480 Op.getOperand(1), Op.getOperand(2)),
11481 0),
11482 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11483 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11484 0);
11485 }
11486 case Intrinsic::ppc_test_data_class: {
11487 EVT OpVT = Op.getOperand(1).getValueType();
11488 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11489 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11490 : PPC::XSTSTDCSP);
11491 return SDValue(
11492 DAG.getMachineNode(
11493 PPC::SELECT_CC_I4, dl, MVT::i32,
11494 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11495 Op.getOperand(1)),
11496 0),
11497 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11498 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11499 0);
11500 }
11501 case Intrinsic::ppc_fnmsub: {
11502 EVT VT = Op.getOperand(1).getValueType();
11503 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11504 return DAG.getNode(
11505 ISD::FNEG, dl, VT,
11506 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11507 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11508 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11509 Op.getOperand(2), Op.getOperand(3));
11510 }
11511 case Intrinsic::ppc_convert_f128_to_ppcf128:
11512 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11513 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11514 ? RTLIB::CONVERT_PPCF128_F128
11515 : RTLIB::CONVERT_F128_PPCF128;
11516 MakeLibCallOptions CallOptions;
11517 std::pair<SDValue, SDValue> Result =
11518 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11519 dl, SDValue());
11520 return Result.first;
11521 }
11522 case Intrinsic::ppc_maxfe:
11523 case Intrinsic::ppc_maxfl:
11524 case Intrinsic::ppc_maxfs:
11525 case Intrinsic::ppc_minfe:
11526 case Intrinsic::ppc_minfl:
11527 case Intrinsic::ppc_minfs: {
11528 EVT VT = Op.getValueType();
11529 assert(
11530 all_of(Op->ops().drop_front(4),
11531 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11532 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11533 (void)VT;
11534 ISD::CondCode CC = ISD::SETGT;
11535 if (IntrinsicID == Intrinsic::ppc_minfe ||
11536 IntrinsicID == Intrinsic::ppc_minfl ||
11537 IntrinsicID == Intrinsic::ppc_minfs)
11538 CC = ISD::SETLT;
11539 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11540 SDValue Res = Op.getOperand(I);
11541 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11542 Res =
11543 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11544 }
11545 return Res;
11546 }
11547 }
11548
11549 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11550 // opcode number of the comparison.
11551 int CompareOpc;
11552 bool isDot;
11553 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11554 return SDValue(); // Don't custom lower most intrinsics.
11555
11556 // If this is a non-dot comparison, make the VCMP node and we are done.
11557 if (!isDot) {
11558 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11559 Op.getOperand(1), Op.getOperand(2),
11560 DAG.getConstant(CompareOpc, dl, MVT::i32));
11561 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11562 }
11563
11564 // Create the PPCISD altivec 'dot' comparison node.
11565 SDValue Ops[] = {
11566 Op.getOperand(2), // LHS
11567 Op.getOperand(3), // RHS
11568 DAG.getConstant(CompareOpc, dl, MVT::i32)
11569 };
11570 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11571 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11572
11573 // Unpack the result based on how the target uses it.
11574 unsigned BitNo; // Bit # of CR6.
11575 bool InvertBit; // Invert result?
11576 unsigned Bitx;
11577 unsigned SetOp;
11578 switch (Op.getConstantOperandVal(1)) {
11579 default: // Can't happen, don't crash on invalid number though.
11580 case 0: // Return the value of the EQ bit of CR6.
11581 BitNo = 0;
11582 InvertBit = false;
11583 Bitx = PPC::sub_eq;
11584 SetOp = PPCISD::SETBC;
11585 break;
11586 case 1: // Return the inverted value of the EQ bit of CR6.
11587 BitNo = 0;
11588 InvertBit = true;
11589 Bitx = PPC::sub_eq;
11590 SetOp = PPCISD::SETBCR;
11591 break;
11592 case 2: // Return the value of the LT bit of CR6.
11593 BitNo = 2;
11594 InvertBit = false;
11595 Bitx = PPC::sub_lt;
11596 SetOp = PPCISD::SETBC;
11597 break;
11598 case 3: // Return the inverted value of the LT bit of CR6.
11599 BitNo = 2;
11600 InvertBit = true;
11601 Bitx = PPC::sub_lt;
11602 SetOp = PPCISD::SETBCR;
11603 break;
11604 }
11605
11606 SDValue GlueOp = CompNode.getValue(1);
11607 if (Subtarget.isISA3_1()) {
11608 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11609 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11610 SDValue CRBit =
11611 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11612 CR6Reg, SubRegIdx, GlueOp),
11613 0);
11614 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11615 }
11616
11617 // Now that we have the comparison, emit a copy from the CR to a GPR.
11618 // This is flagged to the above dot comparison.
11619 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11620 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11621
11622 // Shift the bit into the low position.
11623 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11624 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11625 // Isolate the bit.
11626 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11627 DAG.getConstant(1, dl, MVT::i32));
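  // For illustration: the MFOCRF above places the four CR6 bits in bits 7..4
  // of the GPR (LT, GT, EQ, SO from high to low), so the EQ cases (BitNo = 0)
  // shift right by 8 - (3 - 0) = 5 and the LT cases (BitNo = 2) by
  // 8 - (3 - 2) = 7 before the AND isolates the single result bit.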
11628
11629 // If we are supposed to, toggle the bit.
11630 if (InvertBit)
11631 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11632 DAG.getConstant(1, dl, MVT::i32));
11633 return Flags;
11634}
11635
11636SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11637 SelectionDAG &DAG) const {
11638 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain at
11639 // the beginning of the argument list.
11640 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11641 SDLoc DL(Op);
11642 switch (Op.getConstantOperandVal(ArgStart)) {
11643 case Intrinsic::ppc_cfence: {
11644 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11645 SDValue Val = Op.getOperand(ArgStart + 1);
11646 EVT Ty = Val.getValueType();
11647 if (Ty == MVT::i128) {
11648 // FIXME: Testing one of two paired registers is sufficient to guarantee
11649 // ordering?
11650 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11651 }
11652 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11653 return SDValue(
11654 DAG.getMachineNode(
11655 Opcode, DL, MVT::Other,
11656 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11657 Op.getOperand(0)),
11658 0);
11659 }
11660 case Intrinsic::ppc_mma_disassemble_dmr: {
11661 return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11662 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11663 }
11664 default:
11665 break;
11666 }
11667 return SDValue();
11668}
11669
11670// Lower scalar BSWAP64 to xxbrd.
11671SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11672 SDLoc dl(Op);
11673 if (!Subtarget.isPPC64())
11674 return Op;
11675 // MTVSRDD
11676 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11677 Op.getOperand(0));
11678 // XXBRD
11679 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11680 // MFVSRD
11681 int VectorIndex = 0;
11682 if (Subtarget.isLittleEndian())
11683 VectorIndex = 1;
11684 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11685 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11686 return Op;
11687}
11688
11689// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11690// compared to a value that is atomically loaded (atomic loads zero-extend).
11691SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11692 SelectionDAG &DAG) const {
11693 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11694 "Expecting an atomic compare-and-swap here.");
11695 SDLoc dl(Op);
11696 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11697 EVT MemVT = AtomicNode->getMemoryVT();
11698 if (MemVT.getSizeInBits() >= 32)
11699 return Op;
11700
11701 SDValue CmpOp = Op.getOperand(2);
11702 // If this is already correctly zero-extended, leave it alone.
11703 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11704 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11705 return Op;
11706
11707 // Clear the high bits of the compare operand.
11708 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11709 SDValue NewCmpOp =
11710 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11711 DAG.getConstant(MaskVal, dl, MVT::i32));
11712
11713 // Replace the existing compare operand with the properly zero-extended one.
11714 SmallVector<SDValue, 4> Ops;
11715 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11716 Ops.push_back(AtomicNode->getOperand(i));
11717 Ops[2] = NewCmpOp;
11718 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11719 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11720 auto NodeTy =
11721 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11722 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11723}
11724
11725SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11726 SelectionDAG &DAG) const {
11727 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11728 EVT MemVT = N->getMemoryVT();
11729 assert(MemVT.getSimpleVT() == MVT::i128 &&
11730 "Expect quadword atomic operations");
11731 SDLoc dl(N);
11732 unsigned Opc = N->getOpcode();
11733 switch (Opc) {
11734 case ISD::ATOMIC_LOAD: {
11735 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11736 // lowered to PPC instructions by the pattern-matching instruction selector.
11737 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11738 SmallVector<SDValue, 4> Ops{
11739 N->getOperand(0),
11740 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11741 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11742 Ops.push_back(N->getOperand(I));
11743 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11744 Ops, MemVT, N->getMemOperand());
11745 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11746 SDValue ValHi =
11747 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11748 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11749 DAG.getConstant(64, dl, MVT::i32));
11750 SDValue Val =
11751 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11752 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11753 {Val, LoadedVal.getValue(2)});
11754 }
11755 case ISD::ATOMIC_STORE: {
11756 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11757 // lowered to PPC instructions by the pattern-matching instruction selector.
11758 SDVTList Tys = DAG.getVTList(MVT::Other);
11759 SmallVector<SDValue, 4> Ops{
11760 N->getOperand(0),
11761 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11762 SDValue Val = N->getOperand(1);
11763 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11764 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11765 DAG.getConstant(64, dl, MVT::i32));
11766 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11767 Ops.push_back(ValLo);
11768 Ops.push_back(ValHi);
11769 Ops.push_back(N->getOperand(2));
11770 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11771 N->getMemOperand());
11772 }
11773 default:
11774 llvm_unreachable("Unexpected atomic opcode");
11775 }
11776}
11777
11778 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11779 SelectionDAG &DAG,
11780 const PPCSubtarget &Subtarget) {
11781 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11782
11783 enum DataClassMask {
11784 DC_NAN = 1 << 6,
11785 DC_NEG_INF = 1 << 4,
11786 DC_POS_INF = 1 << 5,
11787 DC_NEG_ZERO = 1 << 2,
11788 DC_POS_ZERO = 1 << 3,
11789 DC_NEG_SUBNORM = 1,
11790 DC_POS_SUBNORM = 1 << 1,
11791 };
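  // These bits mirror the DCMX immediate of the xststdc[sp|dp|qp] test
  // instructions. For example, a query for "is infinite" (fcPosInf | fcNegInf)
  // is encoded further down as DC_POS_INF | DC_NEG_INF = 0x20 | 0x10 = 0x30,
  // and the EQ bit of the resulting CR field reports whether the value matched.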
11792
11793 EVT VT = Op.getValueType();
11794
11795 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11796 : VT == MVT::f64 ? PPC::XSTSTDCDP
11797 : PPC::XSTSTDCSP;
11798
11799 if (Mask == fcAllFlags)
11800 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11801 if (Mask == 0)
11802 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11803
11804 // Test the complemented set of flags when that is cheaper or necessary.
11805 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11806 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11807 return DAG.getNOT(Dl, Rev, MVT::i1);
11808 }
11809
11810 // Power doesn't support testing whether a value is 'normal'. Test the rest
11811 // first, and test if it's 'not not-normal' with expected sign.
11812 if (Mask & fcNormal) {
11813 SDValue Rev(DAG.getMachineNode(
11814 TestOp, Dl, MVT::i32,
11815 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11816 DC_NEG_ZERO | DC_POS_ZERO |
11817 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11818 Dl, MVT::i32),
11819 Op),
11820 0);
11821 // The sign is stored in CR bit 0 and the result in CR bit 2.
11822 SDValue Sign(
11823 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11824 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11825 0);
11826 SDValue Normal(DAG.getNOT(
11827 Dl,
11828 SDValue(DAG.getMachineNode(
11829 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11830 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11831 0),
11832 MVT::i1));
11833 if (Mask & fcPosNormal)
11834 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11835 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11836 if (Mask == fcPosNormal || Mask == fcNegNormal)
11837 return Result;
11838
11839 return DAG.getNode(
11840 ISD::OR, Dl, MVT::i1,
11841 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11842 }
11843
11844 // The instruction doesn't differentiate between signaling and quiet NaNs.
11845 // Test the rest first, then check whether it 'is NaN and is signaling/quiet'.
11846 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11847 bool IsQuiet = Mask & fcQNan;
11848 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11849
11850 // Quietness is determined by the first bit in fraction field.
11851 uint64_t QuietMask = 0;
11852 SDValue HighWord;
11853 if (VT == MVT::f128) {
11854 HighWord = DAG.getNode(
11855 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11856 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11857 QuietMask = 0x8000;
11858 } else if (VT == MVT::f64) {
11859 if (Subtarget.isPPC64()) {
11860 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11861 DAG.getBitcast(MVT::i64, Op),
11862 DAG.getConstant(1, Dl, MVT::i32));
11863 } else {
11864 SDValue Vec = DAG.getBitcast(
11865 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11866 HighWord = DAG.getNode(
11867 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11868 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11869 }
11870 QuietMask = 0x80000;
11871 } else if (VT == MVT::f32) {
11872 HighWord = DAG.getBitcast(MVT::i32, Op);
11873 QuietMask = 0x400000;
11874 }
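  // In each case QuietMask selects the most significant fraction bit within
  // the 32-bit word examined above: bit 22 for f32 (0x400000), bit 19 of the
  // high word for f64 (0x80000), and bit 15 of the high word for f128
  // (0x8000), since the sign and exponent fill the remaining high-order bits.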
11875 SDValue NanRes = DAG.getSetCC(
11876 Dl, MVT::i1,
11877 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11878 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11879 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11880 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11881 if (Mask == fcQNan || Mask == fcSNan)
11882 return NanRes;
11883
11884 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11885 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11886 NanRes);
11887 }
11888
11889 unsigned NativeMask = 0;
11890 if ((Mask & fcNan) == fcNan)
11891 NativeMask |= DC_NAN;
11892 if (Mask & fcNegInf)
11893 NativeMask |= DC_NEG_INF;
11894 if (Mask & fcPosInf)
11895 NativeMask |= DC_POS_INF;
11896 if (Mask & fcNegZero)
11897 NativeMask |= DC_NEG_ZERO;
11898 if (Mask & fcPosZero)
11899 NativeMask |= DC_POS_ZERO;
11900 if (Mask & fcNegSubnormal)
11901 NativeMask |= DC_NEG_SUBNORM;
11902 if (Mask & fcPosSubnormal)
11903 NativeMask |= DC_POS_SUBNORM;
11904 return SDValue(
11905 DAG.getMachineNode(
11906 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11907 SDValue(DAG.getMachineNode(
11908 TestOp, Dl, MVT::i32,
11909 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11910 0),
11911 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11912 0);
11913}
11914
11915SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11916 SelectionDAG &DAG) const {
11917 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11918 SDValue LHS = Op.getOperand(0);
11919 uint64_t RHSC = Op.getConstantOperandVal(1);
11920 SDLoc Dl(Op);
11921 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11922 if (LHS.getValueType() == MVT::ppcf128) {
11923 // The higher part determines the value class.
11924 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11925 DAG.getConstant(1, Dl, MVT::i32));
11926 }
11927
11928 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11929}
11930
11931// Adjust the length value for a load/store with length to account for the
11932// instructions requiring a left justified length, and for non-byte element
11933// types requiring scaling by element size.
11934static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11935 SelectionDAG &DAG) {
11936 SDLoc dl(Val);
11937 EVT VT = Val->getValueType(0);
11938 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11939 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11940 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11941 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11942}
11943
11944SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11945 auto VPLD = cast<VPLoadSDNode>(Op);
11946 bool Future = Subtarget.isISAFuture();
11947 SDLoc dl(Op);
11948 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11949 "Mask predication not supported");
11950 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11951 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11952 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11953 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11954 Len = AdjustLength(Len, EltBits, !Future, DAG);
11955 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11956 VPLD->getOperand(1), Len};
11957 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11958 SDValue VPL =
11959 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
11960 VPLD->getMemoryVT(), VPLD->getMemOperand());
11961 return VPL;
11962}
11963
11964SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11965 auto VPST = cast<VPStoreSDNode>(Op);
11966 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11967 "Mask predication not supported");
11968 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11969 SDLoc dl(Op);
11970 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11971 unsigned EltBits =
11972 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11973 bool Future = Subtarget.isISAFuture();
11974 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11975 Len = AdjustLength(Len, EltBits, !Future, DAG);
11976 SDValue Ops[] = {
11977 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11978 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11979 VPST->getOperand(2), Len};
11980 SDVTList Tys = DAG.getVTList(MVT::Other);
11981 SDValue VPS =
11982 DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
11983 VPST->getMemoryVT(), VPST->getMemOperand());
11984 return VPS;
11985}
11986
11987SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11988 SelectionDAG &DAG) const {
11989 SDLoc dl(Op);
11990
11991 MachineFunction &MF = DAG.getMachineFunction();
11992 SDValue Op0 = Op.getOperand(0);
11993 EVT ValVT = Op0.getValueType();
11994 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11995 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11996 int64_t IntVal = Op.getConstantOperandVal(0);
11997 if (IntVal >= -16 && IntVal <= 15)
11998 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11999 dl);
12000 }
12001
12002 ReuseLoadInfo RLI;
12003 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
12004 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
12005 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
12006 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
12007
12008 MachineMemOperand *MMO =
12009 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
12010 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
12011 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
12012 SDValue Bits = DAG.getMemIntrinsicNode(
12013 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
12014 MVT::i32, MMO);
12015 if (RLI.ResChain)
12016 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
12017 return Bits.getValue(0);
12018 }
12019
12020 // Create a stack slot that is 16-byte aligned.
12021 MachineFrameInfo &MFI = MF.getFrameInfo();
12022 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
12023 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12024 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
12025
12026 SDValue Val = Op0;
12027 // P10 hardware store forwarding requires that a single store contains all
12028 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
12029 // to avoid load hit store on P10 when running binaries compiled for older
12030 // processors by generating two mergeable scalar stores to forward with the
12031 // vector load.
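  // As an illustration (big endian, 32-bit scalar): the value is widened to
  // i64 and shifted left by 64 - 32 = 32 so it sits in the most significant
  // word, and that doubleword is stored at both FIdx + 8 and FIdx. The 16-byte
  // vector load below is then covered by a pair of adjacent 8-byte stores that
  // P10 can merge and forward, with the scalar landing in element 0.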
12032 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
12033 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
12034 ValVT.getSizeInBits() <= 64) {
12035 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
12036 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
12037 SDValue ShiftBy = DAG.getConstant(
12038 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
12039 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
12040 SDValue Plus8 =
12041 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
12042 SDValue Store2 =
12043 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
12044 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
12045 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
12046 MachinePointerInfo());
12047 }
12048
12049 // Store the input value into Value#0 of the stack slot.
12050 SDValue Store =
12051 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
12052 // Load it out.
12053 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
12054}
12055
12056SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
12057 SelectionDAG &DAG) const {
12058 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12059 "Should only be called for ISD::INSERT_VECTOR_ELT");
12060
12061 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
12062
12063 EVT VT = Op.getValueType();
12064 SDLoc dl(Op);
12065 SDValue V1 = Op.getOperand(0);
12066 SDValue V2 = Op.getOperand(1);
12067
12068 if (VT == MVT::v2f64 && C)
12069 return Op;
12070
12071 if (Subtarget.hasP9Vector()) {
12072 // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
12073 // because on P10, it allows this specific insert_vector_elt load pattern to
12074 // utilize the refactored load and store infrastructure in order to exploit
12075 // prefixed loads.
12076 // On targets with inexpensive direct moves (Power9 and up), a
12077 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
12078 // load since a single precision load will involve conversion to double
12079 // precision on the load followed by another conversion to single precision.
12080 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
12081 (isa<LoadSDNode>(V2))) {
12082 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
12083 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
12084 SDValue InsVecElt =
12085 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12086 BitcastLoad, Op.getOperand(2));
12087 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12088 }
12089 }
12090
12091 if (Subtarget.isISA3_1()) {
12092 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12093 return SDValue();
12094 // On P10, we have legal lowering for constant and variable indices for
12095 // all vectors.
12096 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12097 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12098 return Op;
12099 }
12100
12101 // Before P10, we have legal lowering for constant indices but not for
12102 // variable ones.
12103 if (!C)
12104 return SDValue();
12105
12106 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12107 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12108 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12109 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12110 unsigned InsertAtElement = C->getZExtValue();
12111 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12112 if (Subtarget.isLittleEndian()) {
12113 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12114 }
12115 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12116 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12117 }
12118 return Op;
12119}
12120
12121SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12122 SelectionDAG &DAG) const {
12123 SDLoc dl(Op);
12124 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12125 SDValue LoadChain = LN->getChain();
12126 SDValue BasePtr = LN->getBasePtr();
12127 EVT VT = Op.getValueType();
12128 bool IsV1024i1 = VT == MVT::v1024i1;
12129 bool IsV2048i1 = VT == MVT::v2048i1;
12130
12131 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12132 // Dense Math dmr pair registers, respectively.
12133 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12134 (void)IsV2048i1;
12135 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12136 "Dense Math support required.");
12137 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
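  // Layout sketch: a v1024i1 DMR value is fetched as four 32-byte lxvp loads
  // at offsets 0, 32, 64 and 96 (eight loads for a v2048i1 DMR pair); the
  // resulting v256i1 halves are reassembled below with DMXXINSTDMR512[_HI]
  // and REG_SEQUENCE into the wide register.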
12138
12139 SmallVector<SDValue, 8> Loads;
12140 SmallVector<SDValue, 8> LoadChains;
12141
12142 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12143 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12144 MachineMemOperand *MMO = LN->getMemOperand();
12145 unsigned NumVecs = VT.getSizeInBits() / 256;
12146 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12147 MachineMemOperand *NewMMO =
12148 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12149 if (Idx > 0) {
12150 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12151 DAG.getConstant(32, dl, BasePtr.getValueType()));
12152 LoadOps[2] = BasePtr;
12153 }
12154 SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12155 DAG.getVTList(MVT::v256i1, MVT::Other),
12156 LoadOps, MVT::v256i1, NewMMO);
12157 LoadChains.push_back(Ld.getValue(1));
12158 Loads.push_back(Ld);
12159 }
12160
12161 if (Subtarget.isLittleEndian()) {
12162 std::reverse(Loads.begin(), Loads.end());
12163 std::reverse(LoadChains.begin(), LoadChains.end());
12164 }
12165
12166 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12167 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[0],
12168 Loads[1]),
12169 0);
12170 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12171 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12172 Loads[2], Loads[3]),
12173 0);
12174 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12175 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12176 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
12177
12178 SDValue Value =
12179 SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
12180
12181 if (IsV1024i1) {
12182 return DAG.getMergeValues({Value, TF}, dl);
12183 }
12184
12185 // Handle Loads for V2048i1 which represents a dmr pair.
12186 SDValue DmrPValue;
12187 SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
12188 Loads[4], Loads[5]),
12189 0);
12190 SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12191 Loads[6], Loads[7]),
12192 0);
12193 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
12194 SDValue Dmr1Value = SDValue(
12195 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
12196
12197 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12198 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12199
12200 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12201 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12202
12203 DmrPValue = SDValue(
12204 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12205
12206 return DAG.getMergeValues({DmrPValue, TF}, dl);
12207}
12208
12209SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12210 const SDLoc &dl,
12211 SelectionDAG &DAG) const {
12212 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
12213 Pairs[1]),
12214 0);
12215 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12216 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12217 Pairs[2], Pairs[3]),
12218 0);
12219 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12220 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12221
12222 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12223 {RC, Lo, LoSub, Hi, HiSub}),
12224 0);
12225}
12226
12227SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12228 SelectionDAG &DAG) const {
12229 SDLoc dl(Op);
12230 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12231 SDValue LoadChain = LN->getChain();
12232 SDValue BasePtr = LN->getBasePtr();
12233 EVT VT = Op.getValueType();
12234
12235 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12236 return LowerDMFVectorLoad(Op, DAG);
12237
12238 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12239 return Op;
12240
12241 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12242 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
12243 // 2 or 4 vsx registers.
12244 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12245 "Type unsupported without MMA");
12246 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12247 "Type unsupported without paired vector support");
12248 Align Alignment = LN->getAlign();
12249 SmallVector<SDValue, 4> Loads;
12250 SmallVector<SDValue, 4> LoadChains;
12251 unsigned NumVecs = VT.getSizeInBits() / 128;
12252 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12253 SDValue Load =
12254 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12255 LN->getPointerInfo().getWithOffset(Idx * 16),
12256 commonAlignment(Alignment, Idx * 16),
12257 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12258 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12259 DAG.getConstant(16, dl, BasePtr.getValueType()));
12260 Loads.push_back(Load);
12261 LoadChains.push_back(Load.getValue(1));
12262 }
12263 if (Subtarget.isLittleEndian()) {
12264 std::reverse(Loads.begin(), Loads.end());
12265 std::reverse(LoadChains.begin(), LoadChains.end());
12266 }
12267 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12268 SDValue Value =
12269 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12270 dl, VT, Loads);
12271 SDValue RetOps[] = {Value, TF};
12272 return DAG.getMergeValues(RetOps, dl);
12273}
12274
12275SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12276 SelectionDAG &DAG) const {
12277
12278 SDLoc dl(Op);
12279 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12280 SDValue StoreChain = SN->getChain();
12281 SDValue BasePtr = SN->getBasePtr();
12282 SmallVector<SDValue, 8> Values;
12283 SmallVector<SDValue, 8> Stores;
12284 EVT VT = SN->getValue().getValueType();
12285 bool IsV1024i1 = VT == MVT::v1024i1;
12286 bool IsV2048i1 = VT == MVT::v2048i1;
12287
12288 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12289 // Dense Math dmr pair registers, respectively.
12290 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12291 (void)IsV2048i1;
12292 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12293 "Dense Math support required.");
12294 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12295
12296 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12297 if (IsV1024i1) {
12298 SDValue Lo(DAG.getMachineNode(
12299 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12300 Op.getOperand(1),
12301 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12302 0);
12303 SDValue Hi(DAG.getMachineNode(
12304 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12305 Op.getOperand(1),
12306 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12307 0);
12308 MachineSDNode *ExtNode =
12309 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12310 Values.push_back(SDValue(ExtNode, 0));
12311 Values.push_back(SDValue(ExtNode, 1));
12312 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12313 Values.push_back(SDValue(ExtNode, 0));
12314 Values.push_back(SDValue(ExtNode, 1));
12315 } else {
12316 // This corresponds to v2048i1 which represents a dmr pair.
12317 SDValue Dmr0(
12318 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12319 Op.getOperand(1),
12320 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12321 0);
12322
12323 SDValue Dmr1(
12324 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12325 Op.getOperand(1),
12326 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12327 0);
12328
12329 SDValue Dmr0Lo(DAG.getMachineNode(
12330 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12331 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12332 0);
12333
12334 SDValue Dmr0Hi(DAG.getMachineNode(
12335 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12336 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12337 0);
12338
12339 SDValue Dmr1Lo(DAG.getMachineNode(
12340 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12341 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12342 0);
12343
12344 SDValue Dmr1Hi(DAG.getMachineNode(
12345 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12346 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12347 0);
12348
12349 MachineSDNode *ExtNode =
12350 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12351 Values.push_back(SDValue(ExtNode, 0));
12352 Values.push_back(SDValue(ExtNode, 1));
12353 ExtNode =
12354 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12355 Values.push_back(SDValue(ExtNode, 0));
12356 Values.push_back(SDValue(ExtNode, 1));
12357 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12358 Values.push_back(SDValue(ExtNode, 0));
12359 Values.push_back(SDValue(ExtNode, 1));
12360 ExtNode =
12361 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12362 Values.push_back(SDValue(ExtNode, 0));
12363 Values.push_back(SDValue(ExtNode, 1));
12364 }
12365
12366 if (Subtarget.isLittleEndian())
12367 std::reverse(Values.begin(), Values.end());
12368
12369 SDVTList Tys = DAG.getVTList(MVT::Other);
12370  SDValue Ops[] = {
12371 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12372 Values[0], BasePtr};
12373 MachineMemOperand *MMO = SN->getMemOperand();
12374 unsigned NumVecs = VT.getSizeInBits() / 256;
12375 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12376 MachineMemOperand *NewMMO =
12377 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12378 if (Idx > 0) {
12379 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12380 DAG.getConstant(32, dl, BasePtr.getValueType()));
12381 Ops[3] = BasePtr;
12382 }
12383 Ops[2] = Values[Idx];
12384    SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
12385 MVT::v256i1, NewMMO);
12386 Stores.push_back(St);
12387 }
12388
12389 SDValue TF = DAG.getTokenFactor(dl, Stores);
12390 return TF;
12391}
12392
12393SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12394 SelectionDAG &DAG) const {
12395 SDLoc dl(Op);
12396 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12397 SDValue StoreChain = SN->getChain();
12398 SDValue BasePtr = SN->getBasePtr();
12399 SDValue Value = SN->getValue();
12400 SDValue Value2 = SN->getValue();
12401 EVT StoreVT = Value.getValueType();
12402
12403 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12404 return LowerDMFVectorStore(Op, DAG);
12405
12406 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12407 return Op;
12408
12409 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12410 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
12411 // underlying registers individually.
12412 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12413 "Type unsupported without MMA");
12414 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12415 "Type unsupported without paired vector support");
12416 Align Alignment = SN->getAlign();
12417  SmallVector<SDValue, 4> Stores;
12418 unsigned NumVecs = 2;
12419 if (StoreVT == MVT::v512i1) {
12420 if (Subtarget.isISAFuture()) {
12421 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12422 MachineSDNode *ExtNode = DAG.getMachineNode(
12423 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12424
12425 Value = SDValue(ExtNode, 0);
12426 Value2 = SDValue(ExtNode, 1);
12427 } else
12428 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12429 NumVecs = 4;
12430 }
12431 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12432 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12433 SDValue Elt;
12434 if (Subtarget.isISAFuture()) {
12435 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12436 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12437 Idx > 1 ? Value2 : Value,
12438 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12439 } else
12440 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12441 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12442
12443 SDValue Store =
12444 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12445 SN->getPointerInfo().getWithOffset(Idx * 16),
12446 commonAlignment(Alignment, Idx * 16),
12447 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12448 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12449 DAG.getConstant(16, dl, BasePtr.getValueType()));
12450 Stores.push_back(Store);
12451 }
12452 SDValue TF = DAG.getTokenFactor(dl, Stores);
12453 return TF;
12454}
12455
12456SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12457 SDLoc dl(Op);
12458 if (Op.getValueType() == MVT::v4i32) {
12459 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12460
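    // Each 32-bit product is rebuilt from 16-bit halves using
    //   L * R = lo(L)*lo(R) + ((hi(L)*lo(R) + lo(L)*hi(R)) << 16)  (mod 2^32):
    // vmulouh forms the low products, vmsumuhm with the rotated RHS sums the
    // two cross products, and vslw shifts that sum into the high halfword.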
12461 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12462 // +16 as shift amt.
12463 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12464 SDValue RHSSwap = // = vrlw RHS, 16
12465 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12466
12467 // Shrinkify inputs to v8i16.
12468 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12469 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12470 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12471
12472 // Low parts multiplied together, generating 32-bit results (we ignore the
12473 // top parts).
12474 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12475 LHS, RHS, DAG, dl, MVT::v4i32);
12476
12477 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12478 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12479 // Shift the high parts up 16 bits.
12480 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12481 Neg16, DAG, dl);
12482 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12483 } else if (Op.getValueType() == MVT::v16i8) {
12484 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12485 bool isLittleEndian = Subtarget.isLittleEndian();
12486
12487 // Multiply the even 8-bit parts, producing 16-bit sums.
12488 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12489 LHS, RHS, DAG, dl, MVT::v8i16);
12490 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12491
12492 // Multiply the odd 8-bit parts, producing 16-bit sums.
12493 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12494 LHS, RHS, DAG, dl, MVT::v8i16);
12495 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12496
12497 // Merge the results together. Because vmuleub and vmuloub are
12498 // instructions with a big-endian bias, we must reverse the
12499 // element numbering and reverse the meaning of "odd" and "even"
12500 // when generating little endian code.
12501 int Ops[16];
12502 for (unsigned i = 0; i != 8; ++i) {
12503 if (isLittleEndian) {
12504 Ops[i*2 ] = 2*i;
12505 Ops[i*2+1] = 2*i+16;
12506 } else {
12507 Ops[i*2 ] = 2*i+1;
12508 Ops[i*2+1] = 2*i+1+16;
12509 }
12510 }
12511 if (isLittleEndian)
12512 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12513 else
12514 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12515 } else {
12516 llvm_unreachable("Unknown mul to lower!");
12517 }
12518}
12519
12520SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12521 bool IsStrict = Op->isStrictFPOpcode();
12522 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12523 !Subtarget.hasP9Vector())
12524 return SDValue();
12525
12526 return Op;
12527}
12528
12529// Custom lowering for fpext v2f32 to v2f64
12530SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12531
12532 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12533 "Should only be called for ISD::FP_EXTEND");
12534
12535 // FIXME: handle extends from half precision float vectors on P9.
12536 // We only want to custom lower an extend from v2f32 to v2f64.
12537 if (Op.getValueType() != MVT::v2f64 ||
12538 Op.getOperand(0).getValueType() != MVT::v2f32)
12539 return SDValue();
12540
12541 SDLoc dl(Op);
12542 SDValue Op0 = Op.getOperand(0);
12543
12544 switch (Op0.getOpcode()) {
12545 default:
12546 return SDValue();
12547  case ISD::EXTRACT_SUBVECTOR: {
12548 assert(Op0.getNumOperands() == 2 &&
12549           isa<ConstantSDNode>(Op0->getOperand(1)) &&
12550 "Node should have 2 operands with second one being a constant!");
12551
12552 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12553 return SDValue();
12554
12555 // Custom lower is only done for high or low doubleword.
12556 int Idx = Op0.getConstantOperandVal(1);
12557 if (Idx % 2 != 0)
12558 return SDValue();
12559
12560 // Since input is v4f32, at this point Idx is either 0 or 2.
12561 // Shift to get the doubleword position we want.
12562 int DWord = Idx >> 1;
12563
12564 // High and low word positions are different on little endian.
12565 if (Subtarget.isLittleEndian())
12566 DWord ^= 0x1;
12567
12568 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12569 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12570 }
12571 case ISD::FADD:
12572 case ISD::FMUL:
12573 case ISD::FSUB: {
12574 SDValue NewLoad[2];
12575 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12576      // Ensure both inputs are loads.
12577 SDValue LdOp = Op0.getOperand(i);
12578 if (LdOp.getOpcode() != ISD::LOAD)
12579 return SDValue();
12580 // Generate new load node.
12581 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12582 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12583 NewLoad[i] = DAG.getMemIntrinsicNode(
12584 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12585 LD->getMemoryVT(), LD->getMemOperand());
12586 }
12587 SDValue NewOp =
12588 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12589 NewLoad[1], Op0.getNode()->getFlags());
12590 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12591 DAG.getConstant(0, dl, MVT::i32));
12592 }
12593 case ISD::LOAD: {
12594 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12595 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12596 SDValue NewLd = DAG.getMemIntrinsicNode(
12597 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12598 LD->getMemoryVT(), LD->getMemOperand());
12599 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12600 DAG.getConstant(0, dl, MVT::i32));
12601 }
12602 }
12603  llvm_unreachable("ERROR: Should return for all cases within switch.");
12604}
12605
12606static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12607 SelectionDAG &DAG,
12608 const PPCSubtarget &STI) {
12609 SDLoc DL(Value);
12610 if (STI.useCRBits())
12611 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12612 DAG.getConstant(1, DL, SumType),
12613 DAG.getConstant(0, DL, SumType));
12614 else
12615 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
12616 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12617 Value, DAG.getAllOnesConstant(DL, SumType));
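  // Adding all-ones (-1) to the zero-extended 0/1 value produces a carry-out
  // exactly when the value is 1, so the second result of the ADDC is the
  // carry flag corresponding to the incoming carry value.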
12618 return Sum.getValue(1);
12619}
12620
12621static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12622 EVT CarryType, SelectionDAG &DAG,
12623 const PPCSubtarget &STI) {
12624 SDLoc DL(Flag);
12625 SDValue Zero = DAG.getConstant(0, DL, SumType);
12626 SDValue Carry = DAG.getNode(
12627 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12628 if (STI.useCRBits())
12629 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12630 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12631}
12632
12633SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12634
12635 SDLoc DL(Op);
12636 SDNode *N = Op.getNode();
12637 EVT VT = N->getValueType(0);
12638 EVT CarryType = N->getValueType(1);
12639 unsigned Opc = N->getOpcode();
12640 bool IsAdd = Opc == ISD::UADDO;
12641 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12642 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12643 N->getOperand(0), N->getOperand(1));
12644 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12645 DAG, Subtarget);
12646 if (!IsAdd)
12647 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12648 DAG.getConstant(1UL, DL, CarryType));
12649 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12650}
12651
12652SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12653 SelectionDAG &DAG) const {
12654 SDLoc DL(Op);
12655 SDNode *N = Op.getNode();
12656 unsigned Opc = N->getOpcode();
12657 EVT VT = N->getValueType(0);
12658 EVT CarryType = N->getValueType(1);
12659 SDValue CarryOp = N->getOperand(2);
12660 bool IsAdd = Opc == ISD::UADDO_CARRY;
12661 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12662 if (!IsAdd)
12663 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12664 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12665 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12666 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12667 Op.getOperand(0), Op.getOperand(1), CarryOp);
12668 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12669 Subtarget);
12670 if (!IsAdd)
12671 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12672 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12673 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12674}
12675
12676SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12677
12678 SDLoc dl(Op);
12679 SDValue LHS = Op.getOperand(0);
12680 SDValue RHS = Op.getOperand(1);
12681 EVT VT = Op.getNode()->getValueType(0);
12682
12683 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12684
12685 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12686 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12687
12688 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12689
12690 SDValue Overflow =
12691 DAG.getNode(ISD::SRL, dl, VT, And,
12692 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12693
12694 SDValue OverflowTrunc =
12695 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12696
12697 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12698}
12699
12700// Lower unsigned 3-way compare producing -1/0/1.
12701SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12702 SDLoc DL(Op);
12703 SDValue A = DAG.getFreeze(Op.getOperand(0));
12704 SDValue B = DAG.getFreeze(Op.getOperand(1));
12705 EVT OpVT = A.getValueType(); // operand type
12706 EVT ResVT = Op.getValueType(); // result type
12707
12708 // First compute diff = A - B (will become subf).
12709 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12710
12711 // Generate B - A using SUBC to capture carry.
12712 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12713 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12714 SDValue CA0 = SubC.getValue(1);
12715
12716 // t2 = A - B + CA0 using SUBE.
12717 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12718 SDValue CA1 = SubE1.getValue(1);
12719
12720 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12721 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
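  // With PPC carry semantics (CA = 1 when a subtraction does not borrow):
  //   CA0 = (A <= B), SubE1 = A - B - 1 + CA0, CA1 = (A >= B), and
  //   ResPair = Diff - SubE1 - 1 + CA1, which is 1 if A > B, 0 if A == B,
  //   and -1 if A < B.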
12722
12723 // Extract the first result and truncate to result type if needed
12724 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12725}
12726
12727/// LowerOperation - Provide custom lowering hooks for some operations.
12728///
12729SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12730 switch (Op.getOpcode()) {
12731 default:
12732 llvm_unreachable("Wasn't expecting to be able to lower this!");
12733 case ISD::FPOW: return lowerPow(Op, DAG);
12734 case ISD::FSIN: return lowerSin(Op, DAG);
12735 case ISD::FCOS: return lowerCos(Op, DAG);
12736 case ISD::FLOG: return lowerLog(Op, DAG);
12737 case ISD::FLOG10: return lowerLog10(Op, DAG);
12738 case ISD::FEXP: return lowerExp(Op, DAG);
12739 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12740 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12741 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12742 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12743 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12744 case ISD::STRICT_FSETCC:
12745  case ISD::STRICT_FSETCCS:
12746 case ISD::SETCC: return LowerSETCC(Op, DAG);
12747 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12748 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12749 case ISD::SSUBO:
12750 return LowerSSUBO(Op, DAG);
12751
12752 case ISD::INLINEASM:
12753 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12754 // Variable argument lowering.
12755 case ISD::VASTART: return LowerVASTART(Op, DAG);
12756 case ISD::VAARG: return LowerVAARG(Op, DAG);
12757 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12758
12759 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12760 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12761 case ISD::GET_DYNAMIC_AREA_OFFSET:
12762 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12763
12764 // Exception handling lowering.
12765 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12766 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12767 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12768
12769 case ISD::LOAD: return LowerLOAD(Op, DAG);
12770 case ISD::STORE: return LowerSTORE(Op, DAG);
12771 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12772 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12773  case ISD::STRICT_FP_TO_UINT:
12774  case ISD::STRICT_FP_TO_SINT:
12775 case ISD::FP_TO_UINT:
12776 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12777  case ISD::STRICT_UINT_TO_FP:
12778  case ISD::STRICT_SINT_TO_FP:
12779 case ISD::UINT_TO_FP:
12780 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12781 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12782 case ISD::SET_ROUNDING:
12783 return LowerSET_ROUNDING(Op, DAG);
12784
12785 // Lower 64-bit shifts.
12786 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12787 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12788 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12789
12790 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12791 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12792
12793 // Vector-related lowering.
12794 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12795 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12796 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12797 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12798 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12799 case ISD::MUL: return LowerMUL(Op, DAG);
12800 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12801  case ISD::STRICT_FP_ROUND:
12802 case ISD::FP_ROUND:
12803 return LowerFP_ROUND(Op, DAG);
12804 case ISD::ROTL: return LowerROTL(Op, DAG);
12805
12806 // For counter-based loop handling.
12807 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12808
12809 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12810
12811 // Frame & Return address.
12812 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12813 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12814
12815  case ISD::INTRINSIC_VOID:
12816 return LowerINTRINSIC_VOID(Op, DAG);
12817 case ISD::BSWAP:
12818 return LowerBSWAP(Op, DAG);
12819 case ISD::ATOMIC_CMP_SWAP:
12820 return LowerATOMIC_CMP_SWAP(Op, DAG);
12821 case ISD::ATOMIC_STORE:
12822 return LowerATOMIC_LOAD_STORE(Op, DAG);
12823 case ISD::IS_FPCLASS:
12824 return LowerIS_FPCLASS(Op, DAG);
12825 case ISD::UADDO:
12826 case ISD::USUBO:
12827 return LowerADDSUBO(Op, DAG);
12828 case ISD::UADDO_CARRY:
12829 case ISD::USUBO_CARRY:
12830 return LowerADDSUBO_CARRY(Op, DAG);
12831 case ISD::UCMP:
12832 return LowerUCMP(Op, DAG);
12833 case ISD::STRICT_LRINT:
12834 case ISD::STRICT_LLRINT:
12835 case ISD::STRICT_LROUND:
12836  case ISD::STRICT_LLROUND:
12838 if (Op->getFlags().hasNoFPExcept())
12839 return Op;
12840 return SDValue();
12841 case ISD::VP_LOAD:
12842 return LowerVP_LOAD(Op, DAG);
12843 case ISD::VP_STORE:
12844 return LowerVP_STORE(Op, DAG);
12845 }
12846}
12847
12848void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12849                                           SmallVectorImpl<SDValue> &Results,
12850 SelectionDAG &DAG) const {
12851 SDLoc dl(N);
12852 switch (N->getOpcode()) {
12853 default:
12854 llvm_unreachable("Do not know how to custom type legalize this operation!");
12855 case ISD::ATOMIC_LOAD: {
12856 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12857 Results.push_back(Res);
12858 Results.push_back(Res.getValue(1));
12859 break;
12860 }
12861 case ISD::READCYCLECOUNTER: {
12862 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12863 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12864
12865 Results.push_back(
12866 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12867 Results.push_back(RTB.getValue(2));
12868 break;
12869 }
12870  case ISD::INTRINSIC_W_CHAIN: {
12871 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12872 break;
12873
12874 assert(N->getValueType(0) == MVT::i1 &&
12875 "Unexpected result type for CTR decrement intrinsic");
12876 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12877 N->getValueType(0));
12878 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12879 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12880 N->getOperand(1));
12881
12882 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12883 Results.push_back(NewInt.getValue(1));
12884 break;
12885 }
12886  case ISD::INTRINSIC_WO_CHAIN: {
12887 switch (N->getConstantOperandVal(0)) {
12888 case Intrinsic::ppc_pack_longdouble:
12889 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12890 N->getOperand(2), N->getOperand(1)));
12891 break;
12892 case Intrinsic::ppc_maxfe:
12893 case Intrinsic::ppc_minfe:
12894 case Intrinsic::ppc_fnmsub:
12895 case Intrinsic::ppc_convert_f128_to_ppcf128:
12896 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12897 break;
12898 }
12899 break;
12900 }
12901 case ISD::VAARG: {
12902 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12903 return;
12904
12905 EVT VT = N->getValueType(0);
12906
12907 if (VT == MVT::i64) {
12908 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12909
12910 Results.push_back(NewNode);
12911 Results.push_back(NewNode.getValue(1));
12912 }
12913 return;
12914 }
12915  case ISD::STRICT_FP_TO_SINT:
12916  case ISD::STRICT_FP_TO_UINT:
12917 case ISD::FP_TO_SINT:
12918 case ISD::FP_TO_UINT: {
12919 // LowerFP_TO_INT() can only handle f32 and f64.
12920 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12921 MVT::ppcf128)
12922 return;
12923 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12924 Results.push_back(LoweredValue);
12925 if (N->isStrictFPOpcode())
12926 Results.push_back(LoweredValue.getValue(1));
12927 return;
12928 }
12929 case ISD::TRUNCATE: {
12930 if (!N->getValueType(0).isVector())
12931 return;
12932 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12933 if (Lowered)
12934 Results.push_back(Lowered);
12935 return;
12936 }
12937 case ISD::SCALAR_TO_VECTOR: {
12938 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12939 if (Lowered)
12940 Results.push_back(Lowered);
12941 return;
12942 }
12943 case ISD::FSHL:
12944 case ISD::FSHR:
12945 // Don't handle funnel shifts here.
12946 return;
12947 case ISD::BITCAST:
12948 // Don't handle bitcast here.
12949 return;
12950 case ISD::FP_EXTEND:
12951 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12952 if (Lowered)
12953 Results.push_back(Lowered);
12954 return;
12955 }
12956}
12957
12958//===----------------------------------------------------------------------===//
12959// Other Lowering Code
12960//===----------------------------------------------------------------------===//
12961
12962static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12963 return Builder.CreateIntrinsic(Id, {});
12964}
12965
12966Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12967 Value *Addr,
12968 AtomicOrdering Ord) const {
12969 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12970
12971 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12972 "Only 8/16/32/64-bit atomic loads supported");
12973 Intrinsic::ID IntID;
12974 switch (SZ) {
12975 default:
12976 llvm_unreachable("Unexpected PrimitiveSize");
12977 case 8:
12978 IntID = Intrinsic::ppc_lbarx;
12979    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12980 break;
12981 case 16:
12982 IntID = Intrinsic::ppc_lharx;
12983    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12984 break;
12985 case 32:
12986 IntID = Intrinsic::ppc_lwarx;
12987 break;
12988 case 64:
12989 IntID = Intrinsic::ppc_ldarx;
12990 break;
12991 }
12992 Value *Call =
12993 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12994
12995 return Builder.CreateTruncOrBitCast(Call, ValueTy);
12996}
12997
12998// Perform a store-conditional operation to Addr. Return the status of the
12999// store. This should be 0 if the store succeeded, non-zero otherwise.
13000Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
13001 Value *Val, Value *Addr,
13002 AtomicOrdering Ord) const {
13003 Type *Ty = Val->getType();
13004 unsigned SZ = Ty->getPrimitiveSizeInBits();
13005
13006 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
13007         "Only 8/16/32/64-bit atomic stores supported");
13008 Intrinsic::ID IntID;
13009 switch (SZ) {
13010 default:
13011 llvm_unreachable("Unexpected PrimitiveSize");
13012 case 8:
13013 IntID = Intrinsic::ppc_stbcx;
13014    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
13015 break;
13016 case 16:
13017 IntID = Intrinsic::ppc_sthcx;
13018    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
13019 break;
13020 case 32:
13021 IntID = Intrinsic::ppc_stwcx;
13022 break;
13023 case 64:
13024 IntID = Intrinsic::ppc_stdcx;
13025 break;
13026 }
13027
13028 if (SZ == 8 || SZ == 16)
13029 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
13030
13031 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
13032 /*FMFSource=*/nullptr, "stcx");
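  // The st[bhwd]cx. intrinsics return 1 when the store succeeds, so flip the
  // low bit to produce the 0-on-success value this hook is expected to return.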
13033 return Builder.CreateXor(Call, Builder.getInt32(1));
13034}
13035
13036// The mappings for emitLeading/TrailingFence are taken from
13037// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
13038Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
13039 Instruction *Inst,
13040 AtomicOrdering Ord) const {
13041  if (Ord == AtomicOrdering::SequentiallyConsistent)
13042 return callIntrinsic(Builder, Intrinsic::ppc_sync);
13043 if (isReleaseOrStronger(Ord))
13044 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13045 return nullptr;
13046}
13047
13048Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
13049 Instruction *Inst,
13050 AtomicOrdering Ord) const {
13051 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
13052 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
13053 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
13054 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
13055 if (isa<LoadInst>(Inst))
13056 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
13057 {Inst});
13058 // FIXME: Can use isync for rmw operation.
13059 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13060 }
13061 return nullptr;
13062}
13063
13064MachineBasicBlock *
13065PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
13066 unsigned AtomicSize,
13067 unsigned BinOpcode,
13068 unsigned CmpOpcode,
13069 unsigned CmpPred) const {
13070 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13071 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13072
13073 auto LoadMnemonic = PPC::LDARX;
13074 auto StoreMnemonic = PPC::STDCX;
13075 switch (AtomicSize) {
13076 default:
13077 llvm_unreachable("Unexpected size of atomic entity");
13078 case 1:
13079 LoadMnemonic = PPC::LBARX;
13080 StoreMnemonic = PPC::STBCX;
13081    assert(Subtarget.hasPartwordAtomics() && "Partword atomics required for sizes below 4");
13082 break;
13083 case 2:
13084 LoadMnemonic = PPC::LHARX;
13085 StoreMnemonic = PPC::STHCX;
13086    assert(Subtarget.hasPartwordAtomics() && "Partword atomics required for sizes below 4");
13087 break;
13088 case 4:
13089 LoadMnemonic = PPC::LWARX;
13090 StoreMnemonic = PPC::STWCX;
13091 break;
13092 case 8:
13093 LoadMnemonic = PPC::LDARX;
13094 StoreMnemonic = PPC::STDCX;
13095 break;
13096 }
13097
13098 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13099 MachineFunction *F = BB->getParent();
13100  MachineFunction::iterator It = ++BB->getIterator();
13101
13102 Register dest = MI.getOperand(0).getReg();
13103 Register ptrA = MI.getOperand(1).getReg();
13104 Register ptrB = MI.getOperand(2).getReg();
13105 Register incr = MI.getOperand(3).getReg();
13106 DebugLoc dl = MI.getDebugLoc();
13107
13108 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13109 MachineBasicBlock *loop2MBB =
13110 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13111 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13112 F->insert(It, loopMBB);
13113 if (CmpOpcode)
13114 F->insert(It, loop2MBB);
13115 F->insert(It, exitMBB);
13116 exitMBB->splice(exitMBB->begin(), BB,
13117 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13118  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13119
13120 MachineRegisterInfo &RegInfo = F->getRegInfo();
13121 Register TmpReg = (!BinOpcode) ? incr :
13122 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
13123 : &PPC::GPRCRegClass);
13124
13125 // thisMBB:
13126 // ...
13127 // fallthrough --> loopMBB
13128 BB->addSuccessor(loopMBB);
13129
13130 // loopMBB:
13131 // l[wd]arx dest, ptr
13132 // add r0, dest, incr
13133 // st[wd]cx. r0, ptr
13134 // bne- loopMBB
13135 // fallthrough --> exitMBB
13136
13137 // For max/min...
13138 // loopMBB:
13139 // l[wd]arx dest, ptr
13140 // cmpl?[wd] dest, incr
13141 // bgt exitMBB
13142 // loop2MBB:
13143 // st[wd]cx. dest, ptr
13144 // bne- loopMBB
13145 // fallthrough --> exitMBB
13146
13147 BB = loopMBB;
13148 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13149 .addReg(ptrA).addReg(ptrB);
13150 if (BinOpcode)
13151 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13152 if (CmpOpcode) {
13153 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13154 // Signed comparisons of byte or halfword values must be sign-extended.
13155 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13156 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13157 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13158 ExtReg).addReg(dest);
13159 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13160 } else
13161 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13162
13163 BuildMI(BB, dl, TII->get(PPC::BCC))
13164 .addImm(CmpPred)
13165 .addReg(CrReg)
13166 .addMBB(exitMBB);
13167 BB->addSuccessor(loop2MBB);
13168 BB->addSuccessor(exitMBB);
13169 BB = loop2MBB;
13170 }
13171 BuildMI(BB, dl, TII->get(StoreMnemonic))
13172 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13173 BuildMI(BB, dl, TII->get(PPC::BCC))
13174    .addImm(PPC::PRED_NE)
13175 .addReg(PPC::CR0)
13176 .addMBB(loopMBB);
13177 BB->addSuccessor(loopMBB);
13178 BB->addSuccessor(exitMBB);
13179
13180 // exitMBB:
13181 // ...
13182 BB = exitMBB;
13183 return BB;
13184}
13185
13186static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13187 switch(MI.getOpcode()) {
13188 default:
13189 return false;
13190 case PPC::COPY:
13191 return TII->isSignExtended(MI.getOperand(1).getReg(),
13192 &MI.getMF()->getRegInfo());
13193 case PPC::LHA:
13194 case PPC::LHA8:
13195 case PPC::LHAU:
13196 case PPC::LHAU8:
13197 case PPC::LHAUX:
13198 case PPC::LHAUX8:
13199 case PPC::LHAX:
13200 case PPC::LHAX8:
13201 case PPC::LWA:
13202 case PPC::LWAUX:
13203 case PPC::LWAX:
13204 case PPC::LWAX_32:
13205 case PPC::LWA_32:
13206 case PPC::PLHA:
13207 case PPC::PLHA8:
13208 case PPC::PLHA8pc:
13209 case PPC::PLHApc:
13210 case PPC::PLWA:
13211 case PPC::PLWA8:
13212 case PPC::PLWA8pc:
13213 case PPC::PLWApc:
13214 case PPC::EXTSB:
13215 case PPC::EXTSB8:
13216 case PPC::EXTSB8_32_64:
13217 case PPC::EXTSB8_rec:
13218 case PPC::EXTSB_rec:
13219 case PPC::EXTSH:
13220 case PPC::EXTSH8:
13221 case PPC::EXTSH8_32_64:
13222 case PPC::EXTSH8_rec:
13223 case PPC::EXTSH_rec:
13224 case PPC::EXTSW:
13225 case PPC::EXTSWSLI:
13226 case PPC::EXTSWSLI_32_64:
13227 case PPC::EXTSWSLI_32_64_rec:
13228 case PPC::EXTSWSLI_rec:
13229 case PPC::EXTSW_32:
13230 case PPC::EXTSW_32_64:
13231 case PPC::EXTSW_32_64_rec:
13232 case PPC::EXTSW_rec:
13233 case PPC::SRAW:
13234 case PPC::SRAWI:
13235 case PPC::SRAWI_rec:
13236 case PPC::SRAW_rec:
13237 return true;
13238 }
13239 return false;
13240}
13241
13242MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
13243    MachineInstr &MI, MachineBasicBlock *BB,
13244 bool is8bit, // operation
13245 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13246 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13247 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13248
13249 // If this is a signed comparison and the value being compared is not known
13250 // to be sign extended, sign extend it here.
13251 DebugLoc dl = MI.getDebugLoc();
13252 MachineFunction *F = BB->getParent();
13253 MachineRegisterInfo &RegInfo = F->getRegInfo();
13254 Register incr = MI.getOperand(3).getReg();
13255 bool IsSignExtended =
13256 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13257
13258 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13259 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13260 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13261 .addReg(MI.getOperand(3).getReg());
13262 MI.getOperand(3).setReg(ValueReg);
13263 incr = ValueReg;
13264 }
13265 // If we support part-word atomic mnemonics, just use them
13266 if (Subtarget.hasPartwordAtomics())
13267 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13268 CmpPred);
13269
13270 // In 64 bit mode we have to use 64 bits for addresses, even though the
13271 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13272 // registers without caring whether they're 32 or 64, but here we're
13273 // doing actual arithmetic on the addresses.
13274 bool is64bit = Subtarget.isPPC64();
13275 bool isLittleEndian = Subtarget.isLittleEndian();
13276 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13277
13278 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13279  MachineFunction::iterator It = ++BB->getIterator();
13280
13281 Register dest = MI.getOperand(0).getReg();
13282 Register ptrA = MI.getOperand(1).getReg();
13283 Register ptrB = MI.getOperand(2).getReg();
13284
13285 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13286 MachineBasicBlock *loop2MBB =
13287 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13288 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13289 F->insert(It, loopMBB);
13290 if (CmpOpcode)
13291 F->insert(It, loop2MBB);
13292 F->insert(It, exitMBB);
13293 exitMBB->splice(exitMBB->begin(), BB,
13294 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13295  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13296
13297 const TargetRegisterClass *RC =
13298 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13299 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13300
13301 Register PtrReg = RegInfo.createVirtualRegister(RC);
13302 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13303 Register ShiftReg =
13304 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13305 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13306 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13307 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13308 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13309 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13310 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13311 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13312 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13313 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13314 Register Ptr1Reg;
13315 Register TmpReg =
13316 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13317
13318 // thisMBB:
13319 // ...
13320 // fallthrough --> loopMBB
13321 BB->addSuccessor(loopMBB);
13322
13323 // The 4-byte load must be aligned, while a char or short may be
13324 // anywhere in the word. Hence all this nasty bookkeeping code.
13325 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13326 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13327 // xori shift, shift1, 24 [16]
13328 // rlwinm ptr, ptr1, 0, 0, 29
13329 // slw incr2, incr, shift
13330 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13331 // slw mask, mask2, shift
13332 // loopMBB:
13333 // lwarx tmpDest, ptr
13334 // add tmp, tmpDest, incr2
13335 // andc tmp2, tmpDest, mask
13336 // and tmp3, tmp, mask
13337 // or tmp4, tmp3, tmp2
13338 // stwcx. tmp4, ptr
13339 // bne- loopMBB
13340 // fallthrough --> exitMBB
13341 // srw SrwDest, tmpDest, shift
13342 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
13343 if (ptrA != ZeroReg) {
13344 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13345 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13346 .addReg(ptrA)
13347 .addReg(ptrB);
13348 } else {
13349 Ptr1Reg = ptrB;
13350 }
13351  // We need to use a 32-bit subregister to avoid a register class mismatch in
13352  // 64-bit mode.
13353 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13354 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13355 .addImm(3)
13356 .addImm(27)
13357 .addImm(is8bit ? 28 : 27);
13358 if (!isLittleEndian)
13359 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13360 .addReg(Shift1Reg)
13361 .addImm(is8bit ? 24 : 16);
13362 if (is64bit)
13363 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13364 .addReg(Ptr1Reg)
13365 .addImm(0)
13366 .addImm(61);
13367 else
13368 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13369 .addReg(Ptr1Reg)
13370 .addImm(0)
13371 .addImm(0)
13372 .addImm(29);
13373 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13374 if (is8bit)
13375 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13376 else {
13377 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13378 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13379 .addReg(Mask3Reg)
13380 .addImm(65535);
13381 }
13382 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13383 .addReg(Mask2Reg)
13384 .addReg(ShiftReg);
13385
13386 BB = loopMBB;
13387 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13388 .addReg(ZeroReg)
13389 .addReg(PtrReg);
13390 if (BinOpcode)
13391 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13392 .addReg(Incr2Reg)
13393 .addReg(TmpDestReg);
13394 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13395 .addReg(TmpDestReg)
13396 .addReg(MaskReg);
13397 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13398 if (CmpOpcode) {
13399 // For unsigned comparisons, we can directly compare the shifted values.
13400 // For signed comparisons we shift and sign extend.
13401 Register SReg = RegInfo.createVirtualRegister(GPRC);
13402 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13403 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13404 .addReg(TmpDestReg)
13405 .addReg(MaskReg);
13406 unsigned ValueReg = SReg;
13407 unsigned CmpReg = Incr2Reg;
13408 if (CmpOpcode == PPC::CMPW) {
13409 ValueReg = RegInfo.createVirtualRegister(GPRC);
13410 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13411 .addReg(SReg)
13412 .addReg(ShiftReg);
13413 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13414 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13415 .addReg(ValueReg);
13416 ValueReg = ValueSReg;
13417 CmpReg = incr;
13418 }
13419 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13420 BuildMI(BB, dl, TII->get(PPC::BCC))
13421 .addImm(CmpPred)
13422 .addReg(CrReg)
13423 .addMBB(exitMBB);
13424 BB->addSuccessor(loop2MBB);
13425 BB->addSuccessor(exitMBB);
13426 BB = loop2MBB;
13427 }
13428 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13429 BuildMI(BB, dl, TII->get(PPC::STWCX))
13430 .addReg(Tmp4Reg)
13431 .addReg(ZeroReg)
13432 .addReg(PtrReg);
13433 BuildMI(BB, dl, TII->get(PPC::BCC))
13434      .addImm(PPC::PRED_NE)
13435 .addReg(PPC::CR0)
13436 .addMBB(loopMBB);
13437 BB->addSuccessor(loopMBB);
13438 BB->addSuccessor(exitMBB);
13439
13440 // exitMBB:
13441 // ...
13442 BB = exitMBB;
13443 // Since the shift amount is not a constant, we need to clear
13444 // the upper bits with a separate RLWINM.
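  // Both instructions below are inserted at exitMBB->begin(), so the SRW that
  // is built second ends up executing first: dest is TmpDestReg shifted right
  // by ShiftReg and then masked down to the low 8 or 16 bits.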
13445 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13446 .addReg(SrwDestReg)
13447 .addImm(0)
13448 .addImm(is8bit ? 24 : 16)
13449 .addImm(31);
13450 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13451 .addReg(TmpDestReg)
13452 .addReg(ShiftReg);
13453 return BB;
13454}
13455
13456MachineBasicBlock *
13457PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13458 MachineBasicBlock *MBB) const {
13459 DebugLoc DL = MI.getDebugLoc();
13460 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13461 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13462
13463 MachineFunction *MF = MBB->getParent();
13464  MachineRegisterInfo &MRI = MF->getRegInfo();
13465
13466 const BasicBlock *BB = MBB->getBasicBlock();
13467 MachineFunction::iterator I = ++MBB->getIterator();
13468
13469 Register DstReg = MI.getOperand(0).getReg();
13470 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13471 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13472 Register mainDstReg = MRI.createVirtualRegister(RC);
13473 Register restoreDstReg = MRI.createVirtualRegister(RC);
13474
13475 MVT PVT = getPointerTy(MF->getDataLayout());
13476 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13477 "Invalid Pointer Size!");
13478 // For v = setjmp(buf), we generate
13479 //
13480 // thisMBB:
13481 // SjLjSetup mainMBB
13482 // bl mainMBB
13483 // v_restore = 1
13484 // b sinkMBB
13485 //
13486 // mainMBB:
13487 // buf[LabelOffset] = LR
13488 // v_main = 0
13489 //
13490 // sinkMBB:
13491 // v = phi(main, restore)
13492 //
13493
13494 MachineBasicBlock *thisMBB = MBB;
13495 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13496 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13497 MF->insert(I, mainMBB);
13498 MF->insert(I, sinkMBB);
13499
13500  MachineInstrBuilder MIB;
13501
13502 // Transfer the remainder of BB and its successor edges to sinkMBB.
13503 sinkMBB->splice(sinkMBB->begin(), MBB,
13504 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13505  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
13506
13507 // Note that the structure of the jmp_buf used here is not compatible
13508 // with that used by libc, and is not designed to be. Specifically, it
13509 // stores only those 'reserved' registers that LLVM does not otherwise
13510 // understand how to spill. Also, by convention, by the time this
13511 // intrinsic is called, Clang has already stored the frame address in the
13512 // first slot of the buffer and stack address in the third. Following the
13513 // X86 target code, we'll store the jump address in the second slot. We also
13514 // need to save the TOC pointer (R2) to handle jumps between shared
13515 // libraries, and that will be stored in the fourth slot. The thread
13516 // identifier (R13) is not affected.
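  // In PVT-sized slots, the buffer layout used here is therefore:
  //   buf[0] = frame address (stored by the front end)
  //   buf[1] = jump address  (LR, LabelOffset)
  //   buf[2] = stack address (stored by the front end)
  //   buf[3] = TOC pointer   (R2, TOCOffset, 64-bit ELF only)
  //   buf[4] = base pointer  (BPOffset)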
13517
13518 // thisMBB:
13519 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13520 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13521 const int64_t BPOffset = 4 * PVT.getStoreSize();
13522
13523  // Prepare the IP in a register.
13524 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13525 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13526 Register BufReg = MI.getOperand(1).getReg();
13527
13528 if (Subtarget.is64BitELFABI()) {
13529 setUsesTOCBasePtr(*MBB->getParent());
13530 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13531 .addReg(PPC::X2)
13532 .addImm(TOCOffset)
13533 .addReg(BufReg)
13534 .cloneMemRefs(MI);
13535 }
13536
13537  // Naked functions never have a base pointer, and so we use r1. For all
13538  // other functions, this decision must be delayed until PEI.
13539 unsigned BaseReg;
13540 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13541 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13542 else
13543 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13544
13545 MIB = BuildMI(*thisMBB, MI, DL,
13546 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13547 .addReg(BaseReg)
13548 .addImm(BPOffset)
13549 .addReg(BufReg)
13550 .cloneMemRefs(MI);
13551
13552 // Setup
13553 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13554 MIB.addRegMask(TRI->getNoPreservedMask());
13555
13556 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13557
13558 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13559 .addMBB(mainMBB);
13560 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13561
13562 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13563 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13564
13565 // mainMBB:
13566 // mainDstReg = 0
13567 MIB =
13568 BuildMI(mainMBB, DL,
13569 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13570
13571 // Store IP
13572 if (Subtarget.isPPC64()) {
13573 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13574 .addReg(LabelReg)
13575 .addImm(LabelOffset)
13576 .addReg(BufReg);
13577 } else {
13578 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13579 .addReg(LabelReg)
13580 .addImm(LabelOffset)
13581 .addReg(BufReg);
13582 }
13583 MIB.cloneMemRefs(MI);
13584
13585 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13586 mainMBB->addSuccessor(sinkMBB);
13587
13588 // sinkMBB:
13589 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13590 TII->get(PPC::PHI), DstReg)
13591 .addReg(mainDstReg).addMBB(mainMBB)
13592 .addReg(restoreDstReg).addMBB(thisMBB);
13593
13594 MI.eraseFromParent();
13595 return sinkMBB;
13596}
13597
13598MachineBasicBlock *
13599PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13600 MachineBasicBlock *MBB) const {
13601 DebugLoc DL = MI.getDebugLoc();
13602 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13603
13604 MachineFunction *MF = MBB->getParent();
13605  MachineRegisterInfo &MRI = MF->getRegInfo();
13606
13607 MVT PVT = getPointerTy(MF->getDataLayout());
13608 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13609 "Invalid Pointer Size!");
13610
13611 const TargetRegisterClass *RC =
13612 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13613 Register Tmp = MRI.createVirtualRegister(RC);
13614 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13615 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13616 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13617 unsigned BP =
13618 (PVT == MVT::i64)
13619 ? PPC::X30
13620 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13621 : PPC::R30);
13622
13623  MachineInstrBuilder MIB;
13624
13625 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13626 const int64_t SPOffset = 2 * PVT.getStoreSize();
13627 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13628 const int64_t BPOffset = 4 * PVT.getStoreSize();
13629
13630 Register BufReg = MI.getOperand(0).getReg();
13631
13632 // Reload FP (the jumped-to function may not have had a
13633 // frame pointer, and if so, then its r31 will be restored
13634 // as necessary).
13635 if (PVT == MVT::i64) {
13636 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13637 .addImm(0)
13638 .addReg(BufReg);
13639 } else {
13640 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13641 .addImm(0)
13642 .addReg(BufReg);
13643 }
13644 MIB.cloneMemRefs(MI);
13645
13646 // Reload IP
13647 if (PVT == MVT::i64) {
13648 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13649 .addImm(LabelOffset)
13650 .addReg(BufReg);
13651 } else {
13652 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13653 .addImm(LabelOffset)
13654 .addReg(BufReg);
13655 }
13656 MIB.cloneMemRefs(MI);
13657
13658 // Reload SP
13659 if (PVT == MVT::i64) {
13660 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13661 .addImm(SPOffset)
13662 .addReg(BufReg);
13663 } else {
13664 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13665 .addImm(SPOffset)
13666 .addReg(BufReg);
13667 }
13668 MIB.cloneMemRefs(MI);
13669
13670 // Reload BP
13671 if (PVT == MVT::i64) {
13672 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13673 .addImm(BPOffset)
13674 .addReg(BufReg);
13675 } else {
13676 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13677 .addImm(BPOffset)
13678 .addReg(BufReg);
13679 }
13680 MIB.cloneMemRefs(MI);
13681
13682 // Reload TOC
13683 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13684 setUsesTOCBasePtr(*MBB->getParent());
13685 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13686 .addImm(TOCOffset)
13687 .addReg(BufReg)
13688 .cloneMemRefs(MI);
13689 }
13690
13691 // Jump
13692 BuildMI(*MBB, MI, DL,
13693 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13694 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13695
13696 MI.eraseFromParent();
13697 return MBB;
13698}
13699
13700bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13701 // If the function specifically requests inline stack probes, emit them.
13702 if (MF.getFunction().hasFnAttribute("probe-stack"))
13703 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13704 "inline-asm";
13705 return false;
13706}
13707
13708unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13709 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13710 unsigned StackAlign = TFI->getStackAlignment();
13711 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13712 "Unexpected stack alignment");
13713 // The default stack probe size is 4096 if the function has no
13714 // stack-probe-size attribute.
13715 const Function &Fn = MF.getFunction();
13716 unsigned StackProbeSize =
13717 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13718 // Round down to the stack alignment.
13719 StackProbeSize &= ~(StackAlign - 1);
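  // For example, with a 16-byte stack alignment a "stack-probe-size" of 4100
  // is rounded down to 4096; a zero result falls back to the alignment itself.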
13720 return StackProbeSize ? StackProbeSize : StackAlign;
13721}
13722
13723// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13724// into three phases. In the first phase, it uses the pseudo instruction
13725// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
13726// and FinalStackPtr. In the second phase, it generates a loop that probes each
13727// block. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future
13728// result of MaxCallFrameSize so that it can calculate the correct data area pointer.
13729MachineBasicBlock *
13730PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13731 MachineBasicBlock *MBB) const {
13732 const bool isPPC64 = Subtarget.isPPC64();
13733 MachineFunction *MF = MBB->getParent();
13734 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13735 DebugLoc DL = MI.getDebugLoc();
13736 const unsigned ProbeSize = getStackProbeSize(*MF);
13737 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13738  MachineRegisterInfo &MRI = MF->getRegInfo();
13739  // The CFG of the stack probing code looks like this:
13740 // +-----+
13741 // | MBB |
13742 // +--+--+
13743 // |
13744 // +----v----+
13745 // +--->+ TestMBB +---+
13746 // | +----+----+ |
13747 // | | |
13748 // | +-----v----+ |
13749 // +---+ BlockMBB | |
13750 // +----------+ |
13751 // |
13752 // +---------+ |
13753 // | TailMBB +<--+
13754 // +---------+
13755  // In MBB, calculate the previous frame pointer and the final stack pointer.
13756  // In TestMBB, test whether sp equals the final stack pointer; if so, jump to
13757  // TailMBB. In BlockMBB, update sp atomically and jump back to TestMBB.
13758  // TailMBB is spliced in via \p MI.
13759 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13760 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13761 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13762
13763 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13764 MF->insert(MBBIter, TestMBB);
13765 MF->insert(MBBIter, BlockMBB);
13766 MF->insert(MBBIter, TailMBB);
13767
13768 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13769 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13770
13771 Register DstReg = MI.getOperand(0).getReg();
13772 Register NegSizeReg = MI.getOperand(1).getReg();
13773 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13774 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13775 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13776 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13777
13778  // Since the value of NegSizeReg might be realigned during prologue/epilogue
13779  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13780  // actual FramePointer and NegSize.
13781 unsigned ProbeOpc;
13782 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13783 ProbeOpc =
13784 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13785 else
13786    // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
13787    // and NegSizeReg are allocated to the same physical register, avoiding a
13788    // redundant copy when NegSizeReg has only one use, namely the current MI,
13789    // which will then be replaced by PREPARE_PROBED_ALLOCA.
13790 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13791 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13792 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13793 .addDef(ActualNegSizeReg)
13794 .addReg(NegSizeReg)
13795 .add(MI.getOperand(2))
13796 .add(MI.getOperand(3));
13797
13798 // Calculate final stack pointer, which equals to SP + ActualNegSize.
13799 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13800 FinalStackPtr)
13801 .addReg(SPReg)
13802 .addReg(ActualNegSizeReg);
13803
13804 // Materialize a scratch register for update.
13805 int64_t NegProbeSize = -(int64_t)ProbeSize;
13806 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13807 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13808 if (!isInt<16>(NegProbeSize)) {
13809 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13810 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13811 .addImm(NegProbeSize >> 16);
13812 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13813 ScratchReg)
13814 .addReg(TempReg)
13815 .addImm(NegProbeSize & 0xFFFF);
13816 } else
13817 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13818 .addImm(NegProbeSize);
13819
13820 {
13821 // Probing leading residual part.
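    // NegMod = ActualNegSize - (ActualNegSize / NegProbeSize) * NegProbeSize,
    // i.e. the (negative) residual left over after whole probe steps; the
    // stdux/stwux below both bumps SP by that residual and touches the new
    // stack area, so the loop only needs to advance in whole ProbeSize steps.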
13822 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13823 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13824 .addReg(ActualNegSizeReg)
13825 .addReg(ScratchReg);
13826 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13827 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13828 .addReg(Div)
13829 .addReg(ScratchReg);
13830 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13831 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13832 .addReg(Mul)
13833 .addReg(ActualNegSizeReg);
13834 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13835 .addReg(FramePointer)
13836 .addReg(SPReg)
13837 .addReg(NegMod);
13838 }
13839
13840 {
13841 // Remaining part should be multiple of ProbeSize.
13842 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13843 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13844 .addReg(SPReg)
13845 .addReg(FinalStackPtr);
13846 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13847        .addImm(PPC::PRED_EQ)
13848 .addReg(CmpResult)
13849 .addMBB(TailMBB);
13850 TestMBB->addSuccessor(BlockMBB);
13851 TestMBB->addSuccessor(TailMBB);
13852 }
13853
13854 {
13855 // Touch the block.
13856 // |P...|P...|P...
13857 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13858 .addReg(FramePointer)
13859 .addReg(SPReg)
13860 .addReg(ScratchReg);
13861 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13862 BlockMBB->addSuccessor(TestMBB);
13863 }
13864
13865  // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
13866  // use the DYNAREAOFFSET pseudo instruction to get the future result.
13867 Register MaxCallFrameSizeReg =
13868 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13869 BuildMI(TailMBB, DL,
13870 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13871 MaxCallFrameSizeReg)
13872 .add(MI.getOperand(2))
13873 .add(MI.getOperand(3));
13874 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13875 .addReg(SPReg)
13876 .addReg(MaxCallFrameSizeReg);
13877
13878 // Splice instructions after MI to TailMBB.
13879 TailMBB->splice(TailMBB->end(), MBB,
13880 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13881  TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13882 MBB->addSuccessor(TestMBB);
13883
13884 // Delete the pseudo instruction.
13885 MI.eraseFromParent();
13886
13887 ++NumDynamicAllocaProbed;
13888 return TailMBB;
13889}
13890
13891static bool IsSelectCC(MachineInstr &MI) {
13892 switch (MI.getOpcode()) {
13893 case PPC::SELECT_CC_I4:
13894 case PPC::SELECT_CC_I8:
13895 case PPC::SELECT_CC_F4:
13896 case PPC::SELECT_CC_F8:
13897 case PPC::SELECT_CC_F16:
13898 case PPC::SELECT_CC_VRRC:
13899 case PPC::SELECT_CC_VSFRC:
13900 case PPC::SELECT_CC_VSSRC:
13901 case PPC::SELECT_CC_VSRC:
13902 case PPC::SELECT_CC_SPE4:
13903 case PPC::SELECT_CC_SPE:
13904 return true;
13905 default:
13906 return false;
13907 }
13908}
13909
13910static bool IsSelect(MachineInstr &MI) {
13911 switch (MI.getOpcode()) {
13912 case PPC::SELECT_I4:
13913 case PPC::SELECT_I8:
13914 case PPC::SELECT_F4:
13915 case PPC::SELECT_F8:
13916 case PPC::SELECT_F16:
13917 case PPC::SELECT_SPE:
13918 case PPC::SELECT_SPE4:
13919 case PPC::SELECT_VRRC:
13920 case PPC::SELECT_VSFRC:
13921 case PPC::SELECT_VSSRC:
13922 case PPC::SELECT_VSRC:
13923 return true;
13924 default:
13925 return false;
13926 }
13927}
13928
 13929 MachineBasicBlock *
 13930 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13931 MachineBasicBlock *BB) const {
13932 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13933 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13934 if (Subtarget.is64BitELFABI() &&
13935 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13936 !Subtarget.isUsingPCRelativeCalls()) {
13937 // Call lowering should have added an r2 operand to indicate a dependence
 13938 // on the TOC base pointer value. It can't, however, because there is no
13939 // way to mark the dependence as implicit there, and so the stackmap code
13940 // will confuse it with a regular operand. Instead, add the dependence
13941 // here.
13942 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13943 }
13944
13945 return emitPatchPoint(MI, BB);
13946 }
13947
13948 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13949 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13950 return emitEHSjLjSetJmp(MI, BB);
13951 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13952 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13953 return emitEHSjLjLongJmp(MI, BB);
13954 }
13955
13956 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13957
13958 // To "insert" these instructions we actually have to insert their
13959 // control-flow patterns.
13960 const BasicBlock *LLVM_BB = BB->getBasicBlock();
 13961 MachineFunction::iterator It = ++BB->getIterator();
13962
13963 MachineFunction *F = BB->getParent();
13964 MachineRegisterInfo &MRI = F->getRegInfo();
13965
13966 if (Subtarget.hasISEL() &&
13967 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13968 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13969 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
 13970 SmallVector<MachineOperand, 2> Cond;
13971 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13972 MI.getOpcode() == PPC::SELECT_CC_I8)
13973 Cond.push_back(MI.getOperand(4));
13974 else
 13975 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13976 Cond.push_back(MI.getOperand(1));
13977
13978 DebugLoc dl = MI.getDebugLoc();
13979 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13980 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13981 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13982 // The incoming instruction knows the destination vreg to set, the
13983 // condition code register to branch on, the true/false values to
13984 // select between, and a branch opcode to use.
13985
13986 // thisMBB:
13987 // ...
13988 // TrueVal = ...
13989 // cmpTY ccX, r1, r2
13990 // bCC sinkMBB
13991 // fallthrough --> copy0MBB
13992 MachineBasicBlock *thisMBB = BB;
13993 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13994 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13995 DebugLoc dl = MI.getDebugLoc();
13996 F->insert(It, copy0MBB);
13997 F->insert(It, sinkMBB);
13998
13999 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
14000 copy0MBB->addLiveIn(PPC::CARRY);
14001 sinkMBB->addLiveIn(PPC::CARRY);
14002 }
14003
14004 // Set the call frame size on entry to the new basic blocks.
14005 // See https://reviews.llvm.org/D156113.
14006 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
14007 copy0MBB->setCallFrameSize(CallFrameSize);
14008 sinkMBB->setCallFrameSize(CallFrameSize);
14009
14010 // Transfer the remainder of BB and its successor edges to sinkMBB.
14011 sinkMBB->splice(sinkMBB->begin(), BB,
14012 std::next(MachineBasicBlock::iterator(MI)), BB->end());
 14013 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
14014
14015 // Next, add the true and fallthrough blocks as its successors.
14016 BB->addSuccessor(copy0MBB);
14017 BB->addSuccessor(sinkMBB);
14018
14019 if (IsSelect(MI)) {
14020 BuildMI(BB, dl, TII->get(PPC::BC))
14021 .addReg(MI.getOperand(1).getReg())
14022 .addMBB(sinkMBB);
14023 } else {
14024 unsigned SelectPred = MI.getOperand(4).getImm();
14025 BuildMI(BB, dl, TII->get(PPC::BCC))
14026 .addImm(SelectPred)
14027 .addReg(MI.getOperand(1).getReg())
14028 .addMBB(sinkMBB);
14029 }
14030
14031 // copy0MBB:
14032 // %FalseValue = ...
14033 // # fallthrough to sinkMBB
14034 BB = copy0MBB;
14035
14036 // Update machine-CFG edges
14037 BB->addSuccessor(sinkMBB);
14038
14039 // sinkMBB:
14040 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
14041 // ...
14042 BB = sinkMBB;
14043 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
14044 .addReg(MI.getOperand(3).getReg())
14045 .addMBB(copy0MBB)
14046 .addReg(MI.getOperand(2).getReg())
14047 .addMBB(thisMBB);
14048 } else if (MI.getOpcode() == PPC::ReadTB) {
14049 // To read the 64-bit time-base register on a 32-bit target, we read the
14050 // two halves. Should the counter have wrapped while it was being read, we
14051 // need to try again.
14052 // ...
14053 // readLoop:
14054 // mfspr Rx,TBU # load from TBU
14055 // mfspr Ry,TB # load from TB
14056 // mfspr Rz,TBU # load from TBU
14057 // cmpw crX,Rx,Rz # check if 'old'='new'
14058 // bne readLoop # branch if they're not equal
14059 // ...
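 // The mfspr operands below are SPR numbers: 269 reads the upper time-base
 // word (TBU) and 268 the lower word (TB), matching the pseudocode above.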
14060
14061 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
14062 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14063 DebugLoc dl = MI.getDebugLoc();
14064 F->insert(It, readMBB);
14065 F->insert(It, sinkMBB);
14066
14067 // Transfer the remainder of BB and its successor edges to sinkMBB.
14068 sinkMBB->splice(sinkMBB->begin(), BB,
14069 std::next(MachineBasicBlock::iterator(MI)), BB->end());
 14070 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
14071
14072 BB->addSuccessor(readMBB);
14073 BB = readMBB;
14074
14075 MachineRegisterInfo &RegInfo = F->getRegInfo();
14076 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
14077 Register LoReg = MI.getOperand(0).getReg();
14078 Register HiReg = MI.getOperand(1).getReg();
14079
14080 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
14081 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
14082 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
14083
14084 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14085
14086 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
14087 .addReg(HiReg)
14088 .addReg(ReadAgainReg);
14089 BuildMI(BB, dl, TII->get(PPC::BCC))
 14090 .addImm(PPC::PRED_NE)
14091 .addReg(CmpReg)
14092 .addMBB(readMBB);
14093
14094 BB->addSuccessor(readMBB);
14095 BB->addSuccessor(sinkMBB);
14096 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
14097 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
14098 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
14099 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
14100 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
14101 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
14102 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
14103 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
14104
14105 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
14106 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
14107 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
14108 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
14109 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
14110 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
14111 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
14112 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
14113
14114 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
14115 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
14116 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
14117 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
14118 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
14119 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
14120 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
14121 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
14122
14123 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14124 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
14125 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14126 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
14127 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14128 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
14129 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14130 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
14131
14132 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14133 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
14134 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14135 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
14136 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14137 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
14138 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14139 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
14140
14141 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14142 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
14143 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14144 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14145 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14146 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14147 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14148 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14149
14150 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14151 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14152 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14153 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14154 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14155 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14156 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14157 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14158
14159 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14160 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14161 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14162 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14163 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14164 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14165 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14166 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14167
14168 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14169 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14170 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14171 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14172 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14173 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14174 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14175 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14176
14177 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14178 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14179 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14180 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14181 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14182 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14183 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14184 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14185
14186 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14187 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14188 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14189 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14190 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14191 BB = EmitAtomicBinary(MI, BB, 4, 0);
14192 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14193 BB = EmitAtomicBinary(MI, BB, 8, 0);
14194 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14195 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14196 (Subtarget.hasPartwordAtomics() &&
14197 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14198 (Subtarget.hasPartwordAtomics() &&
14199 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14200 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14201
14202 auto LoadMnemonic = PPC::LDARX;
14203 auto StoreMnemonic = PPC::STDCX;
14204 switch (MI.getOpcode()) {
14205 default:
14206 llvm_unreachable("Compare and swap of unknown size");
14207 case PPC::ATOMIC_CMP_SWAP_I8:
14208 LoadMnemonic = PPC::LBARX;
14209 StoreMnemonic = PPC::STBCX;
14210 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14211 break;
14212 case PPC::ATOMIC_CMP_SWAP_I16:
14213 LoadMnemonic = PPC::LHARX;
14214 StoreMnemonic = PPC::STHCX;
14215 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14216 break;
14217 case PPC::ATOMIC_CMP_SWAP_I32:
14218 LoadMnemonic = PPC::LWARX;
14219 StoreMnemonic = PPC::STWCX;
14220 break;
14221 case PPC::ATOMIC_CMP_SWAP_I64:
14222 LoadMnemonic = PPC::LDARX;
14223 StoreMnemonic = PPC::STDCX;
14224 break;
14225 }
14226 MachineRegisterInfo &RegInfo = F->getRegInfo();
14227 Register dest = MI.getOperand(0).getReg();
14228 Register ptrA = MI.getOperand(1).getReg();
14229 Register ptrB = MI.getOperand(2).getReg();
14230 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14231 Register oldval = MI.getOperand(3).getReg();
14232 Register newval = MI.getOperand(4).getReg();
14233 DebugLoc dl = MI.getDebugLoc();
14234
14235 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14236 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14237 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14238 F->insert(It, loop1MBB);
14239 F->insert(It, loop2MBB);
14240 F->insert(It, exitMBB);
14241 exitMBB->splice(exitMBB->begin(), BB,
14242 std::next(MachineBasicBlock::iterator(MI)), BB->end());
 14243 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14244
14245 // thisMBB:
14246 // ...
14247 // fallthrough --> loopMBB
14248 BB->addSuccessor(loop1MBB);
14249
14250 // loop1MBB:
14251 // l[bhwd]arx dest, ptr
14252 // cmp[wd] dest, oldval
14253 // bne- exitBB
14254 // loop2MBB:
14255 // st[bhwd]cx. newval, ptr
14256 // bne- loopMBB
14257 // b exitBB
14258 // exitBB:
14259 BB = loop1MBB;
14260 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14261 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14262 .addReg(dest)
14263 .addReg(oldval);
14264 BuildMI(BB, dl, TII->get(PPC::BCC))
 14265 .addImm(PPC::PRED_NE)
14266 .addReg(CrReg)
14267 .addMBB(exitMBB);
14268 BB->addSuccessor(loop2MBB);
14269 BB->addSuccessor(exitMBB);
14270
14271 BB = loop2MBB;
14272 BuildMI(BB, dl, TII->get(StoreMnemonic))
14273 .addReg(newval)
14274 .addReg(ptrA)
14275 .addReg(ptrB);
14276 BuildMI(BB, dl, TII->get(PPC::BCC))
 14277 .addImm(PPC::PRED_NE)
14278 .addReg(PPC::CR0)
14279 .addMBB(loop1MBB);
14280 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14281 BB->addSuccessor(loop1MBB);
14282 BB->addSuccessor(exitMBB);
14283
14284 // exitMBB:
14285 // ...
14286 BB = exitMBB;
14287 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14288 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14289 // We must use 64-bit registers for addresses when targeting 64-bit,
14290 // since we're actually doing arithmetic on them. Other registers
14291 // can be 32-bit.
14292 bool is64bit = Subtarget.isPPC64();
14293 bool isLittleEndian = Subtarget.isLittleEndian();
14294 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14295
14296 Register dest = MI.getOperand(0).getReg();
14297 Register ptrA = MI.getOperand(1).getReg();
14298 Register ptrB = MI.getOperand(2).getReg();
14299 Register oldval = MI.getOperand(3).getReg();
14300 Register newval = MI.getOperand(4).getReg();
14301 DebugLoc dl = MI.getDebugLoc();
14302
14303 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14304 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14305 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14306 F->insert(It, loop1MBB);
14307 F->insert(It, loop2MBB);
14308 F->insert(It, exitMBB);
14309 exitMBB->splice(exitMBB->begin(), BB,
14310 std::next(MachineBasicBlock::iterator(MI)), BB->end());
 14311 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14312
14313 MachineRegisterInfo &RegInfo = F->getRegInfo();
14314 const TargetRegisterClass *RC =
14315 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14316 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14317
14318 Register PtrReg = RegInfo.createVirtualRegister(RC);
14319 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14320 Register ShiftReg =
14321 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14322 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14323 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14324 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14325 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14326 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14327 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14328 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14329 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14330 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14331 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14332 Register Ptr1Reg;
14333 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14334 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14335 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14336 // thisMBB:
14337 // ...
14338 // fallthrough --> loopMBB
14339 BB->addSuccessor(loop1MBB);
14340
14341 // The 4-byte load must be aligned, while a char or short may be
14342 // anywhere in the word. Hence all this nasty bookkeeping code.
14343 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14344 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14345 // xori shift, shift1, 24 [16]
14346 // rlwinm ptr, ptr1, 0, 0, 29
14347 // slw newval2, newval, shift
 14348 // slw oldval2, oldval, shift
14349 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14350 // slw mask, mask2, shift
14351 // and newval3, newval2, mask
14352 // and oldval3, oldval2, mask
14353 // loop1MBB:
14354 // lwarx tmpDest, ptr
14355 // and tmp, tmpDest, mask
14356 // cmpw tmp, oldval3
14357 // bne- exitBB
14358 // loop2MBB:
14359 // andc tmp2, tmpDest, mask
14360 // or tmp4, tmp2, newval3
14361 // stwcx. tmp4, ptr
14362 // bne- loop1MBB
14363 // b exitBB
14364 // exitBB:
14365 // srw dest, tmpDest, shift
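 // For example, for a byte at offset 2 within its aligned word, the rlwinm
 // produces shift1 = 16 (byte offset * 8); on big-endian the xori with 24
 // converts that to 8, the distance of that byte above bit 0 of the word,
 // while on little-endian shift1 is already the required shift.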
14366 if (ptrA != ZeroReg) {
14367 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14368 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14369 .addReg(ptrA)
14370 .addReg(ptrB);
14371 } else {
14372 Ptr1Reg = ptrB;
14373 }
14374
 14375 // We need to use a 32-bit subregister here to avoid a register class
 14376 // mismatch in 64-bit mode.
14377 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14378 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
14379 .addImm(3)
14380 .addImm(27)
14381 .addImm(is8bit ? 28 : 27);
14382 if (!isLittleEndian)
14383 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14384 .addReg(Shift1Reg)
14385 .addImm(is8bit ? 24 : 16);
14386 if (is64bit)
14387 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14388 .addReg(Ptr1Reg)
14389 .addImm(0)
14390 .addImm(61);
14391 else
14392 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14393 .addReg(Ptr1Reg)
14394 .addImm(0)
14395 .addImm(0)
14396 .addImm(29);
14397 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14398 .addReg(newval)
14399 .addReg(ShiftReg);
14400 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14401 .addReg(oldval)
14402 .addReg(ShiftReg);
14403 if (is8bit)
14404 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14405 else {
14406 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14407 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14408 .addReg(Mask3Reg)
14409 .addImm(65535);
14410 }
14411 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14412 .addReg(Mask2Reg)
14413 .addReg(ShiftReg);
14414 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14415 .addReg(NewVal2Reg)
14416 .addReg(MaskReg);
14417 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14418 .addReg(OldVal2Reg)
14419 .addReg(MaskReg);
14420
14421 BB = loop1MBB;
14422 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14423 .addReg(ZeroReg)
14424 .addReg(PtrReg);
14425 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14426 .addReg(TmpDestReg)
14427 .addReg(MaskReg);
14428 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14429 .addReg(TmpReg)
14430 .addReg(OldVal3Reg);
14431 BuildMI(BB, dl, TII->get(PPC::BCC))
 14432 .addImm(PPC::PRED_NE)
14433 .addReg(CrReg)
14434 .addMBB(exitMBB);
14435 BB->addSuccessor(loop2MBB);
14436 BB->addSuccessor(exitMBB);
14437
14438 BB = loop2MBB;
14439 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14440 .addReg(TmpDestReg)
14441 .addReg(MaskReg);
14442 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14443 .addReg(Tmp2Reg)
14444 .addReg(NewVal3Reg);
14445 BuildMI(BB, dl, TII->get(PPC::STWCX))
14446 .addReg(Tmp4Reg)
14447 .addReg(ZeroReg)
14448 .addReg(PtrReg);
14449 BuildMI(BB, dl, TII->get(PPC::BCC))
 14450 .addImm(PPC::PRED_NE)
14451 .addReg(PPC::CR0)
14452 .addMBB(loop1MBB);
14453 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14454 BB->addSuccessor(loop1MBB);
14455 BB->addSuccessor(exitMBB);
14456
14457 // exitMBB:
14458 // ...
14459 BB = exitMBB;
14460 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14461 .addReg(TmpReg)
14462 .addReg(ShiftReg);
14463 } else if (MI.getOpcode() == PPC::FADDrtz) {
14464 // This pseudo performs an FADD with rounding mode temporarily forced
14465 // to round-to-zero. We emit this via custom inserter since the FPSCR
14466 // is not modeled at the SelectionDAG level.
14467 Register Dest = MI.getOperand(0).getReg();
14468 Register Src1 = MI.getOperand(1).getReg();
14469 Register Src2 = MI.getOperand(2).getReg();
14470 DebugLoc dl = MI.getDebugLoc();
14471
14472 MachineRegisterInfo &RegInfo = F->getRegInfo();
14473 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14474
14475 // Save FPSCR value.
14476 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14477
14478 // Set rounding mode to round-to-zero.
14479 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14480 .addImm(31)
 14481 .addReg(PPC::RM, RegState::ImplicitDefine);
14482
14483 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14484 .addImm(30)
 14485 .addReg(PPC::RM, RegState::ImplicitDefine);
14486
14487 // Perform addition.
14488 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14489 .addReg(Src1)
14490 .addReg(Src2);
14491 if (MI.getFlag(MachineInstr::NoFPExcept))
 14492 MIB.setFlag(MachineInstr::NoFPExcept);
14493
14494 // Restore FPSCR value.
14495 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14496 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14497 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14498 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14499 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14500 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14501 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14502 ? PPC::ANDI8_rec
14503 : PPC::ANDI_rec;
14504 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14505 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14506
14507 MachineRegisterInfo &RegInfo = F->getRegInfo();
14508 Register Dest = RegInfo.createVirtualRegister(
14509 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14510
14511 DebugLoc Dl = MI.getDebugLoc();
14512 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14513 .addReg(MI.getOperand(1).getReg())
14514 .addImm(1);
14515 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14516 MI.getOperand(0).getReg())
14517 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14518 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14519 DebugLoc Dl = MI.getDebugLoc();
14520 MachineRegisterInfo &RegInfo = F->getRegInfo();
14521 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14522 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14523 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14524 MI.getOperand(0).getReg())
14525 .addReg(CRReg);
14526 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14527 DebugLoc Dl = MI.getDebugLoc();
14528 unsigned Imm = MI.getOperand(1).getImm();
14529 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14530 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14531 MI.getOperand(0).getReg())
14532 .addReg(PPC::CR0EQ);
14533 } else if (MI.getOpcode() == PPC::SETRNDi) {
14534 DebugLoc dl = MI.getDebugLoc();
14535 Register OldFPSCRReg = MI.getOperand(0).getReg();
14536
14537 // Save FPSCR value.
14538 if (MRI.use_empty(OldFPSCRReg))
14539 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14540 else
14541 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14542
 14543 // The floating point rounding mode is in bits 62:63 of FPSCR, and has
14544 // the following settings:
14545 // 00 Round to nearest
14546 // 01 Round to 0
14547 // 10 Round to +inf
14548 // 11 Round to -inf
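 // For example, SETRNDi with Mode = 2 (round to +inf) clears bit 63 and sets
 // bit 62: the MTFSB0/MTFSB1 operands below use the 32-bit FPSCR bit
 // numbering, so operand 31 corresponds to bit 63 and operand 30 to bit 62.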
14549
 14550 // When the operand is an immediate, use its two least significant bits to
 14551 // set bits 62:63 of FPSCR.
14552 unsigned Mode = MI.getOperand(1).getImm();
14553 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14554 .addImm(31)
 14555 .addReg(PPC::RM, RegState::ImplicitDefine);
14556
14557 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14558 .addImm(30)
 14559 .addReg(PPC::RM, RegState::ImplicitDefine);
14560 } else if (MI.getOpcode() == PPC::SETRND) {
14561 DebugLoc dl = MI.getDebugLoc();
14562
 14563 // Copy a register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg, or
 14564 // from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
 14565 // If the target doesn't have DirectMove, use the stack to do the
 14566 // conversion, because the target lacks instructions such as mtvsrd
 14567 // or mfvsrd that could do this conversion directly.
14568 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14569 if (Subtarget.hasDirectMove()) {
14570 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14571 .addReg(SrcReg);
14572 } else {
14573 // Use stack to do the register copy.
14574 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14575 MachineRegisterInfo &RegInfo = F->getRegInfo();
14576 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14577 if (RC == &PPC::F8RCRegClass) {
 14578 // Copy register from F8RCRegClass to G8RCRegClass.
14579 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14580 "Unsupported RegClass.");
14581
14582 StoreOp = PPC::STFD;
14583 LoadOp = PPC::LD;
14584 } else {
 14585 // Copy register from G8RCRegClass to F8RCRegClass.
14586 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14587 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14588 "Unsupported RegClass.");
14589 }
14590
14591 MachineFrameInfo &MFI = F->getFrameInfo();
14592 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14593
14594 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14595 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
 14596 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
14597 MFI.getObjectAlign(FrameIdx));
14598
14599 // Store the SrcReg into the stack.
14600 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14601 .addReg(SrcReg)
14602 .addImm(0)
14603 .addFrameIndex(FrameIdx)
14604 .addMemOperand(MMOStore);
14605
14606 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14607 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
 14608 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
14609 MFI.getObjectAlign(FrameIdx));
14610
14611 // Load from the stack where SrcReg is stored, and save to DestReg,
14612 // so we have done the RegClass conversion from RegClass::SrcReg to
14613 // RegClass::DestReg.
14614 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14615 .addImm(0)
14616 .addFrameIndex(FrameIdx)
14617 .addMemOperand(MMOLoad);
14618 }
14619 };
14620
14621 Register OldFPSCRReg = MI.getOperand(0).getReg();
14622
14623 // Save FPSCR value.
14624 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14625
 14626 // When the operand is a gprc register, use its two least significant bits
 14627 // and the mtfsf instruction to set bits 62:63 of FPSCR.
14628 //
14629 // copy OldFPSCRTmpReg, OldFPSCRReg
14630 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14631 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14632 // copy NewFPSCRReg, NewFPSCRTmpReg
14633 // mtfsf 255, NewFPSCRReg
14634 MachineOperand SrcOp = MI.getOperand(1);
14635 MachineRegisterInfo &RegInfo = F->getRegInfo();
14636 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14637
14638 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14639
14640 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14641 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14642
 14643 // The first operand of INSERT_SUBREG should be a register that has
 14644 // subregisters; since we only care about its register class, an
 14645 // IMPLICIT_DEF register suffices.
14646 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14647 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14648 .addReg(ImDefReg)
14649 .add(SrcOp)
14650 .addImm(1);
14651
14652 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14653 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14654 .addReg(OldFPSCRTmpReg)
14655 .addReg(ExtSrcReg)
14656 .addImm(0)
14657 .addImm(62);
14658
14659 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14660 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14661
 14662 // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits
 14663 // 32:63 of FPSCR.
14664 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14665 .addImm(255)
14666 .addReg(NewFPSCRReg)
14667 .addImm(0)
14668 .addImm(0);
14669 } else if (MI.getOpcode() == PPC::SETFLM) {
14670 DebugLoc Dl = MI.getDebugLoc();
14671
14672 // Result of setflm is previous FPSCR content, so we need to save it first.
14673 Register OldFPSCRReg = MI.getOperand(0).getReg();
14674 if (MRI.use_empty(OldFPSCRReg))
14675 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14676 else
14677 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14678
 14679 // Put bits 32:63 of the new value into FPSCR.
14680 Register NewFPSCRReg = MI.getOperand(1).getReg();
14681 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14682 .addImm(255)
14683 .addReg(NewFPSCRReg)
14684 .addImm(0)
14685 .addImm(0);
14686 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14687 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14688 return emitProbedAlloca(MI, BB);
14689 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14690 DebugLoc DL = MI.getDebugLoc();
14691 Register Src = MI.getOperand(2).getReg();
14692 Register Lo = MI.getOperand(0).getReg();
14693 Register Hi = MI.getOperand(1).getReg();
14694 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14695 .addDef(Lo)
14696 .addUse(Src, 0, PPC::sub_gp8_x1);
14697 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14698 .addDef(Hi)
14699 .addUse(Src, 0, PPC::sub_gp8_x0);
14700 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14701 MI.getOpcode() == PPC::STQX_PSEUDO) {
14702 DebugLoc DL = MI.getDebugLoc();
 14703 // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
 14704 // holds the result of adding RA and RB, so it has to be
 14705 // g8rc_and_g8rc_nox0.
14706 Register Ptr =
14707 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14708 Register Val = MI.getOperand(0).getReg();
14709 Register RA = MI.getOperand(1).getReg();
14710 Register RB = MI.getOperand(2).getReg();
14711 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14712 BuildMI(*BB, MI, DL,
14713 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14714 : TII->get(PPC::STQ))
14715 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
14716 .addImm(0)
14717 .addReg(Ptr);
14718 } else {
14719 llvm_unreachable("Unexpected instr type to insert");
14720 }
14721
14722 MI.eraseFromParent(); // The pseudo instruction is gone now.
14723 return BB;
14724}
14725
14726//===----------------------------------------------------------------------===//
14727// Target Optimization Hooks
14728//===----------------------------------------------------------------------===//
14729
14730static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14731 // For the estimates, convergence is quadratic, so we essentially double the
14732 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14733 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14734 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
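 // For example, starting from ~2^-5 accuracy, one step gives ~2^-10, two
 // give ~2^-20 and three give ~2^-40, so three steps cover float's 23
 // mantissa bits and a fourth covers double's 52; with hasRecipPrec()
 // (~2^-14), one step (~2^-28) suffices for float and two (~2^-56) for
 // double.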
14735 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14736 if (VT.getScalarType() == MVT::f64)
14737 RefinementSteps++;
14738 return RefinementSteps;
14739}
14740
14741SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14742 const DenormalMode &Mode) const {
14743 // We only have VSX Vector Test for software Square Root.
14744 EVT VT = Op.getValueType();
14745 if (!isTypeLegal(MVT::i1) ||
14746 (VT != MVT::f64 &&
14747 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14749
14750 SDLoc DL(Op);
 14751 // The output register of FTSQRT is a CR field.
14752 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14753 // ftsqrt BF,FRB
14754 // Let e_b be the unbiased exponent of the double-precision
14755 // floating-point operand in register FRB.
14756 // fe_flag is set to 1 if either of the following conditions occurs.
14757 // - The double-precision floating-point operand in register FRB is a zero,
14758 // a NaN, or an infinity, or a negative value.
14759 // - e_b is less than or equal to -970.
14760 // Otherwise fe_flag is set to 0.
14761 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14762 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14763 // exponent is less than -970)
14764 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14765 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14766 FTSQRT, SRIdxVal),
14767 0);
14768}
14769
14770SDValue
14771PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14772 SelectionDAG &DAG) const {
14773 // We only have VSX Vector Square Root.
14774 EVT VT = Op.getValueType();
14775 if (VT != MVT::f64 &&
14776 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14778
14779 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14780}
14781
14782SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14783 int Enabled, int &RefinementSteps,
14784 bool &UseOneConstNR,
14785 bool Reciprocal) const {
14786 EVT VT = Operand.getValueType();
14787 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14788 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14789 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14790 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14791 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14792 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14793
14794 // The Newton-Raphson computation with a single constant does not provide
14795 // enough accuracy on some CPUs.
14796 UseOneConstNR = !Subtarget.needsTwoConstNR();
14797 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14798 }
14799 return SDValue();
14800}
14801
14802SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14803 int Enabled,
14804 int &RefinementSteps) const {
14805 EVT VT = Operand.getValueType();
14806 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14807 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14808 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14809 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14810 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14811 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14812 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14813 }
14814 return SDValue();
14815}
14816
 14817 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14818 // Note: This functionality is used only when arcp is enabled, and
14819 // on cores with reciprocal estimates (which are used when arcp is
14820 // enabled for division), this functionality is redundant with the default
14821 // combiner logic (once the division -> reciprocal/multiply transformation
14822 // has taken place). As a result, this matters more for older cores than for
14823 // newer ones.
14824
14825 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14826 // reciprocal if there are two or more FDIVs (for embedded cores with only
 14827 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
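 // For example, under fast-math/arcp, a sequence like a/b, c/b, d/b can
 // become r = 1/b (estimate plus Newton-Raphson refinement) followed by
 // a*r, c*r, d*r, trading several divides for one reciprocal and a few
 // multiplies.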
14828 switch (Subtarget.getCPUDirective()) {
14829 default:
14830 return 3;
14831 case PPC::DIR_440:
14832 case PPC::DIR_A2:
14833 case PPC::DIR_E500:
14834 case PPC::DIR_E500mc:
14835 case PPC::DIR_E5500:
14836 return 2;
14837 }
14838}
14839
14840// isConsecutiveLSLoc needs to work even if all adds have not yet been
14841// collapsed, and so we need to look through chains of them.
 14842 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14843 int64_t& Offset, SelectionDAG &DAG) {
14844 if (DAG.isBaseWithConstantOffset(Loc)) {
14845 Base = Loc.getOperand(0);
14846 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14847
14848 // The base might itself be a base plus an offset, and if so, accumulate
14849 // that as well.
14850 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14851 }
14852}
14853
 14854 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14855 unsigned Bytes, int Dist,
14856 SelectionDAG &DAG) {
14857 if (VT.getSizeInBits() / 8 != Bytes)
14858 return false;
14859
14860 SDValue BaseLoc = Base->getBasePtr();
14861 if (Loc.getOpcode() == ISD::FrameIndex) {
14862 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14863 return false;
 14864 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14865 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14866 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14867 int FS = MFI.getObjectSize(FI);
14868 int BFS = MFI.getObjectSize(BFI);
14869 if (FS != BFS || FS != (int)Bytes) return false;
14870 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14871 }
14872
14873 SDValue Base1 = Loc, Base2 = BaseLoc;
14874 int64_t Offset1 = 0, Offset2 = 0;
14875 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14876 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14877 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14878 return true;
14879
14880 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14881 const GlobalValue *GV1 = nullptr;
14882 const GlobalValue *GV2 = nullptr;
14883 Offset1 = 0;
14884 Offset2 = 0;
14885 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14886 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14887 if (isGA1 && isGA2 && GV1 == GV2)
14888 return Offset1 == (Offset2 + Dist*Bytes);
14889 return false;
14890}
14891
14892// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14893// not enforce equality of the chain operands.
 14894 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14895 unsigned Bytes, int Dist,
14896 SelectionDAG &DAG) {
 14897 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14898 EVT VT = LS->getMemoryVT();
14899 SDValue Loc = LS->getBasePtr();
14900 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14901 }
14902
14903 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14904 EVT VT;
14905 switch (N->getConstantOperandVal(1)) {
14906 default: return false;
14907 case Intrinsic::ppc_altivec_lvx:
14908 case Intrinsic::ppc_altivec_lvxl:
14909 case Intrinsic::ppc_vsx_lxvw4x:
14910 case Intrinsic::ppc_vsx_lxvw4x_be:
14911 VT = MVT::v4i32;
14912 break;
14913 case Intrinsic::ppc_vsx_lxvd2x:
14914 case Intrinsic::ppc_vsx_lxvd2x_be:
14915 VT = MVT::v2f64;
14916 break;
14917 case Intrinsic::ppc_altivec_lvebx:
14918 VT = MVT::i8;
14919 break;
14920 case Intrinsic::ppc_altivec_lvehx:
14921 VT = MVT::i16;
14922 break;
14923 case Intrinsic::ppc_altivec_lvewx:
14924 VT = MVT::i32;
14925 break;
14926 }
14927
14928 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14929 }
14930
14931 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14932 EVT VT;
14933 switch (N->getConstantOperandVal(1)) {
14934 default: return false;
14935 case Intrinsic::ppc_altivec_stvx:
14936 case Intrinsic::ppc_altivec_stvxl:
14937 case Intrinsic::ppc_vsx_stxvw4x:
14938 VT = MVT::v4i32;
14939 break;
14940 case Intrinsic::ppc_vsx_stxvd2x:
14941 VT = MVT::v2f64;
14942 break;
14943 case Intrinsic::ppc_vsx_stxvw4x_be:
14944 VT = MVT::v4i32;
14945 break;
14946 case Intrinsic::ppc_vsx_stxvd2x_be:
14947 VT = MVT::v2f64;
14948 break;
14949 case Intrinsic::ppc_altivec_stvebx:
14950 VT = MVT::i8;
14951 break;
14952 case Intrinsic::ppc_altivec_stvehx:
14953 VT = MVT::i16;
14954 break;
14955 case Intrinsic::ppc_altivec_stvewx:
14956 VT = MVT::i32;
14957 break;
14958 }
14959
14960 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14961 }
14962
14963 return false;
14964}
14965
 14966 // Return true if there is a nearby consecutive load to the one provided
 14967 // (regardless of alignment). We search up and down the chain, looking through
14968// token factors and other loads (but nothing else). As a result, a true result
14969// indicates that it is safe to create a new consecutive load adjacent to the
14970// load provided.
 14971 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14972 SDValue Chain = LD->getChain();
14973 EVT VT = LD->getMemoryVT();
14974
14975 SmallPtrSet<SDNode *, 16> LoadRoots;
14976 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
 14977 SmallPtrSet<SDNode *, 16> Visited;
14978
14979 // First, search up the chain, branching to follow all token-factor operands.
 14980 // If we find a consecutive load, then we're done; otherwise, record all
14981 // nodes just above the top-level loads and token factors.
14982 while (!Queue.empty()) {
14983 SDNode *ChainNext = Queue.pop_back_val();
14984 if (!Visited.insert(ChainNext).second)
14985 continue;
14986
14987 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14988 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14989 return true;
14990
14991 if (!Visited.count(ChainLD->getChain().getNode()))
14992 Queue.push_back(ChainLD->getChain().getNode());
14993 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14994 for (const SDUse &O : ChainNext->ops())
14995 if (!Visited.count(O.getNode()))
14996 Queue.push_back(O.getNode());
14997 } else
14998 LoadRoots.insert(ChainNext);
14999 }
15000
15001 // Second, search down the chain, starting from the top-level nodes recorded
15002 // in the first phase. These top-level nodes are the nodes just above all
 15003 // loads and token factors. Starting with their uses, recursively look through
15004 // all loads (just the chain uses) and token factors to find a consecutive
15005 // load.
15006 Visited.clear();
15007 Queue.clear();
15008
15009 for (SDNode *I : LoadRoots) {
15010 Queue.push_back(I);
15011
15012 while (!Queue.empty()) {
15013 SDNode *LoadRoot = Queue.pop_back_val();
15014 if (!Visited.insert(LoadRoot).second)
15015 continue;
15016
15017 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
15018 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
15019 return true;
15020
15021 for (SDNode *U : LoadRoot->users())
15022 if (((isa<MemSDNode>(U) &&
15023 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
15024 U->getOpcode() == ISD::TokenFactor) &&
15025 !Visited.count(U))
15026 Queue.push_back(U);
15027 }
15028 }
15029
15030 return false;
15031}
15032
15033/// This function is called when we have proved that a SETCC node can be replaced
15034/// by subtraction (and other supporting instructions) so that the result of
 15035 /// the comparison is kept in a GPR instead of a CR. This function is purely for
15036/// codegen purposes and has some flags to guide the codegen process.
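 /// For example, for an unsigned i32 "setult a, b" both operands are first
 /// zero-extended to i64; the i64 subtraction a - b cannot wrap, so its sign
 /// bit (bit 63) is 1 exactly when a < b, and shifting that bit down to bit 0
 /// (with an extra XOR by 1 for the SETULE/SETUGE forms) yields the i1 result.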
15037static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
15038 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15039 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15040
15041 // Zero extend the operands to the largest legal integer. Originally, they
15042 // must be of a strictly smaller size.
15043 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
15044 DAG.getConstant(Size, DL, MVT::i32));
15045 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
15046 DAG.getConstant(Size, DL, MVT::i32));
15047
15048 // Swap if needed. Depends on the condition code.
15049 if (Swap)
15050 std::swap(Op0, Op1);
15051
15052 // Subtract extended integers.
15053 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
15054
15055 // Move the sign bit to the least significant position and zero out the rest.
15056 // Now the least significant bit carries the result of original comparison.
15057 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
15058 DAG.getConstant(Size - 1, DL, MVT::i32));
15059 auto Final = Shifted;
15060
15061 // Complement the result if needed. Based on the condition code.
15062 if (Complement)
15063 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
15064 DAG.getConstant(1, DL, MVT::i64));
15065
15066 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
15067}
15068
15069SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
15070 DAGCombinerInfo &DCI) const {
15071 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15072
15073 SelectionDAG &DAG = DCI.DAG;
15074 SDLoc DL(N);
15075
 15076 // The size of the integers being compared has a critical role in the
 15077 // following analysis, so we prefer to do this when all types are legal.
15078 if (!DCI.isAfterLegalizeDAG())
15079 return SDValue();
15080
15081 // If all users of SETCC extend its value to a legal integer type
 15082 // then we replace SETCC with a subtraction.
15083 for (const SDNode *U : N->users())
15084 if (U->getOpcode() != ISD::ZERO_EXTEND)
15085 return SDValue();
15086
15087 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15088 auto OpSize = N->getOperand(0).getValueSizeInBits();
15089
 15090 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
15091
15092 if (OpSize < Size) {
15093 switch (CC) {
15094 default: break;
15095 case ISD::SETULT:
15096 return generateEquivalentSub(N, Size, false, false, DL, DAG);
15097 case ISD::SETULE:
15098 return generateEquivalentSub(N, Size, true, true, DL, DAG);
15099 case ISD::SETUGT:
15100 return generateEquivalentSub(N, Size, false, true, DL, DAG);
15101 case ISD::SETUGE:
15102 return generateEquivalentSub(N, Size, true, false, DL, DAG);
15103 }
15104 }
15105
15106 return SDValue();
15107}
15108
15109SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15110 DAGCombinerInfo &DCI) const {
15111 SelectionDAG &DAG = DCI.DAG;
15112 SDLoc dl(N);
15113
15114 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15115 // If we're tracking CR bits, we need to be careful that we don't have:
15116 // trunc(binary-ops(zext(x), zext(y)))
15117 // or
15118 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15119 // such that we're unnecessarily moving things into GPRs when it would be
15120 // better to keep them in CR bits.
15121
15122 // Note that trunc here can be an actual i1 trunc, or can be the effective
15123 // truncation that comes from a setcc or select_cc.
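 // For example, (zext i1 %a to i32) & (zext i1 %b to i32) truncated back to
 // i1 can be performed directly on the i1 values (e.g. as a crand) once the
 // zext/trunc pair is peeled away, instead of shuttling both bits through
 // GPRs.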
15124 if (N->getOpcode() == ISD::TRUNCATE &&
15125 N->getValueType(0) != MVT::i1)
15126 return SDValue();
15127
15128 if (N->getOperand(0).getValueType() != MVT::i32 &&
15129 N->getOperand(0).getValueType() != MVT::i64)
15130 return SDValue();
15131
15132 if (N->getOpcode() == ISD::SETCC ||
15133 N->getOpcode() == ISD::SELECT_CC) {
15134 // If we're looking at a comparison, then we need to make sure that the
 15135 // high bits (all except for the first) don't affect the result.
15136 ISD::CondCode CC =
15137 cast<CondCodeSDNode>(N->getOperand(
15138 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15139 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15140
15141 if (ISD::isSignedIntSetCC(CC)) {
15142 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15143 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15144 return SDValue();
15145 } else if (ISD::isUnsignedIntSetCC(CC)) {
15146 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15147 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15148 !DAG.MaskedValueIsZero(N->getOperand(1),
15149 APInt::getHighBitsSet(OpBits, OpBits-1)))
15150 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15151 : SDValue());
15152 } else {
15153 // This is neither a signed nor an unsigned comparison, just make sure
15154 // that the high bits are equal.
15155 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15156 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15157
15158 // We don't really care about what is known about the first bit (if
15159 // anything), so pretend that it is known zero for both to ensure they can
15160 // be compared as constants.
15161 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15162 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15163
15164 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15165 Op1Known.getConstant() != Op2Known.getConstant())
15166 return SDValue();
15167 }
15168 }
15169
15170 // We now know that the higher-order bits are irrelevant, we just need to
15171 // make sure that all of the intermediate operations are bit operations, and
15172 // all inputs are extensions.
15173 if (N->getOperand(0).getOpcode() != ISD::AND &&
15174 N->getOperand(0).getOpcode() != ISD::OR &&
15175 N->getOperand(0).getOpcode() != ISD::XOR &&
15176 N->getOperand(0).getOpcode() != ISD::SELECT &&
15177 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15178 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15179 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15180 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15181 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15182 return SDValue();
15183
15184 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15185 N->getOperand(1).getOpcode() != ISD::AND &&
15186 N->getOperand(1).getOpcode() != ISD::OR &&
15187 N->getOperand(1).getOpcode() != ISD::XOR &&
15188 N->getOperand(1).getOpcode() != ISD::SELECT &&
15189 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15190 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15191 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15192 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15193 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15194 return SDValue();
15195
15197 SmallVector<SDValue, 8> BinOps, PromOps;
15198 SmallPtrSet<SDNode *, 16> Visited;
15199
15200 for (unsigned i = 0; i < 2; ++i) {
15201 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15202 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15203 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15204 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15205 isa<ConstantSDNode>(N->getOperand(i)))
15206 Inputs.push_back(N->getOperand(i));
15207 else
15208 BinOps.push_back(N->getOperand(i));
15209
15210 if (N->getOpcode() == ISD::TRUNCATE)
15211 break;
15212 }
15213
15214 // Visit all inputs, collect all binary operations (and, or, xor and
15215 // select) that are all fed by extensions.
15216 while (!BinOps.empty()) {
15217 SDValue BinOp = BinOps.pop_back_val();
15218
15219 if (!Visited.insert(BinOp.getNode()).second)
15220 continue;
15221
15222 PromOps.push_back(BinOp);
15223
15224 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15225 // The condition of the select is not promoted.
15226 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15227 continue;
15228 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15229 continue;
15230
15231 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15232 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15233 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15234 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15235 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15236 Inputs.push_back(BinOp.getOperand(i));
15237 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15238 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15239 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15240 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15241 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15242 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15243 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15244 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15245 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15246 BinOps.push_back(BinOp.getOperand(i));
15247 } else {
15248 // We have an input that is not an extension or another binary
15249 // operation; we'll abort this transformation.
15250 return SDValue();
15251 }
15252 }
15253 }
15254
15255 // Make sure that this is a self-contained cluster of operations (which
15256 // is not quite the same thing as saying that everything has only one
15257 // use).
15258 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15259 if (isa<ConstantSDNode>(Inputs[i]))
15260 continue;
15261
15262 for (const SDNode *User : Inputs[i].getNode()->users()) {
15263 if (User != N && !Visited.count(User))
15264 return SDValue();
15265
15266 // Make sure that we're not going to promote the non-output-value
15267 // operand(s) or SELECT or SELECT_CC.
15268 // FIXME: Although we could sometimes handle this, and it does occur in
15269 // practice that one of the condition inputs to the select is also one of
15270 // the outputs, we currently can't deal with this.
15271 if (User->getOpcode() == ISD::SELECT) {
15272 if (User->getOperand(0) == Inputs[i])
15273 return SDValue();
15274 } else if (User->getOpcode() == ISD::SELECT_CC) {
15275 if (User->getOperand(0) == Inputs[i] ||
15276 User->getOperand(1) == Inputs[i])
15277 return SDValue();
15278 }
15279 }
15280 }
15281
15282 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15283 for (const SDNode *User : PromOps[i].getNode()->users()) {
15284 if (User != N && !Visited.count(User))
15285 return SDValue();
15286
15287 // Make sure that we're not going to promote the non-output-value
15288 // operand(s) or SELECT or SELECT_CC.
15289 // FIXME: Although we could sometimes handle this, and it does occur in
15290 // practice that one of the condition inputs to the select is also one of
15291 // the outputs, we currently can't deal with this.
15292 if (User->getOpcode() == ISD::SELECT) {
15293 if (User->getOperand(0) == PromOps[i])
15294 return SDValue();
15295 } else if (User->getOpcode() == ISD::SELECT_CC) {
15296 if (User->getOperand(0) == PromOps[i] ||
15297 User->getOperand(1) == PromOps[i])
15298 return SDValue();
15299 }
15300 }
15301 }
15302
15303 // Replace all inputs with the extension operand.
15304 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15305 // Constants may have users outside the cluster of to-be-promoted nodes,
15306 // and so we need to replace those as we do the promotions.
15307 if (isa<ConstantSDNode>(Inputs[i]))
15308 continue;
15309 else
15310 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15311 }
15312
15313 std::list<HandleSDNode> PromOpHandles;
15314 for (auto &PromOp : PromOps)
15315 PromOpHandles.emplace_back(PromOp);
15316
15317 // Replace all operations (these are all the same, but have a different
15318 // (i1) return type). DAG.getNode will validate that the types of
15319 // a binary operator match, so go through the list in reverse so that
15320 // we've likely promoted both operands first. Any intermediate truncations or
15321 // extensions disappear.
15322 while (!PromOpHandles.empty()) {
15323 SDValue PromOp = PromOpHandles.back().getValue();
15324 PromOpHandles.pop_back();
15325
15326 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15327 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15328 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15329 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15330 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15331 PromOp.getOperand(0).getValueType() != MVT::i1) {
15332 // The operand is not yet ready (see comment below).
15333 PromOpHandles.emplace_front(PromOp);
15334 continue;
15335 }
15336
15337 SDValue RepValue = PromOp.getOperand(0);
15338 if (isa<ConstantSDNode>(RepValue))
15339 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15340
15341 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15342 continue;
15343 }
15344
15345 unsigned C;
15346 switch (PromOp.getOpcode()) {
15347 default: C = 0; break;
15348 case ISD::SELECT: C = 1; break;
15349 case ISD::SELECT_CC: C = 2; break;
15350 }
15351
15352 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15353 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15354 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15355 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15356 // The to-be-promoted operands of this node have not yet been
15357 // promoted (this should be rare because we're going through the
15358 // list backward, but if one of the operands has several users in
15359 // this cluster of to-be-promoted nodes, it is possible).
15360 PromOpHandles.emplace_front(PromOp);
15361 continue;
15362 }
15363
15364    SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15365
15366 // If there are any constant inputs, make sure they're replaced now.
15367 for (unsigned i = 0; i < 2; ++i)
15368 if (isa<ConstantSDNode>(Ops[C+i]))
15369 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15370
15371 DAG.ReplaceAllUsesOfValueWith(PromOp,
15372 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15373 }
15374
15375 // Now we're left with the initial truncation itself.
15376 if (N->getOpcode() == ISD::TRUNCATE)
15377 return N->getOperand(0);
15378
15379 // Otherwise, this is a comparison. The operands to be compared have just
15380 // changed type (to i1), but everything else is the same.
15381 return SDValue(N, 0);
15382}
15383
15384SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15385 DAGCombinerInfo &DCI) const {
15386 SelectionDAG &DAG = DCI.DAG;
15387 SDLoc dl(N);
15388
15389 // If we're tracking CR bits, we need to be careful that we don't have:
15390 // zext(binary-ops(trunc(x), trunc(y)))
15391 // or
15392  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
15393 // such that we're unnecessarily moving things into CR bits that can more
15394 // efficiently stay in GPRs. Note that if we're not certain that the high
15395 // bits are set as required by the final extension, we still may need to do
15396 // some masking to get the proper behavior.
15397
15398 // This same functionality is important on PPC64 when dealing with
15399 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15400 // the return values of functions. Because it is so similar, it is handled
15401 // here as well.
15402
15403 if (N->getValueType(0) != MVT::i32 &&
15404 N->getValueType(0) != MVT::i64)
15405 return SDValue();
15406
15407 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15408 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15409 return SDValue();
15410
15411 if (N->getOperand(0).getOpcode() != ISD::AND &&
15412 N->getOperand(0).getOpcode() != ISD::OR &&
15413 N->getOperand(0).getOpcode() != ISD::XOR &&
15414 N->getOperand(0).getOpcode() != ISD::SELECT &&
15415 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15416 return SDValue();
15417
15418  SmallVector<SDValue, 4> Inputs;
15419  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15420 SmallPtrSet<SDNode *, 16> Visited;
15421
15422 // Visit all inputs, collect all binary operations (and, or, xor and
15423 // select) that are all fed by truncations.
15424 while (!BinOps.empty()) {
15425 SDValue BinOp = BinOps.pop_back_val();
15426
15427 if (!Visited.insert(BinOp.getNode()).second)
15428 continue;
15429
15430 PromOps.push_back(BinOp);
15431
15432 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15433 // The condition of the select is not promoted.
15434 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15435 continue;
15436 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15437 continue;
15438
15439 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15440 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15441 Inputs.push_back(BinOp.getOperand(i));
15442 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15443 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15444 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15445 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15446 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15447 BinOps.push_back(BinOp.getOperand(i));
15448 } else {
15449 // We have an input that is not a truncation or another binary
15450 // operation; we'll abort this transformation.
15451 return SDValue();
15452 }
15453 }
15454 }
15455
15456  // For each select, the operands that must be truncated when the select is
15457  // promoted because the operand is actually part of the to-be-promoted set.
15458 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15459
15460 // Make sure that this is a self-contained cluster of operations (which
15461 // is not quite the same thing as saying that everything has only one
15462 // use).
15463 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15464 if (isa<ConstantSDNode>(Inputs[i]))
15465 continue;
15466
15467 for (SDNode *User : Inputs[i].getNode()->users()) {
15468 if (User != N && !Visited.count(User))
15469 return SDValue();
15470
15471      // If we're going to promote the non-output-value operand(s) of SELECT or
15472 // SELECT_CC, record them for truncation.
15473 if (User->getOpcode() == ISD::SELECT) {
15474 if (User->getOperand(0) == Inputs[i])
15475 SelectTruncOp[0].insert(std::make_pair(User,
15476 User->getOperand(0).getValueType()));
15477 } else if (User->getOpcode() == ISD::SELECT_CC) {
15478 if (User->getOperand(0) == Inputs[i])
15479 SelectTruncOp[0].insert(std::make_pair(User,
15480 User->getOperand(0).getValueType()));
15481 if (User->getOperand(1) == Inputs[i])
15482 SelectTruncOp[1].insert(std::make_pair(User,
15483 User->getOperand(1).getValueType()));
15484 }
15485 }
15486 }
15487
15488 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15489 for (SDNode *User : PromOps[i].getNode()->users()) {
15490 if (User != N && !Visited.count(User))
15491 return SDValue();
15492
15493      // If we're going to promote the non-output-value operand(s) of SELECT or
15494 // SELECT_CC, record them for truncation.
15495 if (User->getOpcode() == ISD::SELECT) {
15496 if (User->getOperand(0) == PromOps[i])
15497 SelectTruncOp[0].insert(std::make_pair(User,
15498 User->getOperand(0).getValueType()));
15499 } else if (User->getOpcode() == ISD::SELECT_CC) {
15500 if (User->getOperand(0) == PromOps[i])
15501 SelectTruncOp[0].insert(std::make_pair(User,
15502 User->getOperand(0).getValueType()));
15503 if (User->getOperand(1) == PromOps[i])
15504 SelectTruncOp[1].insert(std::make_pair(User,
15505 User->getOperand(1).getValueType()));
15506 }
15507 }
15508 }
15509
15510 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15511 bool ReallyNeedsExt = false;
15512 if (N->getOpcode() != ISD::ANY_EXTEND) {
15513    // If not all of the inputs are already sign/zero extended, then we'll
15514    // still need to do that at the end.
15515 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15516 if (isa<ConstantSDNode>(Inputs[i]))
15517 continue;
15518
15519 unsigned OpBits =
15520 Inputs[i].getOperand(0).getValueSizeInBits();
15521 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15522
15523 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15524 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15525 APInt::getHighBitsSet(OpBits,
15526 OpBits-PromBits))) ||
15527 (N->getOpcode() == ISD::SIGN_EXTEND &&
15528 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15529 (OpBits-(PromBits-1)))) {
15530 ReallyNeedsExt = true;
15531 break;
15532 }
15533 }
15534 }
15535
15536 // Convert PromOps to handles before doing any RAUW operations, as these
15537 // may CSE with existing nodes, deleting the originals.
15538 std::list<HandleSDNode> PromOpHandles;
15539 for (auto &PromOp : PromOps)
15540 PromOpHandles.emplace_back(PromOp);
15541
15542 // Replace all inputs, either with the truncation operand, or a
15543 // truncation or extension to the final output type.
15544 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15545 // Constant inputs need to be replaced with the to-be-promoted nodes that
15546 // use them because they might have users outside of the cluster of
15547 // promoted nodes.
15548 if (isa<ConstantSDNode>(Inputs[i]))
15549 continue;
15550
15551 SDValue InSrc = Inputs[i].getOperand(0);
15552 if (Inputs[i].getValueType() == N->getValueType(0))
15553 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15554 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15555 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15556 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15557 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15558 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15559 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15560 else
15561 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15562 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15563 }
15564
15565 // Replace all operations (these are all the same, but have a different
15566 // (promoted) return type). DAG.getNode will validate that the types of
15567 // a binary operator match, so go through the list in reverse so that
15568 // we've likely promoted both operands first.
15569 while (!PromOpHandles.empty()) {
15570 SDValue PromOp = PromOpHandles.back().getValue();
15571 PromOpHandles.pop_back();
15572
15573 unsigned C;
15574 switch (PromOp.getOpcode()) {
15575 default: C = 0; break;
15576 case ISD::SELECT: C = 1; break;
15577 case ISD::SELECT_CC: C = 2; break;
15578 }
15579
15580 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15581 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15582 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15583 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15584 // The to-be-promoted operands of this node have not yet been
15585 // promoted (this should be rare because we're going through the
15586 // list backward, but if one of the operands has several users in
15587 // this cluster of to-be-promoted nodes, it is possible).
15588 PromOpHandles.emplace_front(PromOp);
15589 continue;
15590 }
15591
15592 // For SELECT and SELECT_CC nodes, we do a similar check for any
15593 // to-be-promoted comparison inputs.
15594 if (PromOp.getOpcode() == ISD::SELECT ||
15595 PromOp.getOpcode() == ISD::SELECT_CC) {
15596 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15597 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15598 (SelectTruncOp[1].count(PromOp.getNode()) &&
15599 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15600 PromOpHandles.emplace_front(PromOp);
15601 continue;
15602 }
15603 }
15604
15605    SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15606
15607 // If this node has constant inputs, then they'll need to be promoted here.
15608 for (unsigned i = 0; i < 2; ++i) {
15609 if (!isa<ConstantSDNode>(Ops[C+i]))
15610 continue;
15611 if (Ops[C+i].getValueType() == N->getValueType(0))
15612 continue;
15613
15614 if (N->getOpcode() == ISD::SIGN_EXTEND)
15615 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15616 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15617 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15618 else
15619 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15620 }
15621
15622 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15623 // truncate them again to the original value type.
15624 if (PromOp.getOpcode() == ISD::SELECT ||
15625 PromOp.getOpcode() == ISD::SELECT_CC) {
15626 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15627 if (SI0 != SelectTruncOp[0].end())
15628 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15629 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15630 if (SI1 != SelectTruncOp[1].end())
15631 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15632 }
15633
15634 DAG.ReplaceAllUsesOfValueWith(PromOp,
15635 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15636 }
15637
15638 // Now we're left with the initial extension itself.
15639 if (!ReallyNeedsExt)
15640 return N->getOperand(0);
15641
15642 // To zero extend, just mask off everything except for the first bit (in the
15643 // i1 case).
15644 if (N->getOpcode() == ISD::ZERO_EXTEND)
15645 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15646                       DAG.getConstant(APInt::getLowBitsSet(
15647                                         N->getValueSizeInBits(0), PromBits),
15648 dl, N->getValueType(0)));
15649
15650 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15651 "Invalid extension type");
15652 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15653 SDValue ShiftCst =
15654 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15655 return DAG.getNode(
15656 ISD::SRA, dl, N->getValueType(0),
15657 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15658 ShiftCst);
15659}
15660
15661SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15662 DAGCombinerInfo &DCI) const {
15663 assert(N->getOpcode() == ISD::SETCC &&
15664 "Should be called with a SETCC node");
15665
15666 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15667 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15668 SDValue LHS = N->getOperand(0);
15669 SDValue RHS = N->getOperand(1);
15670
15671 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15672 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15673 LHS.hasOneUse())
15674 std::swap(LHS, RHS);
15675
15676 // x == 0-y --> x+y == 0
15677 // x != 0-y --> x+y != 0
15678 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15679 RHS.hasOneUse()) {
15680 SDLoc DL(N);
15681 SelectionDAG &DAG = DCI.DAG;
15682 EVT VT = N->getValueType(0);
15683 EVT OpVT = LHS.getValueType();
15684 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15685 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15686 }
15687 }
15688
15689 return DAGCombineTruncBoolExt(N, DCI);
15690}
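// Illustrative sketch (standalone, assumes only the C++ standard library):
// the rewrite above relies on the modular-arithmetic identity
// x == 0 - y  <=>  x + y == 0, checked here for a handful of 32-bit values.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 2u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t X : Vals)
    for (uint32_t Y : Vals)
      assert((X == 0u - Y) == (X + Y == 0u)); // both sides agree mod 2^32
  return 0;
}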
15691
15692// Is this an extending load from an f32 to an f64?
15693static bool isFPExtLoad(SDValue Op) {
15694 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15695 return LD->getExtensionType() == ISD::EXTLOAD &&
15696 Op.getValueType() == MVT::f64;
15697 return false;
15698}
15699
15700 /// Reduces the number of fp-to-int conversions when building a vector.
15701///
15702/// If this vector is built out of floating to integer conversions,
15703/// transform it to a vector built out of floating point values followed by a
15704/// single floating to integer conversion of the vector.
15705/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15706/// becomes (fptosi (build_vector ($A, $B, ...)))
15707SDValue PPCTargetLowering::
15708combineElementTruncationToVectorTruncation(SDNode *N,
15709 DAGCombinerInfo &DCI) const {
15710 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15711 "Should be called with a BUILD_VECTOR node");
15712
15713 SelectionDAG &DAG = DCI.DAG;
15714 SDLoc dl(N);
15715
15716 SDValue FirstInput = N->getOperand(0);
15717 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15718 "The input operand must be an fp-to-int conversion.");
15719
15720 // This combine happens after legalization so the fp_to_[su]i nodes are
15721   // already converted to PPCISD nodes.
15722 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15723 if (FirstConversion == PPCISD::FCTIDZ ||
15724 FirstConversion == PPCISD::FCTIDUZ ||
15725 FirstConversion == PPCISD::FCTIWZ ||
15726 FirstConversion == PPCISD::FCTIWUZ) {
15727 bool IsSplat = true;
15728 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15729 FirstConversion == PPCISD::FCTIWUZ;
15730 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15731    SmallVector<SDValue, 4> Ops;
15732    EVT TargetVT = N->getValueType(0);
15733 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15734 SDValue NextOp = N->getOperand(i);
15735 if (NextOp.getOpcode() != PPCISD::MFVSR)
15736 return SDValue();
15737 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15738 if (NextConversion != FirstConversion)
15739 return SDValue();
15740 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15741 // This is not valid if the input was originally double precision. It is
15742 // also not profitable to do unless this is an extending load in which
15743 // case doing this combine will allow us to combine consecutive loads.
15744 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15745 return SDValue();
15746 if (N->getOperand(i) != FirstInput)
15747 IsSplat = false;
15748 }
15749
15750 // If this is a splat, we leave it as-is since there will be only a single
15751 // fp-to-int conversion followed by a splat of the integer. This is better
15752 // for 32-bit and smaller ints and neutral for 64-bit ints.
15753 if (IsSplat)
15754 return SDValue();
15755
15756 // Now that we know we have the right type of node, get its operands
15757 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15758 SDValue In = N->getOperand(i).getOperand(0);
15759 if (Is32Bit) {
15760 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15761 // here, we know that all inputs are extending loads so this is safe).
15762 if (In.isUndef())
15763 Ops.push_back(DAG.getUNDEF(SrcVT));
15764 else {
15765 SDValue Trunc =
15766 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15767 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15768 Ops.push_back(Trunc);
15769 }
15770 } else
15771 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15772 }
15773
15774 unsigned Opcode;
15775 if (FirstConversion == PPCISD::FCTIDZ ||
15776 FirstConversion == PPCISD::FCTIWZ)
15777 Opcode = ISD::FP_TO_SINT;
15778 else
15779 Opcode = ISD::FP_TO_UINT;
15780
15781 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15782 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15783 return DAG.getNode(Opcode, dl, TargetVT, BV);
15784 }
15785 return SDValue();
15786}
15787
15788 // The LXVKQ instruction loads a VSX vector with a special quadword value
15789// based on an immediate value. This helper method returns the details of the
15790// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
15791// to help generate the LXVKQ instruction and the subsequent shift instruction
15792// required to match the original build vector pattern.
15793
15794// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
15795using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
15796
15797static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15798
15799 // LXVKQ instruction loads the Quadword value:
15800 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15801 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15802 static const uint32_t Uim = 16;
15803
15804 // Check for direct LXVKQ match (no shift needed)
15805 if (FullVal == BasePattern)
15806 return std::make_tuple(Uim, uint8_t{0});
15807
15808 // Check if FullValue is 1 (the result of the base pattern >> 127)
15809 if (FullVal == APInt(128, 1))
15810 return std::make_tuple(Uim, uint8_t{127});
15811
15812 return std::nullopt;
15813}
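// A hedged note on the helper above: the only two 128-bit images it accepts
// are the MSB-only pattern (loadable directly with LXVKQ, UIM = 16) and the
// LSB-only pattern, which is the MSB-only pattern logically shifted right by
// 127. A minimal APInt sketch of that relationship:
//   APInt Base = APInt(128, 0x8000000000000000ULL) << 64; // 0x8000...0000
//   assert(Base.lshr(127) == APInt(128, 1));              // 0x0000...0001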
15814
15815/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
15816/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
15817 /// The LXVKQ instruction loads a VSX vector with a special quadword value based
15818 /// on an immediate value. If UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
15819/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
15820/// This can be used to inline the build vector constants that have the
15821/// following patterns:
15822///
15823/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
15824/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
15826 /// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern is
15827 /// loaded using a combination of splatting and right shift instructions.
15827
15828SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15829 SelectionDAG &DAG) const {
15830
15831 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
15832 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
15833
15834 // This transformation is only supported if we are loading either a byte,
15835 // halfword, word, or doubleword.
15836 EVT VT = Op.getValueType();
15837 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
15838 VT == MVT::v2i64))
15839 return SDValue();
15840
15841 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
15842 << VT.getEVTString() << "): ";
15843 Op->dump());
15844
15845 unsigned NumElems = VT.getVectorNumElements();
15846 unsigned ElemBits = VT.getScalarSizeInBits();
15847
15848 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
15849
15850   // Bail out if the build vector contains any non-constant operands.
15851 for (const SDValue &Operand : Op.getNode()->op_values()) {
15852 if (!isa<ConstantSDNode>(Operand))
15853 return SDValue();
15854 }
15855
15856 // Assemble build vector operands as a 128-bit register value
15857 // We need to reconstruct what the 128-bit register pattern would be
15858 // that produces this vector when interpreted with the current endianness
15859 APInt FullVal = APInt::getZero(128);
15860
15861 for (unsigned Index = 0; Index < NumElems; ++Index) {
15862 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
15863
15864 // Get element value as raw bits (zero-extended)
15865 uint64_t ElemValue = C->getZExtValue();
15866
15867 // Mask to element size to ensure we only get the relevant bits
15868 if (ElemBits < 64)
15869 ElemValue &= ((1ULL << ElemBits) - 1);
15870
15871 // Calculate bit position for this element in the 128-bit register
15872 unsigned BitPos =
15873 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
15874
15875 // Create APInt for the element value and shift it to correct position
15876 APInt ElemAPInt(128, ElemValue);
15877 ElemAPInt <<= BitPos;
15878
15879 // Place the element value at the correct bit position
15880 FullVal |= ElemAPInt;
15881 }
15882
15883 if (FullVal.isZero() || FullVal.isAllOnes())
15884 return SDValue();
15885
15886 if (auto UIMOpt = getPatternInfo(FullVal)) {
15887 const auto &[Uim, ShiftAmount] = *UIMOpt;
15888 SDLoc Dl(Op);
15889
15890 // Generate LXVKQ instruction if the shift amount is zero.
15891 if (ShiftAmount == 0) {
15892 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
15893 SDValue LxvkqInstr =
15894 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
15895      LLVM_DEBUG(llvm::dbgs()
15896                     << "combineBVLoadsSpecialValue: Instruction Emitted ";
15897 LxvkqInstr.dump());
15898 return LxvkqInstr;
15899 }
15900
15901 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
15902
15903 // The right shifted pattern can be constructed using a combination of
15904 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
15905 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
15906 // value 255.
15907 SDValue ShiftAmountVec =
15908 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
15909 DAG.getTargetConstant(255, Dl, MVT::i32)),
15910 0);
15911 // Generate appropriate right shift instruction
15912 SDValue ShiftVec = SDValue(
15913 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
15914 0);
15915    LLVM_DEBUG(llvm::dbgs()
15916                   << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
15917 ShiftVec.dump());
15918 return ShiftVec;
15919 }
15920 // No patterns matched for build vectors.
15921 return SDValue();
15922}
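// Standalone sketch (illustrative only) of the bit-position arithmetic used
// above when re-assembling the 128-bit register image: for a v4i32 build
// vector, element Index starts at bit Index*32 on little endian and at bit
// 128 - (Index+1)*32 on big endian.
#include <cstdio>

int main() {
  const unsigned ElemBits = 32, NumElems = 4;
  for (unsigned Index = 0; Index < NumElems; ++Index) {
    unsigned LEPos = Index * ElemBits;             // little-endian position
    unsigned BEPos = 128 - (Index + 1) * ElemBits; // big-endian position
    std::printf("element %u: LE bit %u, BE bit %u\n", Index, LEPos, BEPos);
  }
  return 0;
}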
15923
15924/// Reduce the number of loads when building a vector.
15925///
15926/// Building a vector out of multiple loads can be converted to a load
15927/// of the vector type if the loads are consecutive. If the loads are
15928/// consecutive but in descending order, a shuffle is added at the end
15929/// to reorder the vector.
15930 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15931   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15932 "Should be called with a BUILD_VECTOR node");
15933
15934 SDLoc dl(N);
15935
15936 // Return early for non byte-sized type, as they can't be consecutive.
15937 if (!N->getValueType(0).getVectorElementType().isByteSized())
15938 return SDValue();
15939
15940 bool InputsAreConsecutiveLoads = true;
15941 bool InputsAreReverseConsecutive = true;
15942 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
15943 SDValue FirstInput = N->getOperand(0);
15944 bool IsRoundOfExtLoad = false;
15945 LoadSDNode *FirstLoad = nullptr;
15946
15947 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15948 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
15949 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
15950 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15951 }
15952 // Not a build vector of (possibly fp_rounded) loads.
15953 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15954 N->getNumOperands() == 1)
15955 return SDValue();
15956
15957 if (!IsRoundOfExtLoad)
15958 FirstLoad = cast<LoadSDNode>(FirstInput);
15959
15960   SmallVector<LoadSDNode *, 4> InputLoads;
15961   InputLoads.push_back(FirstLoad);
15962 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15963 // If any inputs are fp_round(extload), they all must be.
15964 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
15965 return SDValue();
15966
15967 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
15968 N->getOperand(i);
15969 if (NextInput.getOpcode() != ISD::LOAD)
15970 return SDValue();
15971
15972 SDValue PreviousInput =
15973 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
15974 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
15975 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
15976
15977 // If any inputs are fp_round(extload), they all must be.
15978 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15979 return SDValue();
15980
15981 // We only care about regular loads. The PPC-specific load intrinsics
15982 // will not lead to a merge opportunity.
15983 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
15984 InputsAreConsecutiveLoads = false;
15985 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
15986 InputsAreReverseConsecutive = false;
15987
15988 // Exit early if the loads are neither consecutive nor reverse consecutive.
15989 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15990 return SDValue();
15991 InputLoads.push_back(LD2);
15992 }
15993
15994 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15995 "The loads cannot be both consecutive and reverse consecutive.");
15996
15997 SDValue WideLoad;
15998 SDValue ReturnSDVal;
15999 if (InputsAreConsecutiveLoads) {
16000 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16001 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16002 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16003 FirstLoad->getAlign());
16004 ReturnSDVal = WideLoad;
16005 } else if (InputsAreReverseConsecutive) {
16006 LoadSDNode *LastLoad = InputLoads.back();
16007 assert(LastLoad && "Input needs to be a LoadSDNode.");
16008 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16009 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16010 LastLoad->getAlign());
16011     SmallVector<int, 16> Ops;
16012     for (int i = N->getNumOperands() - 1; i >= 0; i--)
16013 Ops.push_back(i);
16014
16015 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16016 DAG.getUNDEF(N->getValueType(0)), Ops);
16017 } else
16018 return SDValue();
16019
16020 for (auto *LD : InputLoads)
16021 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16022 return ReturnSDVal;
16023}
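// Illustrative sketch (standalone): when the loads are reverse consecutive,
// the wide load is taken from the lowest address (the last input) and the
// descending mask built above simply reverses the element order.
#include <cstdio>
#include <vector>

int main() {
  const int NumOperands = 4; // e.g. a v4i32 build_vector of four i32 loads
  std::vector<int> Mask;
  for (int i = NumOperands - 1; i >= 0; --i)
    Mask.push_back(i);       // yields <3,2,1,0>
  for (int M : Mask)
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}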
16024
16025// This function adds the required vector_shuffle needed to get
16026// the elements of the vector extract in the correct position
16027// as specified by the CorrectElems encoding.
16028 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
16029                                       SDValue Input, uint64_t Elems,
16030 uint64_t CorrectElems) {
16031 SDLoc dl(N);
16032
16033 unsigned NumElems = Input.getValueType().getVectorNumElements();
16034 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16035
16036 // Knowing the element indices being extracted from the original
16037 // vector and the order in which they're being inserted, just put
16038 // them at element indices required for the instruction.
16039 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16040 if (DAG.getDataLayout().isLittleEndian())
16041 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16042 else
16043 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16044 CorrectElems = CorrectElems >> 8;
16045 Elems = Elems >> 8;
16046 }
16047
16048 SDValue Shuffle =
16049 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16050 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16051
16052 EVT VT = N->getValueType(0);
16053 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16054
16055 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16056 Input.getValueType().getVectorElementType(),
16057                                N->getNumOperands());
16058   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16059 DAG.getValueType(ExtVT));
16060}
16061
16062// Look for build vector patterns where input operands come from sign
16063// extended vector_extract elements of specific indices. If the correct indices
16064// aren't used, add a vector shuffle to fix up the indices and create
16065// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16066// during instruction selection.
16067 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
16068   // This array encodes the indices that the vector sign extend instructions
16069 // extract from when extending from one type to another for both BE and LE.
16070   // The right nibble of each byte corresponds to the LE indices,
16071   // and the left nibble of each byte corresponds to the BE indices.
16072 // For example: 0x3074B8FC byte->word
16073 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16074 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16075 // For example: 0x000070F8 byte->double word
16076 // For LE: the allowed indices are: 0x0,0x8
16077 // For BE: the allowed indices are: 0x7,0xF
16078 uint64_t TargetElems[] = {
16079 0x3074B8FC, // b->w
16080 0x000070F8, // b->d
16081 0x10325476, // h->w
16082 0x00003074, // h->d
16083 0x00001032, // w->d
16084 };
16085
16086 uint64_t Elems = 0;
16087 int Index;
16088 SDValue Input;
16089
16090 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16091 if (!Op)
16092 return false;
16093 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16094 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16095 return false;
16096
16097 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16098 // of the right width.
16099 SDValue Extract = Op.getOperand(0);
16100 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16101 Extract = Extract.getOperand(0);
16102 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16103 return false;
16104
16105     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
16106     if (!ExtOp)
16107 return false;
16108
16109 Index = ExtOp->getZExtValue();
16110 if (Input && Input != Extract.getOperand(0))
16111 return false;
16112
16113 if (!Input)
16114 Input = Extract.getOperand(0);
16115
16116 Elems = Elems << 8;
16117 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16118 Elems |= Index;
16119
16120 return true;
16121 };
16122
16123 // If the build vector operands aren't sign extended vector extracts,
16124 // of the same input vector, then return.
16125 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16126 if (!isSExtOfVecExtract(N->getOperand(i))) {
16127 return SDValue();
16128 }
16129 }
16130
16131 // If the vector extract indices are not correct, add the appropriate
16132 // vector_shuffle.
16133 int TgtElemArrayIdx;
16134 int InputSize = Input.getValueType().getScalarSizeInBits();
16135 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16136 if (InputSize + OutputSize == 40)
16137 TgtElemArrayIdx = 0;
16138 else if (InputSize + OutputSize == 72)
16139 TgtElemArrayIdx = 1;
16140 else if (InputSize + OutputSize == 48)
16141 TgtElemArrayIdx = 2;
16142 else if (InputSize + OutputSize == 80)
16143 TgtElemArrayIdx = 3;
16144 else if (InputSize + OutputSize == 96)
16145 TgtElemArrayIdx = 4;
16146 else
16147 return SDValue();
16148
16149 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16150 CorrectElems = DAG.getDataLayout().isLittleEndian()
16151 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16152 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16153 if (Elems != CorrectElems) {
16154 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16155 }
16156
16157 // Regular lowering will catch cases where a shuffle is not needed.
16158 return SDValue();
16159}
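// Standalone sketch (illustrative) of the nibble encoding consumed above:
// each byte of a TargetElems entry packs the allowed big-endian index in its
// high nibble and the allowed little-endian index in its low nibble.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Enc = 0x3074B8FC; // the byte -> word entry
  for (unsigned Byte = 0; (Enc >> (8 * Byte)) != 0; ++Byte) {
    unsigned B = unsigned(Enc >> (8 * Byte)) & 0xFFu;
    std::printf("byte %u: LE index 0x%X, BE index 0x%X\n", Byte, B & 0xFu,
                B >> 4);
  }
  return 0;
}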
16160
16161// Look for the pattern of a load from a narrow width to i128, feeding
16162// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16163// (LXVRZX). This node represents a zero extending load that will be matched
16164// to the Load VSX Vector Rightmost instructions.
16165 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
16166   SDLoc DL(N);
16167
16168 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16169 if (N->getValueType(0) != MVT::v1i128)
16170 return SDValue();
16171
16172 SDValue Operand = N->getOperand(0);
16173 // Proceed with the transformation if the operand to the BUILD_VECTOR
16174 // is a load instruction.
16175 if (Operand.getOpcode() != ISD::LOAD)
16176 return SDValue();
16177
16178 auto *LD = cast<LoadSDNode>(Operand);
16179 EVT MemoryType = LD->getMemoryVT();
16180
16181   // This transformation is only valid if we are loading either a byte,
16182 // halfword, word, or doubleword.
16183 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16184 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16185
16186 // Ensure that the load from the narrow width is being zero extended to i128.
16187 if (!ValidLDType ||
16188 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16189 LD->getExtensionType() != ISD::EXTLOAD))
16190 return SDValue();
16191
16192 SDValue LoadOps[] = {
16193 LD->getChain(), LD->getBasePtr(),
16194 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16195
16196   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16197                                  DAG.getVTList(MVT::v1i128, MVT::Other),
16198 LoadOps, MemoryType, LD->getMemOperand());
16199}
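// Example (illustrative): on an ISA 3.1 target,
//   t0: i128 = zextload <addr>, i32
//   v:  v1i128 = build_vector t0
// is replaced by a single LXVRZX-style node, i.e. a Load VSX Vector Rightmost
// that zero extends the 32-bit memory value into the vector register.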
16200
16201SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16202 DAGCombinerInfo &DCI) const {
16203 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16204 "Should be called with a BUILD_VECTOR node");
16205
16206 SelectionDAG &DAG = DCI.DAG;
16207 SDLoc dl(N);
16208
16209 if (!Subtarget.hasVSX())
16210 return SDValue();
16211
16212 // The target independent DAG combiner will leave a build_vector of
16213 // float-to-int conversions intact. We can generate MUCH better code for
16214 // a float-to-int conversion of a vector of floats.
16215 SDValue FirstInput = N->getOperand(0);
16216 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16217 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16218 if (Reduced)
16219 return Reduced;
16220 }
16221
16222 // If we're building a vector out of consecutive loads, just load that
16223 // vector type.
16224 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16225 if (Reduced)
16226 return Reduced;
16227
16228 // If we're building a vector out of extended elements from another vector
16229 // we have P9 vector integer extend instructions. The code assumes legal
16230 // input types (i.e. it can't handle things like v4i16) so do not run before
16231 // legalization.
16232 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16233 Reduced = combineBVOfVecSExt(N, DAG);
16234 if (Reduced)
16235 return Reduced;
16236 }
16237
16238 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16239 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16240 // is a load from <valid narrow width> to i128.
16241 if (Subtarget.isISA3_1()) {
16242 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16243 if (BVOfZLoad)
16244 return BVOfZLoad;
16245 }
16246
16247 if (N->getValueType(0) != MVT::v2f64)
16248 return SDValue();
16249
16250 // Looking for:
16251 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16252 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16253 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16254 return SDValue();
16255 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16256 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16257 return SDValue();
16258 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16259 return SDValue();
16260
16261 SDValue Ext1 = FirstInput.getOperand(0);
16262 SDValue Ext2 = N->getOperand(1).getOperand(0);
16263 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16264      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16265     return SDValue();
16266
16267 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16268 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16269 if (!Ext1Op || !Ext2Op)
16270 return SDValue();
16271 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16272 Ext1.getOperand(0) != Ext2.getOperand(0))
16273 return SDValue();
16274
16275 int FirstElem = Ext1Op->getZExtValue();
16276 int SecondElem = Ext2Op->getZExtValue();
16277 int SubvecIdx;
16278 if (FirstElem == 0 && SecondElem == 1)
16279 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16280 else if (FirstElem == 2 && SecondElem == 3)
16281 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16282 else
16283 return SDValue();
16284
16285 SDValue SrcVec = Ext1.getOperand(0);
16286 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16287     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16288   return DAG.getNode(NodeType, dl, MVT::v2f64,
16289 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16290}
16291
16292SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16293 DAGCombinerInfo &DCI) const {
16294 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16295 N->getOpcode() == ISD::UINT_TO_FP) &&
16296 "Need an int -> FP conversion node here");
16297
16298 if (useSoftFloat() || !Subtarget.has64BitSupport())
16299 return SDValue();
16300
16301 SelectionDAG &DAG = DCI.DAG;
16302 SDLoc dl(N);
16303 SDValue Op(N, 0);
16304
16305 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16306 // from the hardware.
16307 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16308 return SDValue();
16309 if (!Op.getOperand(0).getValueType().isSimple())
16310 return SDValue();
16311 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16312 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16313 return SDValue();
16314
16315 SDValue FirstOperand(Op.getOperand(0));
16316 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16317 (FirstOperand.getValueType() == MVT::i8 ||
16318 FirstOperand.getValueType() == MVT::i16);
16319 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16320 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16321 bool DstDouble = Op.getValueType() == MVT::f64;
16322 unsigned ConvOp = Signed ?
16323 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16324 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16325 SDValue WidthConst =
16326 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16327 dl, false);
16328 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16329 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16330     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16331                                          DAG.getVTList(MVT::f64, MVT::Other),
16332 Ops, MVT::i8, LDN->getMemOperand());
16333 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16334
16335 // For signed conversion, we need to sign-extend the value in the VSR
16336 if (Signed) {
16337 SDValue ExtOps[] = { Ld, WidthConst };
16338 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16339 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16340 } else
16341 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16342 }
16343
16344
16345 // For i32 intermediate values, unfortunately, the conversion functions
16346   // leave the upper 32 bits of the value undefined. Within the set of
16347 // scalar instructions, we have no method for zero- or sign-extending the
16348 // value. Thus, we cannot handle i32 intermediate values here.
16349 if (Op.getOperand(0).getValueType() == MVT::i32)
16350 return SDValue();
16351
16352 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16353 "UINT_TO_FP is supported only with FPCVT");
16354
16355 // If we have FCFIDS, then use it when converting to single-precision.
16356 // Otherwise, convert to double-precision and then round.
16357 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16358 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16359                                           : PPCISD::FCFIDS)
16360                   : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16361 : PPCISD::FCFID);
16362 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16363 ? MVT::f32
16364 : MVT::f64;
16365
16366 // If we're converting from a float, to an int, and back to a float again,
16367 // then we don't need the store/load pair at all.
16368 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16369 Subtarget.hasFPCVT()) ||
16370 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16371 SDValue Src = Op.getOperand(0).getOperand(0);
16372 if (Src.getValueType() == MVT::f32) {
16373 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16374 DCI.AddToWorklist(Src.getNode());
16375 } else if (Src.getValueType() != MVT::f64) {
16376 // Make sure that we don't pick up a ppc_fp128 source value.
16377 return SDValue();
16378 }
16379
16380 unsigned FCTOp =
16381 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16382                                                          PPCISD::FCTIDUZ;
16383
16384 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16385 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16386
16387 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16388 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16389 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16390 DCI.AddToWorklist(FP.getNode());
16391 }
16392
16393 return FP;
16394 }
16395
16396 return SDValue();
16397}
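// Example (illustrative): for (f64 (sint_to_fp (i64 (fp_to_sint f64:$x)))),
// the combine above keeps the round trip entirely in floating-point
// registers, emitting FCTIDZ followed by FCFID instead of a store/load pair.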
16398
16399// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16400// builtins) into loads with swaps.
16401 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16402                                               DAGCombinerInfo &DCI) const {
16403 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16404 // load combines.
16405 if (DCI.isBeforeLegalizeOps())
16406 return SDValue();
16407
16408 SelectionDAG &DAG = DCI.DAG;
16409 SDLoc dl(N);
16410 SDValue Chain;
16411 SDValue Base;
16412 MachineMemOperand *MMO;
16413
16414 switch (N->getOpcode()) {
16415 default:
16416 llvm_unreachable("Unexpected opcode for little endian VSX load");
16417 case ISD::LOAD: {
16418     LoadSDNode *LD = cast<LoadSDNode>(N);
16419     Chain = LD->getChain();
16420 Base = LD->getBasePtr();
16421 MMO = LD->getMemOperand();
16422 // If the MMO suggests this isn't a load of a full vector, leave
16423 // things alone. For a built-in, we have to make the change for
16424 // correctness, so if there is a size problem that will be a bug.
16425 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16426 return SDValue();
16427 break;
16428 }
16429   case ISD::INTRINSIC_W_CHAIN: {
16430     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16431     Chain = Intrin->getChain();
16432 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16433 // us what we want. Get operand 2 instead.
16434 Base = Intrin->getOperand(2);
16435 MMO = Intrin->getMemOperand();
16436 break;
16437 }
16438 }
16439
16440 MVT VecTy = N->getValueType(0).getSimpleVT();
16441
16442 SDValue LoadOps[] = { Chain, Base };
16443   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16444                                          DAG.getVTList(MVT::v2f64, MVT::Other),
16445 LoadOps, MVT::v2f64, MMO);
16446
16447 DCI.AddToWorklist(Load.getNode());
16448 Chain = Load.getValue(1);
16449 SDValue Swap = DAG.getNode(
16450 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16451 DCI.AddToWorklist(Swap.getNode());
16452
16453 // Add a bitcast if the resulting load type doesn't match v2f64.
16454 if (VecTy != MVT::v2f64) {
16455 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16456 DCI.AddToWorklist(N.getNode());
16457 // Package {bitcast value, swap's chain} to match Load's shape.
16458 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16459 N, Swap.getValue(1));
16460 }
16461
16462 return Swap;
16463}
16464
16465// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16466// builtins) into stores with swaps.
16467 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16468                                                DAGCombinerInfo &DCI) const {
16469 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16470 // store combines.
16471 if (DCI.isBeforeLegalizeOps())
16472 return SDValue();
16473
16474 SelectionDAG &DAG = DCI.DAG;
16475 SDLoc dl(N);
16476 SDValue Chain;
16477 SDValue Base;
16478 unsigned SrcOpnd;
16479 MachineMemOperand *MMO;
16480
16481 switch (N->getOpcode()) {
16482 default:
16483 llvm_unreachable("Unexpected opcode for little endian VSX store");
16484 case ISD::STORE: {
16485     StoreSDNode *ST = cast<StoreSDNode>(N);
16486     Chain = ST->getChain();
16487 Base = ST->getBasePtr();
16488 MMO = ST->getMemOperand();
16489 SrcOpnd = 1;
16490 // If the MMO suggests this isn't a store of a full vector, leave
16491 // things alone. For a built-in, we have to make the change for
16492 // correctness, so if there is a size problem that will be a bug.
16493 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16494 return SDValue();
16495 break;
16496 }
16497 case ISD::INTRINSIC_VOID: {
16498     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16499     Chain = Intrin->getChain();
16500 // Intrin->getBasePtr() oddly does not get what we want.
16501 Base = Intrin->getOperand(3);
16502 MMO = Intrin->getMemOperand();
16503 SrcOpnd = 2;
16504 break;
16505 }
16506 }
16507
16508 SDValue Src = N->getOperand(SrcOpnd);
16509 MVT VecTy = Src.getValueType().getSimpleVT();
16510
16511 // All stores are done as v2f64 and possible bit cast.
16512 if (VecTy != MVT::v2f64) {
16513 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16514 DCI.AddToWorklist(Src.getNode());
16515 }
16516
16517 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16518 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16519 DCI.AddToWorklist(Swap.getNode());
16520 Chain = Swap.getValue(1);
16521 SDValue StoreOps[] = { Chain, Swap, Base };
16522   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16523                                           DAG.getVTList(MVT::Other),
16524 StoreOps, VecTy, MMO);
16525 DCI.AddToWorklist(Store.getNode());
16526 return Store;
16527}
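// Example (illustrative): on little endian, a v4i32 VSX store becomes
//   (stxvd2x (xxswapd (bitcast v2f64 $src)), $ptr)
// so the doubleword-swapped image written by STXVD2X matches the expected
// little-endian memory layout; expandVSXLoadForLE above is the mirror case.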
16528
16529// Handle DAG combine for STORE (FP_TO_INT F).
16530SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16531 DAGCombinerInfo &DCI) const {
16532 SelectionDAG &DAG = DCI.DAG;
16533 SDLoc dl(N);
16534 unsigned Opcode = N->getOperand(1).getOpcode();
16535 (void)Opcode;
16536 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16537
16538 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16539 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16540 && "Not a FP_TO_INT Instruction!");
16541
16542 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16543 EVT Op1VT = N->getOperand(1).getValueType();
16544 EVT ResVT = Val.getValueType();
16545
16546 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16547 return SDValue();
16548
16549 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16550 bool ValidTypeForStoreFltAsInt =
16551 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16552 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16553
16554 // TODO: Lower conversion from f128 on all VSX targets
16555 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16556 return SDValue();
16557
16558 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16559 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16560 return SDValue();
16561
16562 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16563
16564 // Set number of bytes being converted.
16565 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16566 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
16567 DAG.getIntPtrConstant(ByteSize, dl, false),
16568 DAG.getValueType(Op1VT)};
16569
16570   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
16571                                 DAG.getVTList(MVT::Other), Ops,
16572 cast<StoreSDNode>(N)->getMemoryVT(),
16573 cast<StoreSDNode>(N)->getMemOperand());
16574
16575 return Val;
16576}
16577
16578static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16579 // Check that the source of the element keeps flipping
16580   // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
16581 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16582 for (int i = 1, e = Mask.size(); i < e; i++) {
16583 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16584 return false;
16585 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16586 return false;
16587 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16588 }
16589 return true;
16590}
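// Standalone sketch (illustrative) of the alternation test above: the source
// of each mask element must flip between the two input vectors on every step.
#include <cassert>
#include <vector>

int main() {
  const int NumElts = 8;
  auto Alternates = [NumElts](const std::vector<int> &Mask) {
    bool PrevFromFirst = Mask[0] < NumElts;
    for (size_t I = 1; I < Mask.size(); ++I) {
      bool CurFromFirst = Mask[I] < NumElts;
      if (CurFromFirst == PrevFromFirst)
        return false;
      PrevFromFirst = CurFromFirst;
    }
    return true;
  };
  assert(Alternates({0, 8, 1, 9, 2, 10, 3, 11}));  // flips every element
  assert(!Alternates({0, 1, 8, 9, 2, 3, 10, 11})); // repeats a source
  return 0;
}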
16591
16592static bool isSplatBV(SDValue Op) {
16593 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16594 return false;
16595 SDValue FirstOp;
16596
16597 // Find first non-undef input.
16598 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16599 FirstOp = Op.getOperand(i);
16600 if (!FirstOp.isUndef())
16601 break;
16602 }
16603
16604 // All inputs are undef or the same as the first non-undef input.
16605 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16606 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16607 return false;
16608 return true;
16609}
16610
16611 static SDValue isScalarToVec(SDValue Op) {
16612   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16613 return Op;
16614 if (Op.getOpcode() != ISD::BITCAST)
16615 return SDValue();
16616 Op = Op.getOperand(0);
16617 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16618 return Op;
16619 return SDValue();
16620}
16621
16622// Fix up the shuffle mask to account for the fact that the result of
16623// scalar_to_vector is not in lane zero. This just takes all values in
16624// the ranges specified by the min/max indices and adds the number of
16625// elements required to ensure each element comes from the respective
16626// position in the valid lane.
16627// On little endian, that's just the corresponding element in the other
16628// half of the vector. On big endian, it is in the same half but right
16629// justified rather than left justified in that half.
16630 static void fixupShuffleMaskForPermutedSToV(
16631     SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16632 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16633 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16634 int LHSEltFixup =
16635 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16636 int RHSEltFixup =
16637 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16638 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16639 int Idx = ShuffV[I];
16640 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16641 ShuffV[I] += LHSEltFixup;
16642 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16643 ShuffV[I] += RHSEltFixup;
16644 }
16645}
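// Worked example (illustrative): for a v4i32 shuffle whose LHS is a permuted
// scalar_to_vector of an i32 (HalfVec = 2, LHSNumValidElts = 1), a mask entry
// referring to LHS element 0 is rewritten to 2 on little endian (0 + HalfVec)
// and to 1 on big endian (0 + HalfVec - LHSNumValidElts), matching where the
// scalar value actually resides after the permuted move.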
16646
16647// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16648// the original is:
16649// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16650// In such a case, just change the shuffle mask to extract the element
16651// from the permuted index.
16652 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
16653                                const PPCSubtarget &Subtarget) {
16654 SDLoc dl(OrigSToV);
16655 EVT VT = OrigSToV.getValueType();
16656 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16657 "Expecting a SCALAR_TO_VECTOR here");
16658 SDValue Input = OrigSToV.getOperand(0);
16659
16660 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16661 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16662 SDValue OrigVector = Input.getOperand(0);
16663
16664 // Can't handle non-const element indices or different vector types
16665 // for the input to the extract and the output of the scalar_to_vector.
16666 if (Idx && VT == OrigVector.getValueType()) {
16667 unsigned NumElts = VT.getVectorNumElements();
16668 assert(
16669 NumElts > 1 &&
16670 "Cannot produce a permuted scalar_to_vector for one element vector");
16671 SmallVector<int, 16> NewMask(NumElts, -1);
16672 unsigned ResultInElt = NumElts / 2;
16673 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
16674 NewMask[ResultInElt] = Idx->getZExtValue();
16675 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16676 }
16677 }
16678 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16679 OrigSToV.getOperand(0));
16680}
16681
16682 static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
16683                                  int HalfVec, int LHSLastElementDefined,
16684 int RHSLastElementDefined) {
16685 for (int Index : ShuffV) {
16686 if (Index < 0) // Skip explicitly undefined mask indices.
16687 continue;
16688 // Handle first input vector of the vector_shuffle.
16689 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16690 (Index > LHSLastElementDefined))
16691 return false;
16692 // Handle second input vector of the vector_shuffle.
16693 if ((RHSLastElementDefined >= 0) &&
16694 (Index > HalfVec + RHSLastElementDefined))
16695 return false;
16696 }
16697 return true;
16698}
16699
16700 static SDValue generateSToVPermutedForVecShuffle(
16701     int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16702 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16703 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16704 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16705 // Set up the values for the shuffle vector fixup.
16706 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16707 // The last element depends on if the input comes from the LHS or RHS.
16708 //
16709 // For example:
16710 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16711 //
16712 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16713 // because elements 1 and higher of a scalar_to_vector are undefined.
16714 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16715 // because elements 1 and higher of a scalar_to_vector are undefined.
16716 // It is also not 4 because the original scalar_to_vector is wider and
16717 // actually contains two i32 elements.
16718 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16719 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16720 : FirstElt;
16721 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16722 if (SToVPermuted.getValueType() != VecShuffOperandType)
16723 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16724 return SToVPermuted;
16725}
16726
16727// On little endian subtargets, combine shuffles such as:
16728// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16729// into:
16730// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16731// because the latter can be matched to a single instruction merge.
16732// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16733// to put the value into element zero. Adjust the shuffle mask so that the
16734// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16735// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16736// nodes with elements smaller than doubleword because all the ways
16737// of getting scalar data into a vector register put the value in the
16738// rightmost element of the left half of the vector.
16739SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16740 SelectionDAG &DAG) const {
16741 SDValue LHS = SVN->getOperand(0);
16742 SDValue RHS = SVN->getOperand(1);
16743 auto Mask = SVN->getMask();
16744 int NumElts = LHS.getValueType().getVectorNumElements();
16745 SDValue Res(SVN, 0);
16746 SDLoc dl(SVN);
16747 bool IsLittleEndian = Subtarget.isLittleEndian();
16748
16749 // On big endian targets this is only useful for subtargets with direct moves.
16750 // On little endian targets it would be useful for all subtargets with VSX.
16751 // However adding special handling for LE subtargets without direct moves
16752 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16753 // which includes direct moves.
16754 if (!Subtarget.hasDirectMove())
16755 return Res;
16756
16757 // If this is not a shuffle of a shuffle and the first element comes from
16758 // the second vector, canonicalize to the commuted form. This will make it
16759 // more likely to match one of the single instruction patterns.
16760 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16761 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16762 std::swap(LHS, RHS);
16763 Res = DAG.getCommutedVectorShuffle(*SVN);
16764 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16765 }
16766
16767 // Adjust the shuffle mask if either input vector comes from a
16768 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16769 // form (to prevent the need for a swap).
16770 SmallVector<int, 16> ShuffV(Mask);
16771 SDValue SToVLHS = isScalarToVec(LHS);
16772 SDValue SToVRHS = isScalarToVec(RHS);
16773 if (SToVLHS || SToVRHS) {
16774 EVT VT = SVN->getValueType(0);
16775 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16776 int ShuffleNumElts = ShuffV.size();
16777 int HalfVec = ShuffleNumElts / 2;
16778 // The width of the "valid lane" (i.e. the lane that contains the value that
16779 // is vectorized) needs to be expressed in terms of the number of elements
16780 // of the shuffle. It is thereby the ratio of the values before and after
16781 // any bitcast, which will be set later on if the LHS or RHS are
16782 // SCALAR_TO_VECTOR nodes.
16783 unsigned LHSNumValidElts = HalfVec;
16784 unsigned RHSNumValidElts = HalfVec;
16785
16786 // Initially assume that neither input is permuted. These will be adjusted
16787 // accordingly if either input is. Note, that -1 means that all elements
16788 // are undefined.
16789 int LHSFirstElt = 0;
16790 int RHSFirstElt = ShuffleNumElts;
16791 int LHSLastElt = -1;
16792 int RHSLastElt = -1;
16793
16794 // Get the permuted scalar to vector nodes for the source(s) that come from
16795 // ISD::SCALAR_TO_VECTOR.
16796 // On big endian systems, this only makes sense for element sizes smaller
16797 // than 64 bits since for 64-bit elements, all instructions already put
16798 // the value into element zero. Since scalar size of LHS and RHS may differ
16799 // after isScalarToVec, this should be checked using their own sizes.
16800 int LHSScalarSize = 0;
16801 int RHSScalarSize = 0;
16802 if (SToVLHS) {
16803 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16804 if (!IsLittleEndian && LHSScalarSize >= 64)
16805 return Res;
16806 }
16807 if (SToVRHS) {
16808 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16809 if (!IsLittleEndian && RHSScalarSize >= 64)
16810 return Res;
16811 }
16812 if (LHSScalarSize != 0)
16814 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
16815 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
16816 if (RHSScalarSize != 0)
16818 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
16819 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
16820
16821 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
16822 return Res;
16823
16824 // Fix up the shuffle mask to reflect where the desired element actually is.
16825 // The minimum and maximum indices that correspond to element zero for both
16826 // the LHS and RHS are computed and will control which shuffle mask entries
16827 // are to be changed. For example, if the RHS is permuted, any shuffle mask
16828 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
16830 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
16831 LHSNumValidElts, RHSNumValidElts, Subtarget);
16832 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16833
16834 // We may have simplified away the shuffle. We won't be able to do anything
16835 // further with it here.
16836 if (!isa<ShuffleVectorSDNode>(Res))
16837 return Res;
16838 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16839 }
16840
16841 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
16842 // The common case after we commuted the shuffle is that the RHS is a splat
16843 // and we have elements coming in from the splat at indices that are not
16844 // conducive to using a merge.
16845 // Example:
16846 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
16847 if (!isSplatBV(TheSplat))
16848 return Res;
16849
16850 // We are looking for a mask such that all even elements are from
16851 // one vector and all odd elements from the other.
16852 if (!isAlternatingShuffMask(Mask, NumElts))
16853 return Res;
16854
16855 // Adjust the mask so we are pulling in the same index from the splat
16856 // as the index from the interesting vector in consecutive elements.
16857 if (IsLittleEndian) {
16858 // Example (even elements from first vector):
16859 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
16860 if (Mask[0] < NumElts)
16861 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16862 if (ShuffV[i] < 0)
16863 continue;
16864 // If element from non-splat is undef, pick first element from splat.
16865 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16866 }
16867 // Example (odd elements from first vector):
16868 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16869 else
16870 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16871 if (ShuffV[i] < 0)
16872 continue;
16873 // If element from non-splat is undef, pick first element from splat.
16874 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16875 }
16876 } else {
16877 // Example (even elements from first vector):
16878 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16879 if (Mask[0] < NumElts)
16880 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16881 if (ShuffV[i] < 0)
16882 continue;
16883 // If element from non-splat is undef, pick first element from splat.
16884 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16885 }
16886 // Example (odd elements from first vector):
16887 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16888 else
16889 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16890 if (ShuffV[i] < 0)
16891 continue;
16892 // If element from non-splat is undef, pick first element from splat.
16893 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16894 }
16895 }
16896
16897 // If the RHS has undefs, we need to remove them since we may have created
16898 // a shuffle that adds those instead of the splat value.
16899 SDValue SplatVal =
16900 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16901 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
16902
16903 if (IsLittleEndian)
16904 RHS = TheSplat;
16905 else
16906 LHS = TheSplat;
16907 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16908}
16909
16910SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16911 LSBaseSDNode *LSBase,
16912 DAGCombinerInfo &DCI) const {
16913 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16914 "Not a reverse memop pattern!");
16915
16916 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16917 auto Mask = SVN->getMask();
16918 int i = 0;
16919 auto I = Mask.rbegin();
16920 auto E = Mask.rend();
16921
16922 for (; I != E; ++I) {
16923 if (*I != i)
16924 return false;
16925 i++;
16926 }
16927 return true;
16928 };
16929
16930 SelectionDAG &DAG = DCI.DAG;
16931 EVT VT = SVN->getValueType(0);
16932
16933 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16934 return SDValue();
16935
16936  // Before P9, the PPCVSXSwapRemoval pass hacks the element order instead.
16937  // See the comment in PPCVSXSwapRemoval.cpp.
16938  // This combine conflicts with that pass, so we don't do it here.
16939 if (!Subtarget.hasP9Vector())
16940 return SDValue();
16941
16942  if (!IsElementReverse(SVN))
16943 return SDValue();
16944
16945 if (LSBase->getOpcode() == ISD::LOAD) {
16946    // If result 0 of the load has any user other than the shufflevector
16947    // instruction, it is not profitable to replace the shufflevector with
16948    // a reverse load.
16949 for (SDUse &Use : LSBase->uses())
16950 if (Use.getResNo() == 0 &&
16951 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16952 return SDValue();
16953
16954 SDLoc dl(LSBase);
16955 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16956 return DAG.getMemIntrinsicNode(
16957 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
16958 LSBase->getMemoryVT(), LSBase->getMemOperand());
16959 }
16960
16961 if (LSBase->getOpcode() == ISD::STORE) {
16962 // If there are other uses of the shuffle, the swap cannot be avoided.
16963 // Forcing the use of an X-Form (since swapped stores only have
16964 // X-Forms) without removing the swap is unprofitable.
16965 if (!SVN->hasOneUse())
16966 return SDValue();
16967
16968 SDLoc dl(LSBase);
16969 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
16970 LSBase->getBasePtr()};
16971 return DAG.getMemIntrinsicNode(
16972 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
16973 LSBase->getMemoryVT(), LSBase->getMemOperand());
16974 }
16975
16976 llvm_unreachable("Expected a load or store node here");
16977}
16978
16979static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16980 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
16981 if (IntrinsicID == Intrinsic::ppc_stdcx)
16982 StoreWidth = 8;
16983 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16984 StoreWidth = 4;
16985 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16986 StoreWidth = 2;
16987 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16988 StoreWidth = 1;
16989 else
16990 return false;
16991 return true;
16992}
16993
16994static SDValue DAGCombineAddc(SDNode *N,
16995                              PPCTargetLowering::DAGCombinerInfo &DCI) {
16996 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
16997 // (ADDC (ADDE 0, 0, C), -1) -> C
16998 SDValue LHS = N->getOperand(0);
16999 SDValue RHS = N->getOperand(1);
17000 if (LHS->getOpcode() == PPCISD::ADDE &&
17001 isNullConstant(LHS->getOperand(0)) &&
17002 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
17003 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
17004 }
17005 }
17006 return SDValue();
17007}
17008
17009SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
17010 DAGCombinerInfo &DCI) const {
17011 SelectionDAG &DAG = DCI.DAG;
17012 SDLoc dl(N);
17013 switch (N->getOpcode()) {
17014 default: break;
17015 case ISD::ADD:
17016 return combineADD(N, DCI);
17017 case ISD::AND: {
17018 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17019 // original input as that will prevent us from selecting optimal rotates.
17020 // This only matters if the input to the extend is i32 widened to i64.
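    // Editor's illustrative sketch (assumption based on the code below): a DAG
    // such as
    //   (and (zext (srl (i32 X), 4)), 0xFF) : i64
    // is rewritten as
    //   (zext (and (srl (i32 X), 4), 0xFF)) : i64
    // so the 32-bit and-of-shift can later be matched by a single
    // rotate-and-mask instruction (e.g. rlwinm) rather than being blocked by
    // the wide i64 AND.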
17021 SDValue Op1 = N->getOperand(0);
17022 SDValue Op2 = N->getOperand(1);
17023 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17024 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17025 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17026 Op1.getOperand(0).getValueType() != MVT::i32)
17027 break;
17028 SDValue NarrowOp = Op1.getOperand(0);
17029 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17030 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17031 break;
17032
17033 uint64_t Imm = Op2->getAsZExtVal();
17034 // Make sure that the constant is narrow enough to fit in the narrow type.
17035 if (!isUInt<32>(Imm))
17036 break;
17037 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17038 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17039 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17040 }
17041 case ISD::SHL:
17042 return combineSHL(N, DCI);
17043 case ISD::SRA:
17044 return combineSRA(N, DCI);
17045 case ISD::SRL:
17046 return combineSRL(N, DCI);
17047 case ISD::MUL:
17048 return combineMUL(N, DCI);
17049 case ISD::FMA:
17050 case PPCISD::FNMSUB:
17051 return combineFMALike(N, DCI);
17052 case PPCISD::SHL:
17053 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17054 return N->getOperand(0);
17055 break;
17056 case PPCISD::SRL:
17057 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17058 return N->getOperand(0);
17059 break;
17060 case PPCISD::SRA:
17061 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17062 if (C->isZero() || // 0 >>s V -> 0.
17063 C->isAllOnes()) // -1 >>s V -> -1.
17064 return N->getOperand(0);
17065 }
17066 break;
17067 case ISD::SIGN_EXTEND:
17068 case ISD::ZERO_EXTEND:
17069 case ISD::ANY_EXTEND:
17070 return DAGCombineExtBoolTrunc(N, DCI);
17071 case ISD::TRUNCATE:
17072 return combineTRUNCATE(N, DCI);
17073 case ISD::SETCC:
17074 if (SDValue CSCC = combineSetCC(N, DCI))
17075 return CSCC;
17076 [[fallthrough]];
17077 case ISD::SELECT_CC:
17078 return DAGCombineTruncBoolExt(N, DCI);
17079 case ISD::SINT_TO_FP:
17080 case ISD::UINT_TO_FP:
17081 return combineFPToIntToFP(N, DCI);
17082  case ISD::VECTOR_SHUFFLE:
17083 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17084 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17085 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17086 }
17087 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17088 case ISD::STORE: {
17089
17090 EVT Op1VT = N->getOperand(1).getValueType();
17091 unsigned Opcode = N->getOperand(1).getOpcode();
17092
17093 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17094 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17095 SDValue Val = combineStoreFPToInt(N, DCI);
17096 if (Val)
17097 return Val;
17098 }
17099
17100 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17101 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17102      SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17103 if (Val)
17104 return Val;
17105 }
17106
17107 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17108 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17109 N->getOperand(1).getNode()->hasOneUse() &&
17110 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17111 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17112
17113 // STBRX can only handle simple types and it makes no sense to store less
17114      // than two bytes in byte-reversed order.
17115 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17116 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17117 break;
17118
17119 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17120 // Do an any-extend to 32-bits if this is a half-word input.
17121 if (BSwapOp.getValueType() == MVT::i16)
17122 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17123
17124      // If the type of the BSWAP operand is wider than the stored memory width,
17125      // it needs to be shifted right before STBRX.
17126 if (Op1VT.bitsGT(mVT)) {
17127 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17128 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17129 DAG.getConstant(Shift, dl, MVT::i32));
17130 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17131 if (Op1VT == MVT::i64)
17132 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17133 }
17134
17135 SDValue Ops[] = {
17136 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17137 };
17138 return
17139 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17140 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17141 cast<StoreSDNode>(N)->getMemOperand());
17142 }
17143
17144 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17145    // Doing so increases the chance of CSEing the constant construction.
17146 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17147 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17148      // Need to sign-extend to 64 bits to handle negative values.
17149 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17150 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17151 MemVT.getSizeInBits());
17152 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17153
17154 auto *ST = cast<StoreSDNode>(N);
17155 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17156 ST->getBasePtr(), ST->getOffset(), MemVT,
17157 ST->getMemOperand(), ST->getAddressingMode(),
17158 /*IsTruncating=*/true);
17159 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17160 // new store which will change the constant by removing non-demanded bits.
17161 return ST->isUnindexed()
17162 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17163 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17164 }
17165
17166    // For little endian, VSX stores require generating xxswapd/stxvd2x.
17167 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17168 if (Op1VT.isSimple()) {
17169 MVT StoreVT = Op1VT.getSimpleVT();
17170 if (Subtarget.needsSwapsForVSXMemOps() &&
17171 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17172 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17173 return expandVSXStoreForLE(N, DCI);
17174 }
17175 break;
17176 }
17177 case ISD::LOAD: {
17178    LoadSDNode *LD = cast<LoadSDNode>(N);
17179 EVT VT = LD->getValueType(0);
17180
17181 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17182 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17183 if (VT.isSimple()) {
17184 MVT LoadVT = VT.getSimpleVT();
17185 if (Subtarget.needsSwapsForVSXMemOps() &&
17186 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17187 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17188 return expandVSXLoadForLE(N, DCI);
17189 }
17190
17191 // We sometimes end up with a 64-bit integer load, from which we extract
17192 // two single-precision floating-point numbers. This happens with
17193 // std::complex<float>, and other similar structures, because of the way we
17194 // canonicalize structure copies. However, if we lack direct moves,
17195 // then the final bitcasts from the extracted integer values to the
17196 // floating-point numbers turn into store/load pairs. Even with direct moves,
17197 // just loading the two floating-point numbers is likely better.
17198 auto ReplaceTwoFloatLoad = [&]() {
17199 if (VT != MVT::i64)
17200 return false;
17201
17202 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17203 LD->isVolatile())
17204 return false;
17205
17206 // We're looking for a sequence like this:
17207 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17208 // t16: i64 = srl t13, Constant:i32<32>
17209 // t17: i32 = truncate t16
17210 // t18: f32 = bitcast t17
17211 // t19: i32 = truncate t13
17212 // t20: f32 = bitcast t19
17213
17214 if (!LD->hasNUsesOfValue(2, 0))
17215 return false;
17216
17217 auto UI = LD->user_begin();
17218 while (UI.getUse().getResNo() != 0) ++UI;
17219 SDNode *Trunc = *UI++;
17220 while (UI.getUse().getResNo() != 0) ++UI;
17221 SDNode *RightShift = *UI;
17222 if (Trunc->getOpcode() != ISD::TRUNCATE)
17223 std::swap(Trunc, RightShift);
17224
17225 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17226 Trunc->getValueType(0) != MVT::i32 ||
17227 !Trunc->hasOneUse())
17228 return false;
17229 if (RightShift->getOpcode() != ISD::SRL ||
17230 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17231 RightShift->getConstantOperandVal(1) != 32 ||
17232 !RightShift->hasOneUse())
17233 return false;
17234
17235 SDNode *Trunc2 = *RightShift->user_begin();
17236 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17237 Trunc2->getValueType(0) != MVT::i32 ||
17238 !Trunc2->hasOneUse())
17239 return false;
17240
17241 SDNode *Bitcast = *Trunc->user_begin();
17242 SDNode *Bitcast2 = *Trunc2->user_begin();
17243
17244 if (Bitcast->getOpcode() != ISD::BITCAST ||
17245 Bitcast->getValueType(0) != MVT::f32)
17246 return false;
17247 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17248 Bitcast2->getValueType(0) != MVT::f32)
17249 return false;
17250
17251 if (Subtarget.isLittleEndian())
17252 std::swap(Bitcast, Bitcast2);
17253
17254 // Bitcast has the second float (in memory-layout order) and Bitcast2
17255 // has the first one.
17256
17257 SDValue BasePtr = LD->getBasePtr();
17258 if (LD->isIndexed()) {
17259 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17260 "Non-pre-inc AM on PPC?");
17261 BasePtr =
17262 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17263 LD->getOffset());
17264 }
17265
17266 auto MMOFlags =
17267 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17268 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17269 LD->getPointerInfo(), LD->getAlign(),
17270 MMOFlags, LD->getAAInfo());
17271 SDValue AddPtr =
17272 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17273 BasePtr, DAG.getIntPtrConstant(4, dl));
17274 SDValue FloatLoad2 = DAG.getLoad(
17275 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17276 LD->getPointerInfo().getWithOffset(4),
17277 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17278
17279 if (LD->isIndexed()) {
17280 // Note that DAGCombine should re-form any pre-increment load(s) from
17281 // what is produced here if that makes sense.
17282 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17283 }
17284
17285 DCI.CombineTo(Bitcast2, FloatLoad);
17286 DCI.CombineTo(Bitcast, FloatLoad2);
17287
17288 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17289 SDValue(FloatLoad2.getNode(), 1));
17290 return true;
17291 };
17292
17293 if (ReplaceTwoFloatLoad())
17294 return SDValue(N, 0);
17295
17296 EVT MemVT = LD->getMemoryVT();
17297 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17298 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17299 if (LD->isUnindexed() && VT.isVector() &&
17300 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17301 // P8 and later hardware should just use LOAD.
17302 !Subtarget.hasP8Vector() &&
17303 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17304 VT == MVT::v4f32))) &&
17305 LD->getAlign() < ABIAlignment) {
17306 // This is a type-legal unaligned Altivec load.
17307 SDValue Chain = LD->getChain();
17308 SDValue Ptr = LD->getBasePtr();
17309 bool isLittleEndian = Subtarget.isLittleEndian();
17310
17311 // This implements the loading of unaligned vectors as described in
17312 // the venerable Apple Velocity Engine overview. Specifically:
17313 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17314 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17315 //
17316 // The general idea is to expand a sequence of one or more unaligned
17317 // loads into an alignment-based permutation-control instruction (lvsl
17318 // or lvsr), a series of regular vector loads (which always truncate
17319 // their input address to an aligned address), and a series of
17320 // permutations. The results of these permutations are the requested
17321 // loaded values. The trick is that the last "extra" load is not taken
17322 // from the address you might suspect (sizeof(vector) bytes after the
17323 // last requested load), but rather sizeof(vector) - 1 bytes after the
17324 // last requested vector. The point of this is to avoid a page fault if
17325 // the base address happened to be aligned. This works because if the
17326 // base address is aligned, then adding less than a full vector length
17327 // will cause the last vector in the sequence to be (re)loaded.
17328 // Otherwise, the next vector will be fetched as you might suspect was
17329 // necessary.
17330
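      // Editor's illustrative sketch (not from the original source) of the
      // sequence generated below for one unaligned vector load from Ptr:
      //   PermCntl = lvsl/lvsr Ptr            ; alignment-based permute control
      //   Base     = lvx Ptr                  ; loads from Ptr rounded down
      //   Extra    = lvx Ptr + 15             ; usually sizeof(vector)-1 later
      //   Result   = vperm Base, Extra, PermCntl   ; operands swapped on LE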
17331 // We might be able to reuse the permutation generation from
17332 // a different base address offset from this one by an aligned amount.
17333 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17334 // optimization later.
17335 Intrinsic::ID Intr, IntrLD, IntrPerm;
17336 MVT PermCntlTy, PermTy, LDTy;
17337 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17338 : Intrinsic::ppc_altivec_lvsl;
17339 IntrLD = Intrinsic::ppc_altivec_lvx;
17340 IntrPerm = Intrinsic::ppc_altivec_vperm;
17341 PermCntlTy = MVT::v16i8;
17342 PermTy = MVT::v4i32;
17343 LDTy = MVT::v4i32;
17344
17345 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17346
17347 // Create the new MMO for the new base load. It is like the original MMO,
17348 // but represents an area in memory almost twice the vector size centered
17349 // on the original address. If the address is unaligned, we might start
17350 // reading up to (sizeof(vector)-1) bytes below the address of the
17351 // original unaligned load.
17352      MachineFunction &MF = DAG.getMachineFunction();
17353 MachineMemOperand *BaseMMO =
17354 MF.getMachineMemOperand(LD->getMemOperand(),
17355 -(int64_t)MemVT.getStoreSize()+1,
17356 2*MemVT.getStoreSize()-1);
17357
17358 // Create the new base load.
17359 SDValue LDXIntID =
17360 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17361 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17362 SDValue BaseLoad =
17363        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17364 DAG.getVTList(PermTy, MVT::Other),
17365 BaseLoadOps, LDTy, BaseMMO);
17366
17367 // Note that the value of IncOffset (which is provided to the next
17368 // load's pointer info offset value, and thus used to calculate the
17369 // alignment), and the value of IncValue (which is actually used to
17370 // increment the pointer value) are different! This is because we
17371 // require the next load to appear to be aligned, even though it
17372 // is actually offset from the base pointer by a lesser amount.
17373 int IncOffset = VT.getSizeInBits() / 8;
17374 int IncValue = IncOffset;
17375
17376 // Walk (both up and down) the chain looking for another load at the real
17377 // (aligned) offset (the alignment of the other load does not matter in
17378 // this case). If found, then do not use the offset reduction trick, as
17379 // that will prevent the loads from being later combined (as they would
17380 // otherwise be duplicates).
17381 if (!findConsecutiveLoad(LD, DAG))
17382 --IncValue;
17383
17384      SDValue Increment =
17385 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17386 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17387
17388 MachineMemOperand *ExtraMMO =
17389 MF.getMachineMemOperand(LD->getMemOperand(),
17390 1, 2*MemVT.getStoreSize()-1);
17391 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17392 SDValue ExtraLoad =
17393        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17394 DAG.getVTList(PermTy, MVT::Other),
17395 ExtraLoadOps, LDTy, ExtraMMO);
17396
17397 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17398 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17399
17400 // Because vperm has a big-endian bias, we must reverse the order
17401 // of the input vectors and complement the permute control vector
17402 // when generating little endian code. We have already handled the
17403 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17404 // and ExtraLoad here.
17405 SDValue Perm;
17406 if (isLittleEndian)
17407 Perm = BuildIntrinsicOp(IntrPerm,
17408 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17409 else
17410 Perm = BuildIntrinsicOp(IntrPerm,
17411 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17412
17413 if (VT != PermTy)
17414 Perm = Subtarget.hasAltivec()
17415 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17416 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17417 DAG.getTargetConstant(1, dl, MVT::i64));
17418 // second argument is 1 because this rounding
17419 // is always exact.
17420
17421 // The output of the permutation is our loaded result, the TokenFactor is
17422 // our new chain.
17423 DCI.CombineTo(N, Perm, TF);
17424 return SDValue(N, 0);
17425 }
17426 }
17427 break;
17428  case ISD::INTRINSIC_WO_CHAIN: {
17429 bool isLittleEndian = Subtarget.isLittleEndian();
17430 unsigned IID = N->getConstantOperandVal(0);
17431 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17432 : Intrinsic::ppc_altivec_lvsl);
17433 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17434 SDValue Add = N->getOperand(1);
17435
17436 int Bits = 4 /* 16 byte alignment */;
17437
17438 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17439 APInt::getAllOnes(Bits /* alignment */)
17440 .zext(Add.getScalarValueSizeInBits()))) {
17441 SDNode *BasePtr = Add->getOperand(0).getNode();
17442 for (SDNode *U : BasePtr->users()) {
17443 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17444 U->getConstantOperandVal(0) == IID) {
17445 // We've found another LVSL/LVSR, and this address is an aligned
17446 // multiple of that one. The results will be the same, so use the
17447 // one we've just found instead.
17448
17449 return SDValue(U, 0);
17450 }
17451 }
17452 }
17453
17454 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17455 SDNode *BasePtr = Add->getOperand(0).getNode();
17456 for (SDNode *U : BasePtr->users()) {
17457 if (U->getOpcode() == ISD::ADD &&
17458 isa<ConstantSDNode>(U->getOperand(1)) &&
17459 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17460 (1ULL << Bits) ==
17461 0) {
17462 SDNode *OtherAdd = U;
17463 for (SDNode *V : OtherAdd->users()) {
17464 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17465 V->getConstantOperandVal(0) == IID) {
17466 return SDValue(V, 0);
17467 }
17468 }
17469 }
17470 }
17471 }
17472 }
17473
17474 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
17475    // Expose the vabsduw/h/b opportunity for downstream passes.
17476 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17477 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17478 IID == Intrinsic::ppc_altivec_vmaxsh ||
17479 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17480 SDValue V1 = N->getOperand(1);
17481 SDValue V2 = N->getOperand(2);
17482 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17483 V1.getSimpleValueType() == MVT::v8i16 ||
17484 V1.getSimpleValueType() == MVT::v16i8) &&
17485         V1.getSimpleValueType() == V2.getSimpleValueType()) {
17486 // (0-a, a)
17487 if (V1.getOpcode() == ISD::SUB &&
17488            ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
17489 V1.getOperand(1) == V2) {
17490 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17491 }
17492 // (a, 0-a)
17493 if (V2.getOpcode() == ISD::SUB &&
17494            ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
17495 V2.getOperand(1) == V1) {
17496 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17497 }
17498 // (x-y, y-x)
17499 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17500 V1.getOperand(0) == V2.getOperand(1) &&
17501 V1.getOperand(1) == V2.getOperand(0)) {
17502 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17503 }
17504 }
17505 }
17506 }
17507
17508 break;
17509  case ISD::INTRINSIC_W_CHAIN:
17510 switch (N->getConstantOperandVal(1)) {
17511 default:
17512 break;
17513 case Intrinsic::ppc_altivec_vsum4sbs:
17514 case Intrinsic::ppc_altivec_vsum4shs:
17515 case Intrinsic::ppc_altivec_vsum4ubs: {
17516 // These sum-across intrinsics only have a chain due to the side effect
17517 // that they may set the SAT bit. If we know the SAT bit will not be set
17518 // for some inputs, we can replace any uses of their chain with the
17519 // input chain.
17520 if (BuildVectorSDNode *BVN =
17521 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17522 APInt APSplatBits, APSplatUndef;
17523 unsigned SplatBitSize;
17524 bool HasAnyUndefs;
17525 bool BVNIsConstantSplat = BVN->isConstantSplat(
17526 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17527 !Subtarget.isLittleEndian());
17528 // If the constant splat vector is 0, the SAT bit will not be set.
17529 if (BVNIsConstantSplat && APSplatBits == 0)
17530 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17531 }
17532 return SDValue();
17533 }
17534 case Intrinsic::ppc_vsx_lxvw4x:
17535 case Intrinsic::ppc_vsx_lxvd2x:
17536 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17537 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17538 if (Subtarget.needsSwapsForVSXMemOps())
17539 return expandVSXLoadForLE(N, DCI);
17540 break;
17541 }
17542 break;
17543  case ISD::INTRINSIC_VOID:
17544 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17545 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17546 if (Subtarget.needsSwapsForVSXMemOps()) {
17547 switch (N->getConstantOperandVal(1)) {
17548 default:
17549 break;
17550 case Intrinsic::ppc_vsx_stxvw4x:
17551 case Intrinsic::ppc_vsx_stxvd2x:
17552 return expandVSXStoreForLE(N, DCI);
17553 }
17554 }
17555 break;
17556 case ISD::BSWAP: {
17557 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17558 // For subtargets without LDBRX, we can still do better than the default
17559 // expansion even for 64-bit BSWAP (LOAD).
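    // Editor's illustrative note (assumption based on the code below): without
    // LDBRX, a bswap of a loaded i64 is split into two i32 loads whose BSWAPs
    // select lwbrx, and the halves are recombined with a BUILD_PAIR, which is
    // still cheaper than the generic shift-and-mask byte-reversal expansion.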
17560 bool Is64BitBswapOn64BitTgt =
17561 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17562 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17563 N->getOperand(0).hasOneUse();
17564 if (IsSingleUseNormalLd &&
17565 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17566 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17567 SDValue Load = N->getOperand(0);
17568 LoadSDNode *LD = cast<LoadSDNode>(Load);
17569 // Create the byte-swapping load.
17570 SDValue Ops[] = {
17571 LD->getChain(), // Chain
17572 LD->getBasePtr(), // Ptr
17573 DAG.getValueType(N->getValueType(0)) // VT
17574 };
17575 SDValue BSLoad =
17576      DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
17577 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
17578 MVT::i64 : MVT::i32, MVT::Other),
17579 Ops, LD->getMemoryVT(), LD->getMemOperand());
17580
17581 // If this is an i16 load, insert the truncate.
17582 SDValue ResVal = BSLoad;
17583 if (N->getValueType(0) == MVT::i16)
17584 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
17585
17586 // First, combine the bswap away. This makes the value produced by the
17587 // load dead.
17588 DCI.CombineTo(N, ResVal);
17589
17590    // Next, combine the load away; we give it a bogus result value but a real
17591 // chain result. The result value is dead because the bswap is dead.
17592 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
17593
17594 // Return N so it doesn't get rechecked!
17595 return SDValue(N, 0);
17596 }
17597 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17598 // before legalization so that the BUILD_PAIR is handled correctly.
17599 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17600 !IsSingleUseNormalLd)
17601 return SDValue();
17602 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
17603
17604 // Can't split volatile or atomic loads.
17605 if (!LD->isSimple())
17606 return SDValue();
17607 SDValue BasePtr = LD->getBasePtr();
17608 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
17609 LD->getPointerInfo(), LD->getAlign());
17610 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
17611 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17612 DAG.getIntPtrConstant(4, dl));
17613    MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
17614 LD->getMemOperand(), 4, 4);
17615 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
17616 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
17617 SDValue Res;
17618 if (Subtarget.isLittleEndian())
17619 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
17620 else
17621 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
17622 SDValue TF =
17623 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17624 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
17625 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
17626 return Res;
17627 }
17628 case PPCISD::VCMP:
17629 // If a VCMP_rec node already exists with exactly the same operands as this
17630 // node, use its result instead of this node (VCMP_rec computes both a CR6
17631 // and a normal output).
17632 //
17633 if (!N->getOperand(0).hasOneUse() &&
17634 !N->getOperand(1).hasOneUse() &&
17635 !N->getOperand(2).hasOneUse()) {
17636
17637 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17638 SDNode *VCMPrecNode = nullptr;
17639
17640 SDNode *LHSN = N->getOperand(0).getNode();
17641 for (SDNode *User : LHSN->users())
17642 if (User->getOpcode() == PPCISD::VCMP_rec &&
17643 User->getOperand(1) == N->getOperand(1) &&
17644 User->getOperand(2) == N->getOperand(2) &&
17645 User->getOperand(0) == N->getOperand(0)) {
17646 VCMPrecNode = User;
17647 break;
17648 }
17649
17650 // If there is no VCMP_rec node, or if the flag value has a single use,
17651 // don't transform this.
17652 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
17653 break;
17654
17655 // Look at the (necessarily single) use of the flag value. If it has a
17656 // chain, this transformation is more complex. Note that multiple things
17657 // could use the value result, which we should ignore.
17658 SDNode *FlagUser = nullptr;
17659 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17660 FlagUser == nullptr; ++UI) {
17661 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17662 SDNode *User = UI->getUser();
17663 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17664 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
17665 FlagUser = User;
17666 break;
17667 }
17668 }
17669 }
17670
17671 // If the user is a MFOCRF instruction, we know this is safe.
17672 // Otherwise we give up for right now.
17673 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17674 return SDValue(VCMPrecNode, 0);
17675 }
17676 break;
17677 case ISD::BR_CC: {
17678 // If this is a branch on an altivec predicate comparison, lower this so
17679 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17680 // lowering is done pre-legalize, because the legalizer lowers the predicate
17681 // compare down to code that is difficult to reassemble.
17682 // This code also handles branches that depend on the result of a store
17683 // conditional.
17684 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
17685 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
17686
17687 int CompareOpc;
17688 bool isDot;
17689
17690 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17691 break;
17692
17693 // Since we are doing this pre-legalize, the RHS can be a constant of
17694 // arbitrary bitwidth which may cause issues when trying to get the value
17695 // from the underlying APInt.
17696 auto RHSAPInt = RHS->getAsAPIntVal();
17697 if (!RHSAPInt.isIntN(64))
17698 break;
17699
17700 unsigned Val = RHSAPInt.getZExtValue();
17701 auto isImpossibleCompare = [&]() {
17702 // If this is a comparison against something other than 0/1, then we know
17703 // that the condition is never/always true.
17704 if (Val != 0 && Val != 1) {
17705 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17706 return N->getOperand(0);
17707 // Always !=, turn it into an unconditional branch.
17708 return DAG.getNode(ISD::BR, dl, MVT::Other,
17709 N->getOperand(0), N->getOperand(4));
17710 }
17711 return SDValue();
17712 };
17713 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17714 unsigned StoreWidth = 0;
17715 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17716 isStoreConditional(LHS, StoreWidth)) {
17717 if (SDValue Impossible = isImpossibleCompare())
17718 return Impossible;
17719 PPC::Predicate CompOpc;
17720 // eq 0 => ne
17721 // ne 0 => eq
17722 // eq 1 => eq
17723 // ne 1 => ne
17724 if (Val == 0)
17725 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17726 else
17727 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17728
17729 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
17730 DAG.getConstant(StoreWidth, dl, MVT::i32)};
17731 auto *MemNode = cast<MemSDNode>(LHS);
17732 SDValue ConstSt = DAG.getMemIntrinsicNode(
17733        PPCISD::STORE_COND, dl,
17734 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
17735 MemNode->getMemoryVT(), MemNode->getMemOperand());
17736
17737 SDValue InChain;
17738 // Unchain the branch from the original store conditional.
17739 if (N->getOperand(0) == LHS.getValue(1))
17740 InChain = LHS.getOperand(0);
17741 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
17742 SmallVector<SDValue, 4> InChains;
17743 SDValue InTF = N->getOperand(0);
17744 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17745 if (InTF.getOperand(i) != LHS.getValue(1))
17746 InChains.push_back(InTF.getOperand(i));
17747 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
17748 }
17749
17750 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
17751 DAG.getConstant(CompOpc, dl, MVT::i32),
17752 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
17753 ConstSt.getValue(2));
17754 }
17755
17756 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17757 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
17758 assert(isDot && "Can't compare against a vector result!");
17759
17760 if (SDValue Impossible = isImpossibleCompare())
17761 return Impossible;
17762
17763 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17764 // Create the PPCISD altivec 'dot' comparison node.
17765 SDValue Ops[] = {
17766 LHS.getOperand(2), // LHS of compare
17767 LHS.getOperand(3), // RHS of compare
17768 DAG.getConstant(CompareOpc, dl, MVT::i32)
17769 };
17770 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
17771 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
17772
17773 // Unpack the result based on how the target uses it.
17774 PPC::Predicate CompOpc;
17775 switch (LHS.getConstantOperandVal(1)) {
17776 default: // Can't happen, don't crash on invalid number though.
17777 case 0: // Branch on the value of the EQ bit of CR6.
17778 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17779 break;
17780 case 1: // Branch on the inverted value of the EQ bit of CR6.
17781 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17782 break;
17783 case 2: // Branch on the value of the LT bit of CR6.
17784 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17785 break;
17786 case 3: // Branch on the inverted value of the LT bit of CR6.
17787 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17788 break;
17789 }
17790
17791 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
17792 DAG.getConstant(CompOpc, dl, MVT::i32),
17793 DAG.getRegister(PPC::CR6, MVT::i32),
17794 N->getOperand(4), CompNode.getValue(1));
17795 }
17796 break;
17797 }
17798 case ISD::BUILD_VECTOR:
17799 return DAGCombineBuildVector(N, DCI);
17800 case PPCISD::ADDC:
17801 return DAGCombineAddc(N, DCI);
17802 }
17803
17804 return SDValue();
17805}
17806
17807SDValue
17808PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
17809 SelectionDAG &DAG,
17810 SmallVectorImpl<SDNode *> &Created) const {
17811 // fold (sdiv X, pow2)
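  // Editor's illustration (based on the code below), e.g.:
  //   (sdiv X, 4)  -> (SRA_ADDZE X, 2)
  //   (sdiv X, -4) -> (sub 0, (SRA_ADDZE X, 2))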
17812 EVT VT = N->getValueType(0);
17813 if (VT == MVT::i64 && !Subtarget.isPPC64())
17814 return SDValue();
17815 if ((VT != MVT::i32 && VT != MVT::i64) ||
17816 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
17817 return SDValue();
17818
17819 SDLoc DL(N);
17820 SDValue N0 = N->getOperand(0);
17821
17822 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
17823 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
17824 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
17825
17826 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
17827 Created.push_back(Op.getNode());
17828
17829 if (IsNegPow2) {
17830 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
17831 Created.push_back(Op.getNode());
17832 }
17833
17834 return Op;
17835}
17836
17837//===----------------------------------------------------------------------===//
17838// Inline Assembly Support
17839//===----------------------------------------------------------------------===//
17840
17841void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17842 KnownBits &Known,
17843 const APInt &DemandedElts,
17844 const SelectionDAG &DAG,
17845 unsigned Depth) const {
17846 Known.resetAll();
17847 switch (Op.getOpcode()) {
17848 default: break;
17849 case PPCISD::LBRX: {
17850 // lhbrx is known to have the top bits cleared out.
17851 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
17852 Known.Zero = 0xFFFF0000;
17853 break;
17854 }
17855 case PPCISD::ADDE: {
17856 if (Op.getResNo() == 0) {
17857 // (0|1), _ = ADDE 0, 0, CARRY
17858 SDValue LHS = Op.getOperand(0);
17859 SDValue RHS = Op.getOperand(1);
17860 if (isNullConstant(LHS) && isNullConstant(RHS))
17861 Known.Zero = ~1ULL;
17862 }
17863 break;
17864 }
17865  case ISD::INTRINSIC_WO_CHAIN: {
17866 switch (Op.getConstantOperandVal(0)) {
17867 default: break;
17868 case Intrinsic::ppc_altivec_vcmpbfp_p:
17869 case Intrinsic::ppc_altivec_vcmpeqfp_p:
17870 case Intrinsic::ppc_altivec_vcmpequb_p:
17871 case Intrinsic::ppc_altivec_vcmpequh_p:
17872 case Intrinsic::ppc_altivec_vcmpequw_p:
17873 case Intrinsic::ppc_altivec_vcmpequd_p:
17874 case Intrinsic::ppc_altivec_vcmpequq_p:
17875 case Intrinsic::ppc_altivec_vcmpgefp_p:
17876 case Intrinsic::ppc_altivec_vcmpgtfp_p:
17877 case Intrinsic::ppc_altivec_vcmpgtsb_p:
17878 case Intrinsic::ppc_altivec_vcmpgtsh_p:
17879 case Intrinsic::ppc_altivec_vcmpgtsw_p:
17880 case Intrinsic::ppc_altivec_vcmpgtsd_p:
17881 case Intrinsic::ppc_altivec_vcmpgtsq_p:
17882 case Intrinsic::ppc_altivec_vcmpgtub_p:
17883 case Intrinsic::ppc_altivec_vcmpgtuh_p:
17884 case Intrinsic::ppc_altivec_vcmpgtuw_p:
17885 case Intrinsic::ppc_altivec_vcmpgtud_p:
17886 case Intrinsic::ppc_altivec_vcmpgtuq_p:
17887 Known.Zero = ~1U; // All bits but the low one are known to be zero.
17888 break;
17889 }
17890 break;
17891 }
17892  case ISD::INTRINSIC_W_CHAIN: {
17893 switch (Op.getConstantOperandVal(1)) {
17894 default:
17895 break;
17896 case Intrinsic::ppc_load2r:
17897 // Top bits are cleared for load2r (which is the same as lhbrx).
17898 Known.Zero = 0xFFFF0000;
17899 break;
17900 }
17901 break;
17902 }
17903 }
17904}
17905
17906Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
17907 switch (Subtarget.getCPUDirective()) {
17908 default: break;
17909 case PPC::DIR_970:
17910 case PPC::DIR_PWR4:
17911 case PPC::DIR_PWR5:
17912 case PPC::DIR_PWR5X:
17913 case PPC::DIR_PWR6:
17914 case PPC::DIR_PWR6X:
17915 case PPC::DIR_PWR7:
17916 case PPC::DIR_PWR8:
17917 case PPC::DIR_PWR9:
17918 case PPC::DIR_PWR10:
17919 case PPC::DIR_PWR11:
17920 case PPC::DIR_PWR_FUTURE: {
17921 if (!ML)
17922 break;
17923
17924    if (!DisableInnermostLoopAlign32) {
17925      // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17926 // so that we can decrease cache misses and branch-prediction misses.
17927 // Actual alignment of the loop will depend on the hotness check and other
17928 // logic in alignBlocks.
17929 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17930 return Align(32);
17931 }
17932
17933 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17934
17935 // For small loops (between 5 and 8 instructions), align to a 32-byte
17936 // boundary so that the entire loop fits in one instruction-cache line.
17937 uint64_t LoopSize = 0;
17938 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17939 for (const MachineInstr &J : **I) {
17940 LoopSize += TII->getInstSizeInBytes(J);
17941 if (LoopSize > 32)
17942 break;
17943 }
17944
17945 if (LoopSize > 16 && LoopSize <= 32)
17946 return Align(32);
17947
17948 break;
17949 }
17950 }
17951
17953}
17954
17955/// getConstraintType - Given a constraint, return the type of
17956/// constraint it is for this target.
17957PPCTargetLowering::ConstraintType
17958PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17959 if (Constraint.size() == 1) {
17960 switch (Constraint[0]) {
17961 default: break;
17962 case 'b':
17963 case 'r':
17964 case 'f':
17965 case 'd':
17966 case 'v':
17967 case 'y':
17968 return C_RegisterClass;
17969 case 'Z':
17970 // FIXME: While Z does indicate a memory constraint, it specifically
17971 // indicates an r+r address (used in conjunction with the 'y' modifier
17972 // in the replacement string). Currently, we're forcing the base
17973 // register to be r0 in the asm printer (which is interpreted as zero)
17974 // and forming the complete address in the second register. This is
17975 // suboptimal.
17976 return C_Memory;
17977 }
17978 } else if (Constraint == "wc") { // individual CR bits.
17979 return C_RegisterClass;
17980 } else if (Constraint == "wa" || Constraint == "wd" ||
17981 Constraint == "wf" || Constraint == "ws" ||
17982 Constraint == "wi" || Constraint == "ww") {
17983 return C_RegisterClass; // VSX registers.
17984 }
17985 return TargetLowering::getConstraintType(Constraint);
17986}
17987
17988/// Examine constraint type and operand type and determine a weight value.
17989/// This object must already have been set up with the operand type
17990/// and the current alternative constraint selected.
17991TargetLowering::ConstraintWeight
17992PPCTargetLowering::getSingleConstraintMatchWeight(
17993 AsmOperandInfo &info, const char *constraint) const {
17994  ConstraintWeight weight = CW_Invalid;
17995 Value *CallOperandVal = info.CallOperandVal;
17996 // If we don't have a value, we can't do a match,
17997 // but allow it at the lowest weight.
17998 if (!CallOperandVal)
17999 return CW_Default;
18000 Type *type = CallOperandVal->getType();
18001
18002 // Look at the constraint type.
18003 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
18004 return CW_Register; // an individual CR bit.
18005 else if ((StringRef(constraint) == "wa" ||
18006 StringRef(constraint) == "wd" ||
18007 StringRef(constraint) == "wf") &&
18008 type->isVectorTy())
18009 return CW_Register;
18010 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
18011    return CW_Register; // just holds 64-bit integer data.
18012 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18013 return CW_Register;
18014 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18015 return CW_Register;
18016
18017 switch (*constraint) {
18018 default:
18019    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
18020 break;
18021 case 'b':
18022 if (type->isIntegerTy())
18023 weight = CW_Register;
18024 break;
18025 case 'f':
18026 if (type->isFloatTy())
18027 weight = CW_Register;
18028 break;
18029 case 'd':
18030 if (type->isDoubleTy())
18031 weight = CW_Register;
18032 break;
18033 case 'v':
18034 if (type->isVectorTy())
18035 weight = CW_Register;
18036 break;
18037 case 'y':
18038 weight = CW_Register;
18039 break;
18040 case 'Z':
18041 weight = CW_Memory;
18042 break;
18043 }
18044 return weight;
18045}
18046
18047std::pair<unsigned, const TargetRegisterClass *>
18048PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
18049 StringRef Constraint,
18050 MVT VT) const {
18051 if (Constraint.size() == 1) {
18052 // GCC RS6000 Constraint Letters
18053 switch (Constraint[0]) {
18054 case 'b': // R1-R31
18055 if (VT == MVT::i64 && Subtarget.isPPC64())
18056 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18057 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18058 case 'r': // R0-R31
18059 if (VT == MVT::i64 && Subtarget.isPPC64())
18060 return std::make_pair(0U, &PPC::G8RCRegClass);
18061 return std::make_pair(0U, &PPC::GPRCRegClass);
18062 // 'd' and 'f' constraints are both defined to be "the floating point
18063 // registers", where one is for 32-bit and the other for 64-bit. We don't
18064 // really care overly much here so just give them all the same reg classes.
18065 case 'd':
18066 case 'f':
18067 if (Subtarget.hasSPE()) {
18068 if (VT == MVT::f32 || VT == MVT::i32)
18069 return std::make_pair(0U, &PPC::GPRCRegClass);
18070 if (VT == MVT::f64 || VT == MVT::i64)
18071 return std::make_pair(0U, &PPC::SPERCRegClass);
18072 } else {
18073 if (VT == MVT::f32 || VT == MVT::i32)
18074 return std::make_pair(0U, &PPC::F4RCRegClass);
18075 if (VT == MVT::f64 || VT == MVT::i64)
18076 return std::make_pair(0U, &PPC::F8RCRegClass);
18077 }
18078 break;
18079 case 'v':
18080 if (Subtarget.hasAltivec() && VT.isVector())
18081 return std::make_pair(0U, &PPC::VRRCRegClass);
18082 else if (Subtarget.hasVSX())
18083 // Scalars in Altivec registers only make sense with VSX.
18084 return std::make_pair(0U, &PPC::VFRCRegClass);
18085 break;
18086 case 'y': // crrc
18087 return std::make_pair(0U, &PPC::CRRCRegClass);
18088 }
18089 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18090 // An individual CR bit.
18091 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18092 } else if ((Constraint == "wa" || Constraint == "wd" ||
18093 Constraint == "wf" || Constraint == "wi") &&
18094 Subtarget.hasVSX()) {
18095 // A VSX register for either a scalar (FP) or vector. There is no
18096 // support for single precision scalars on subtargets prior to Power8.
18097 if (VT.isVector())
18098 return std::make_pair(0U, &PPC::VSRCRegClass);
18099 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18100 return std::make_pair(0U, &PPC::VSSRCRegClass);
18101 return std::make_pair(0U, &PPC::VSFRCRegClass);
18102 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18103 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18104 return std::make_pair(0U, &PPC::VSSRCRegClass);
18105 else
18106 return std::make_pair(0U, &PPC::VSFRCRegClass);
18107 } else if (Constraint == "lr") {
18108 if (VT == MVT::i64)
18109 return std::make_pair(0U, &PPC::LR8RCRegClass);
18110 else
18111 return std::make_pair(0U, &PPC::LRRCRegClass);
18112 }
18113
18114 // Handle special cases of physical registers that are not properly handled
18115 // by the base class.
18116 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18117 // If we name a VSX register, we can't defer to the base class because it
18118 // will not recognize the correct register (their names will be VSL{0-31}
18119 // and V{0-31} so they won't match). So we match them here.
18120 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
18121 int VSNum = atoi(Constraint.data() + 3);
18122 assert(VSNum >= 0 && VSNum <= 63 &&
18123 "Attempted to access a vsr out of range");
18124 if (VSNum < 32)
18125 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18126 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18127 }
18128
18129 // For float registers, we can't defer to the base class as it will match
18130 // the SPILLTOVSRRC class.
18131 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18132 int RegNum = atoi(Constraint.data() + 2);
18133 if (RegNum > 31 || RegNum < 0)
18134 report_fatal_error("Invalid floating point register number");
18135 if (VT == MVT::f32 || VT == MVT::i32)
18136 return Subtarget.hasSPE()
18137 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18138 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18139 if (VT == MVT::f64 || VT == MVT::i64)
18140 return Subtarget.hasSPE()
18141 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18142 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18143 }
18144 }
18145
18146 std::pair<unsigned, const TargetRegisterClass *> R =
18147      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18148
18149 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18150 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18151 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18152 // register.
18153 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18154 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18155 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18156 PPC::GPRCRegClass.contains(R.first))
18157 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18158 PPC::sub_32, &PPC::G8RCRegClass),
18159 &PPC::G8RCRegClass);
18160
18161 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18162 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18163 R.first = PPC::CR0;
18164 R.second = &PPC::CRRCRegClass;
18165 }
18166 // FIXME: This warning should ideally be emitted in the front end.
18167 const auto &TM = getTargetMachine();
18168 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18169 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18170 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18171 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18172 errs() << "warning: vector registers 20 to 32 are reserved in the "
18173 "default AIX AltiVec ABI and cannot be used\n";
18174 }
18175
18176 return R;
18177}
18178
18179/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18180/// vector. If it is invalid, don't add anything to Ops.
18181void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
18182 StringRef Constraint,
18183 std::vector<SDValue> &Ops,
18184 SelectionDAG &DAG) const {
18185 SDValue Result;
18186
18187 // Only support length 1 constraints.
18188 if (Constraint.size() > 1)
18189 return;
18190
18191 char Letter = Constraint[0];
18192 switch (Letter) {
18193 default: break;
18194 case 'I':
18195 case 'J':
18196 case 'K':
18197 case 'L':
18198 case 'M':
18199 case 'N':
18200 case 'O':
18201 case 'P': {
18202    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
18203 if (!CST) return; // Must be an immediate to match.
18204 SDLoc dl(Op);
18205 int64_t Value = CST->getSExtValue();
18206 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18207 // numbers are printed as such.
18208 switch (Letter) {
18209 default: llvm_unreachable("Unknown constraint letter!");
18210 case 'I': // "I" is a signed 16-bit constant.
18211 if (isInt<16>(Value))
18212 Result = DAG.getTargetConstant(Value, dl, TCVT);
18213 break;
18214 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
18215      if (isShiftedUInt<16, 16>(Value))
18216 Result = DAG.getTargetConstant(Value, dl, TCVT);
18217 break;
18218 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
18219      if (isShiftedInt<16, 16>(Value))
18220 Result = DAG.getTargetConstant(Value, dl, TCVT);
18221 break;
18222 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18223 if (isUInt<16>(Value))
18224 Result = DAG.getTargetConstant(Value, dl, TCVT);
18225 break;
18226 case 'M': // "M" is a constant that is greater than 31.
18227 if (Value > 31)
18228 Result = DAG.getTargetConstant(Value, dl, TCVT);
18229 break;
18230 case 'N': // "N" is a positive constant that is an exact power of two.
18231 if (Value > 0 && isPowerOf2_64(Value))
18232 Result = DAG.getTargetConstant(Value, dl, TCVT);
18233 break;
18234 case 'O': // "O" is the constant zero.
18235 if (Value == 0)
18236 Result = DAG.getTargetConstant(Value, dl, TCVT);
18237 break;
18238 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
18239 if (isInt<16>(-Value))
18240 Result = DAG.getTargetConstant(Value, dl, TCVT);
18241 break;
18242 }
18243 break;
18244 }
18245 }
18246
18247 if (Result.getNode()) {
18248 Ops.push_back(Result);
18249 return;
18250 }
18251
18252 // Handle standard constraint letters.
18253  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
18254}
18255
18256void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
18257                                                       SmallVectorImpl<SDValue> &Ops,
18258 SelectionDAG &DAG) const {
18259 if (I.getNumOperands() <= 1)
18260 return;
18261 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18262 return;
18263 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18264 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18265 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18266 return;
18267
18268 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18269 Ops.push_back(DAG.getMDNode(MDN));
18270}
18271
18272// isLegalAddressingMode - Return true if the addressing mode represented
18273// by AM is legal for this target, for a load/store of the specified type.
18274bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18275 const AddrMode &AM, Type *Ty,
18276 unsigned AS,
18277 Instruction *I) const {
18278 // Vector type r+i form is supported since power9 as DQ form. We don't check
18279  // the offset matching DQ form requirement (off % 16 == 0), because on PowerPC,
18280 // imm form is preferred and the offset can be adjusted to use imm form later
18281 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18282  // max offset to check the legal addressing mode, so we should be a little
18283  // aggressive in accepting other offsets for that LSRUse.
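  // Editor's illustration (assumption): on Power9 a v4i32 access at base+16
  // can use the DQ-form lxv/stxv instructions, so a nonzero BaseOffs is
  // accepted here; earlier subtargets only have X-form (indexed) vector
  // loads/stores, so vector r+i is rejected below.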
18284 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18285 return false;
18286
18287 // PPC allows a sign-extended 16-bit immediate field.
18288 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18289 return false;
18290
18291 // No global is ever allowed as a base.
18292 if (AM.BaseGV)
18293 return false;
18294
18295   // PPC only supports r+r,
18296 switch (AM.Scale) {
18297 case 0: // "r+i" or just "i", depending on HasBaseReg.
18298 break;
18299 case 1:
18300 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18301 return false;
18302 // Otherwise we have r+r or r+i.
18303 break;
18304 case 2:
18305 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18306 return false;
18307 // Allow 2*r as r+r.
18308 break;
18309 default:
18310 // No other scales are supported.
18311 return false;
18312 }
18313
18314 return true;
18315}
18316
18317SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18318 SelectionDAG &DAG) const {
18319   MachineFunction &MF = DAG.getMachineFunction();
18320   MachineFrameInfo &MFI = MF.getFrameInfo();
18321 MFI.setReturnAddressIsTaken(true);
18322
18323 SDLoc dl(Op);
18324 unsigned Depth = Op.getConstantOperandVal(0);
18325
18326 // Make sure the function does not optimize away the store of the RA to
18327 // the stack.
18328 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18329 FuncInfo->setLRStoreRequired();
18330 auto PtrVT = getPointerTy(MF.getDataLayout());
18331
18332 if (Depth > 0) {
18333     // The link register (return address) is saved in the caller's frame,
18334     // not in the callee's stack frame. So we must get the caller's frame
18335     // address and load the return address at the LR offset from there.
18336 SDValue FrameAddr =
18337 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18338                     LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
18339     SDValue Offset =
18340 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18341 Subtarget.getScalarIntVT());
18342 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18343 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18344                         MachinePointerInfo());
18345   }
18346
18347 // Just load the return address off the stack.
18348 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18349 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
18350                      MachinePointerInfo());
18351 }
18352
18353SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18354 SelectionDAG &DAG) const {
18355 SDLoc dl(Op);
18356 unsigned Depth = Op.getConstantOperandVal(0);
18357
18358 MachineFunction &MF = DAG.getMachineFunction();
18359 MachineFrameInfo &MFI = MF.getFrameInfo();
18360 MFI.setFrameAddressIsTaken(true);
18361
18362 EVT PtrVT = getPointerTy(MF.getDataLayout());
18363 bool isPPC64 = PtrVT == MVT::i64;
18364
18365   // Naked functions never have a frame pointer, and so we use r1. For all
18366   // other functions, this decision must be delayed until PEI.
18367 unsigned FrameReg;
18368 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18369 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18370 else
18371 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18372
18373 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18374 PtrVT);
18375 while (Depth--)
18376 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18377 FrameAddr, MachinePointerInfo());
18378 return FrameAddr;
18379}
18380
18381#define GET_REGISTER_MATCHER
18382#include "PPCGenAsmMatcher.inc"
18383
18384 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
18385                                               const MachineFunction &MF) const {
18386 bool IsPPC64 = Subtarget.isPPC64();
18387
18388 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18389 if (!Is64Bit && VT != LLT::scalar(32))
18390 report_fatal_error("Invalid register global variable type");
18391
18392   Register Reg = MatchRegisterName(RegName);
18393   if (!Reg)
18394 return Reg;
18395
18396 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18397 // Need followup investigation as to why.
18398 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18399 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18400 StringRef(RegName) + "\"."));
18401
18402 // Convert GPR to GP8R register for 64bit.
18403 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
18404 Reg = Reg.id() - PPC::R0 + PPC::X0;
18405
18406 return Reg;
18407}
18408
18409 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
18410   // The 32-bit SVR4 ABI accesses everything as got-indirect.
18411 if (Subtarget.is32BitELFABI())
18412 return true;
18413
18414 // AIX accesses everything indirectly through the TOC, which is similar to
18415 // the GOT.
18416 if (Subtarget.isAIXABI())
18417 return true;
18418
18419   CodeModel::Model CModel = getTargetMachine().getCodeModel();
18420   // If it is small or large code model, module locals are accessed
18421 // indirectly by loading their address from .toc/.got.
18422 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18423 return true;
18424
18425 // JumpTable and BlockAddress are accessed as got-indirect.
18426   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
18427     return true;
18428
18429   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
18430     return Subtarget.isGVIndirectSymbol(G->getGlobal());
18431
18432 return false;
18433}
18434
18435bool
18436 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18437   // The PowerPC target isn't yet aware of offsets.
18438 return false;
18439}
18440
18441 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
18442                                            const CallInst &I,
18443 MachineFunction &MF,
18444 unsigned Intrinsic) const {
18445 switch (Intrinsic) {
18446 case Intrinsic::ppc_atomicrmw_xchg_i128:
18447 case Intrinsic::ppc_atomicrmw_add_i128:
18448 case Intrinsic::ppc_atomicrmw_sub_i128:
18449 case Intrinsic::ppc_atomicrmw_nand_i128:
18450 case Intrinsic::ppc_atomicrmw_and_i128:
18451 case Intrinsic::ppc_atomicrmw_or_i128:
18452 case Intrinsic::ppc_atomicrmw_xor_i128:
18453 case Intrinsic::ppc_cmpxchg_i128:
18454 Info.opc = ISD::INTRINSIC_W_CHAIN;
18455 Info.memVT = MVT::i128;
18456 Info.ptrVal = I.getArgOperand(0);
18457 Info.offset = 0;
18458 Info.align = Align(16);
18459     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
18460                  MachineMemOperand::MOVolatile;
18461     return true;
18462 case Intrinsic::ppc_atomic_load_i128:
18463 Info.opc = ISD::INTRINSIC_W_CHAIN;
18464 Info.memVT = MVT::i128;
18465 Info.ptrVal = I.getArgOperand(0);
18466 Info.offset = 0;
18467 Info.align = Align(16);
18468     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
18469     return true;
18470 case Intrinsic::ppc_atomic_store_i128:
18471 Info.opc = ISD::INTRINSIC_VOID;
18472 Info.memVT = MVT::i128;
18473 Info.ptrVal = I.getArgOperand(2);
18474 Info.offset = 0;
18475 Info.align = Align(16);
18476     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18477     return true;
18478 case Intrinsic::ppc_altivec_lvx:
18479 case Intrinsic::ppc_altivec_lvxl:
18480 case Intrinsic::ppc_altivec_lvebx:
18481 case Intrinsic::ppc_altivec_lvehx:
18482 case Intrinsic::ppc_altivec_lvewx:
18483 case Intrinsic::ppc_vsx_lxvd2x:
18484 case Intrinsic::ppc_vsx_lxvw4x:
18485 case Intrinsic::ppc_vsx_lxvd2x_be:
18486 case Intrinsic::ppc_vsx_lxvw4x_be:
18487 case Intrinsic::ppc_vsx_lxvl:
18488 case Intrinsic::ppc_vsx_lxvll: {
18489 EVT VT;
18490 switch (Intrinsic) {
18491 case Intrinsic::ppc_altivec_lvebx:
18492 VT = MVT::i8;
18493 break;
18494 case Intrinsic::ppc_altivec_lvehx:
18495 VT = MVT::i16;
18496 break;
18497 case Intrinsic::ppc_altivec_lvewx:
18498 VT = MVT::i32;
18499 break;
18500 case Intrinsic::ppc_vsx_lxvd2x:
18501 case Intrinsic::ppc_vsx_lxvd2x_be:
18502 VT = MVT::v2f64;
18503 break;
18504 default:
18505 VT = MVT::v4i32;
18506 break;
18507 }
18508
18509 Info.opc = ISD::INTRINSIC_W_CHAIN;
18510 Info.memVT = VT;
18511 Info.ptrVal = I.getArgOperand(0);
18512 Info.offset = -VT.getStoreSize()+1;
18513 Info.size = 2*VT.getStoreSize()-1;
18514 Info.align = Align(1);
18515 Info.flags = MachineMemOperand::MOLoad;
18516 return true;
18517 }
18518 case Intrinsic::ppc_altivec_stvx:
18519 case Intrinsic::ppc_altivec_stvxl:
18520 case Intrinsic::ppc_altivec_stvebx:
18521 case Intrinsic::ppc_altivec_stvehx:
18522 case Intrinsic::ppc_altivec_stvewx:
18523 case Intrinsic::ppc_vsx_stxvd2x:
18524 case Intrinsic::ppc_vsx_stxvw4x:
18525 case Intrinsic::ppc_vsx_stxvd2x_be:
18526 case Intrinsic::ppc_vsx_stxvw4x_be:
18527 case Intrinsic::ppc_vsx_stxvl:
18528 case Intrinsic::ppc_vsx_stxvll: {
18529 EVT VT;
18530 switch (Intrinsic) {
18531 case Intrinsic::ppc_altivec_stvebx:
18532 VT = MVT::i8;
18533 break;
18534 case Intrinsic::ppc_altivec_stvehx:
18535 VT = MVT::i16;
18536 break;
18537 case Intrinsic::ppc_altivec_stvewx:
18538 VT = MVT::i32;
18539 break;
18540 case Intrinsic::ppc_vsx_stxvd2x:
18541 case Intrinsic::ppc_vsx_stxvd2x_be:
18542 VT = MVT::v2f64;
18543 break;
18544 default:
18545 VT = MVT::v4i32;
18546 break;
18547 }
18548
18549 Info.opc = ISD::INTRINSIC_VOID;
18550 Info.memVT = VT;
18551 Info.ptrVal = I.getArgOperand(1);
18552 Info.offset = -VT.getStoreSize()+1;
18553 Info.size = 2*VT.getStoreSize()-1;
18554 Info.align = Align(1);
18555 Info.flags = MachineMemOperand::MOStore;
18556 return true;
18557 }
18558 case Intrinsic::ppc_stdcx:
18559 case Intrinsic::ppc_stwcx:
18560 case Intrinsic::ppc_sthcx:
18561 case Intrinsic::ppc_stbcx: {
18562 EVT VT;
18563 auto Alignment = Align(8);
18564 switch (Intrinsic) {
18565 case Intrinsic::ppc_stdcx:
18566 VT = MVT::i64;
18567 break;
18568 case Intrinsic::ppc_stwcx:
18569 VT = MVT::i32;
18570 Alignment = Align(4);
18571 break;
18572 case Intrinsic::ppc_sthcx:
18573 VT = MVT::i16;
18574 Alignment = Align(2);
18575 break;
18576 case Intrinsic::ppc_stbcx:
18577 VT = MVT::i8;
18578 Alignment = Align(1);
18579 break;
18580 }
18581 Info.opc = ISD::INTRINSIC_W_CHAIN;
18582 Info.memVT = VT;
18583 Info.ptrVal = I.getArgOperand(0);
18584 Info.offset = 0;
18585 Info.align = Alignment;
18586     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18587     return true;
18588 }
18589 default:
18590 break;
18591 }
18592
18593 return false;
18594}
18595
18596/// It returns EVT::Other if the type should be determined using generic
18597/// target-independent logic.
18598 EVT PPCTargetLowering::getOptimalMemOpType(
18599     LLVMContext &Context, const MemOp &Op,
18600 const AttributeList &FuncAttributes) const {
18601 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18602 // We should use Altivec/VSX loads and stores when available. For unaligned
18603 // addresses, unaligned VSX loads are only fast starting with the P8.
18604 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18605 if (Op.isMemset() && Subtarget.hasVSX()) {
18606 uint64_t TailSize = Op.size() % 16;
18607         // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
18608         // element if the vector element type matches the tail store. For tail
18609         // sizes 3/4 the tail store is i32, so v4i32 cannot be used; use v8i16.
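        // For example, a 19-byte memset has TailSize == 3, so v8i16 is
        // returned here rather than v4i32.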
18610 if (TailSize > 2 && TailSize <= 4) {
18611 return MVT::v8i16;
18612 }
18613 return MVT::v4i32;
18614 }
18615 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
18616 return MVT::v4i32;
18617 }
18618 }
18619
18620 if (Subtarget.isPPC64()) {
18621 return MVT::i64;
18622 }
18623
18624 return MVT::i32;
18625}
18626
18627/// Returns true if it is beneficial to convert a load of a constant
18628/// to just the constant itself.
18629 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
18630                                                           Type *Ty) const {
18631 assert(Ty->isIntegerTy());
18632
18633 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18634 return !(BitSize == 0 || BitSize > 64);
18635}
18636
18637 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
18638   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18639 return false;
18640 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18641 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18642 return NumBits1 == 64 && NumBits2 == 32;
18643}
18644
18645 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
18646   if (!VT1.isInteger() || !VT2.isInteger())
18647 return false;
18648 unsigned NumBits1 = VT1.getSizeInBits();
18649 unsigned NumBits2 = VT2.getSizeInBits();
18650 return NumBits1 == 64 && NumBits2 == 32;
18651}
18652
18653 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
18654   // Generally speaking, zexts are not free, but they are free when they can be
18655 // folded with other operations.
18656 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18657 EVT MemVT = LD->getMemoryVT();
18658 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18659 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18660 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18661 LD->getExtensionType() == ISD::ZEXTLOAD))
18662 return true;
18663 }
18664
18665 // FIXME: Add other cases...
18666 // - 32-bit shifts with a zext to i64
18667 // - zext after ctlz, bswap, etc.
18668 // - zext after and by a constant mask
18669
18670 return TargetLowering::isZExtFree(Val, VT2);
18671}
18672
18673bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18674 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18675 "invalid fpext types");
18676 // Extending to float128 is not free.
18677 if (DestVT == MVT::f128)
18678 return false;
18679 return true;
18680}
18681
18682 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
18683   return isInt<16>(Imm) || isUInt<16>(Imm);
18684}
18685
18686 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
18687   return isInt<16>(Imm) || isUInt<16>(Imm);
18688}
18689
18692 unsigned *Fast) const {
18693   if (DisablePPCUnaligned)
18694     return false;
18695
18696 // PowerPC supports unaligned memory access for simple non-vector types.
18697 // Although accessing unaligned addresses is not as efficient as accessing
18698 // aligned addresses, it is generally more efficient than manual expansion,
18699 // and generally only traps for software emulation when crossing page
18700 // boundaries.
18701
18702 if (!VT.isSimple())
18703 return false;
18704
18705 if (VT.isFloatingPoint() && !VT.isVector() &&
18706 !Subtarget.allowsUnalignedFPAccess())
18707 return false;
18708
18709 if (VT.getSimpleVT().isVector()) {
18710 if (Subtarget.hasVSX()) {
18711 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18712 VT != MVT::v4f32 && VT != MVT::v4i32)
18713 return false;
18714 } else {
18715 return false;
18716 }
18717 }
18718
18719 if (VT == MVT::ppcf128)
18720 return false;
18721
18722 if (Fast)
18723 *Fast = 1;
18724
18725 return true;
18726}
18727
18728 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
18729                                                SDValue C) const {
18730 // Check integral scalar types.
18731 if (!VT.isScalarInteger())
18732 return false;
18733 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18734 if (!ConstNode->getAPIntValue().isSignedIntN(64))
18735 return false;
18736     // This transformation will generate >= 2 operations. But the following
18737     // cases will generate <= 2 instructions during ISel, so exclude them:
18738     // 1. If the constant multiplier fits in 16 bits, it can be handled by a
18739     //    single HW instruction, i.e. MULLI.
18740     // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
18741     //    only one extra shift is needed compared to case 1, i.e. MULLI and RLDICR.
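    // For example, multiplying by 5 or by (5 << 16) is left to MULLI (plus
    // RLDICR), while multiplying by 65535 (2^16 - 1) is decomposed below.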
18742 int64_t Imm = ConstNode->getSExtValue();
18743 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
18744 Imm >>= Shift;
18745 if (isInt<16>(Imm))
18746 return false;
18747 uint64_t UImm = static_cast<uint64_t>(Imm);
18748 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
18749 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
18750 return true;
18751 }
18752 return false;
18753}
18754
18755 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
18756                                                    EVT VT) const {
18757   return isFMAFasterThanFMulAndFAdd(
18758       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
18759 }
18760
18761 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
18762 Type *Ty) const {
18763 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18764 return false;
18765 switch (Ty->getScalarType()->getTypeID()) {
18766 case Type::FloatTyID:
18767 case Type::DoubleTyID:
18768 return true;
18769 case Type::FP128TyID:
18770 return Subtarget.hasP9Vector();
18771 default:
18772 return false;
18773 }
18774}
18775
18776// FIXME: add more patterns which are not profitable to hoist.
18777 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
18778   if (!I->hasOneUse())
18779 return true;
18780
18781 Instruction *User = I->user_back();
18782 assert(User && "A single use instruction with no uses.");
18783
18784 switch (I->getOpcode()) {
18785 case Instruction::FMul: {
18786 // Don't break FMA, PowerPC prefers FMA.
18787 if (User->getOpcode() != Instruction::FSub &&
18788 User->getOpcode() != Instruction::FAdd)
18789 return true;
18790
18791     const TargetOptions &Options = getTargetMachine().Options;
18792     const Function *F = I->getFunction();
18793 const DataLayout &DL = F->getDataLayout();
18794 Type *Ty = User->getOperand(0)->getType();
18795 bool AllowContract = I->getFastMathFlags().allowContract() &&
18796 User->getFastMathFlags().allowContract();
18797
18798 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
18799              isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
18800              (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
18801 }
18802 case Instruction::Load: {
18803 // Don't break "store (load float*)" pattern, this pattern will be combined
18804 // to "store (load int32)" in later InstCombine pass. See function
18805     // combineLoadToOperationType. On PowerPC, loading a floating-point value
18806     // takes more cycles than loading a 32-bit integer.
18807 LoadInst *LI = cast<LoadInst>(I);
18808 // For the loads that combineLoadToOperationType does nothing, like
18809 // ordered load, it should be profitable to hoist them.
18810 // For swifterror load, it can only be used for pointer to pointer type, so
18811 // later type check should get rid of this case.
18812 if (!LI->isUnordered())
18813 return true;
18814
18815 if (User->getOpcode() != Instruction::Store)
18816 return true;
18817
18818 if (I->getType()->getTypeID() != Type::FloatTyID)
18819 return true;
18820
18821 return false;
18822 }
18823 default:
18824 return true;
18825 }
18826 return true;
18827}
18828
18829const MCPhysReg *
18831 // LR is a callee-save register, but we must treat it as clobbered by any call
18832 // site. Hence we include LR in the scratch registers, which are in turn added
18833 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
18834 // to CTR, which is used by any indirect call.
18835 static const MCPhysReg ScratchRegs[] = {
18836 PPC::X12, PPC::LR8, PPC::CTR8, 0
18837 };
18838
18839 return ScratchRegs;
18840}
18841
18842 Register PPCTargetLowering::getExceptionPointerRegister(
18843     const Constant *PersonalityFn) const {
18844 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
18845}
18846
18847 Register PPCTargetLowering::getExceptionSelectorRegister(
18848     const Constant *PersonalityFn) const {
18849 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
18850}
18851
18852bool
18853 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
18854                         EVT VT , unsigned DefinedValues) const {
18855 if (VT == MVT::v2i64)
18856 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
18857
18858 if (Subtarget.hasVSX())
18859 return true;
18860
18861   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
18862 }
18863
18864 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
18865   if (DisableILPPref || Subtarget.enableMachineScheduler())
18866     return TargetLowering::getSchedulingPreference(N);
18867
18868 return Sched::ILP;
18869}
18870
18871// Create a fast isel object.
18872FastISel *
18874 const TargetLibraryInfo *LibInfo) const {
18875 return PPC::createFastISel(FuncInfo, LibInfo);
18876}
18877
18878// 'Inverted' means the FMA opcode after negating one multiplicand.
18879// For example, (fma -a b c) = (fnmsub a b c)
18880static unsigned invertFMAOpcode(unsigned Opc) {
18881 switch (Opc) {
18882 default:
18883 llvm_unreachable("Invalid FMA opcode for PowerPC!");
18884 case ISD::FMA:
18885 return PPCISD::FNMSUB;
18886 case PPCISD::FNMSUB:
18887 return ISD::FMA;
18888 }
18889}
18890
18891 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
18892                                                 bool LegalOps, bool OptForSize,
18893                                                 NegatibleCost &Cost,
18894                                                 unsigned Depth) const {
18895   if (Depth > SelectionDAG::MaxRecursionDepth)
18896     return SDValue();
18897
18898 unsigned Opc = Op.getOpcode();
18899 EVT VT = Op.getValueType();
18900 SDNodeFlags Flags = Op.getNode()->getFlags();
18901
18902 switch (Opc) {
18903 case PPCISD::FNMSUB:
18904 if (!Op.hasOneUse() || !isTypeLegal(VT))
18905 break;
18906
18907     const TargetOptions &Options = getTargetMachine().Options;
18908     SDValue N0 = Op.getOperand(0);
18909 SDValue N1 = Op.getOperand(1);
18910 SDValue N2 = Op.getOperand(2);
18911 SDLoc Loc(Op);
18912
18914 SDValue NegN2 =
18915 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
18916
18917 if (!NegN2)
18918 return SDValue();
18919
18920 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18921 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18922 // These transformations may change sign of zeroes. For example,
18923 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18924 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18925 // Try and choose the cheaper one to negate.
18926       NegatibleCost N0Cost = NegatibleCost::Expensive;
18927       SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
18928 N0Cost, Depth + 1);
18929
18930       NegatibleCost N1Cost = NegatibleCost::Expensive;
18931       SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
18932 N1Cost, Depth + 1);
18933
18934 if (NegN0 && N0Cost <= N1Cost) {
18935 Cost = std::min(N0Cost, N2Cost);
18936 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
18937 } else if (NegN1) {
18938 Cost = std::min(N1Cost, N2Cost);
18939 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
18940 }
18941 }
18942
18943 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18944 if (isOperationLegal(ISD::FMA, VT)) {
18945 Cost = N2Cost;
18946 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
18947 }
18948
18949 break;
18950 }
18951
18952 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18953 Cost, Depth);
18954}
18955
18956// Override to enable LOAD_STACK_GUARD lowering on Linux.
18957 bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
18958   if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18959     return true;
18960   return TargetLowering::useLoadStackGuardNode(M);
18961 }
18962
18963 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
18964                                      bool ForCodeSize) const {
18965 if (!VT.isSimple() || !Subtarget.hasVSX())
18966 return false;
18967
18968 switch(VT.getSimpleVT().SimpleTy) {
18969 default:
18970 // For FP types that are currently not supported by PPC backend, return
18971 // false. Examples: f16, f80.
18972 return false;
18973 case MVT::f32:
18974 case MVT::f64: {
18975 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
18976       // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18977 return true;
18978 }
18979 bool IsExact;
18980 APSInt IntResult(16, false);
18981 // The rounding mode doesn't really matter because we only care about floats
18982 // that can be converted to integers exactly.
18983 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
18984 // For exact values in the range [-16, 15] we can materialize the float.
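      // (Illustrative: 7.0 and -16.0 convert exactly and are in range, so they
      // can be materialized; 0.5 is not an exact integer, so it is not legal
      // here.)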
18985 if (IsExact && IntResult <= 15 && IntResult >= -16)
18986 return true;
18987 return Imm.isZero();
18988 }
18989 case MVT::ppcf128:
18990 return Imm.isPosZero();
18991 }
18992}
18993
18994// For vector shift operation op, fold
18995// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
18996 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
18997                                   SelectionDAG &DAG) {
18998 SDValue N0 = N->getOperand(0);
18999 SDValue N1 = N->getOperand(1);
19000 EVT VT = N0.getValueType();
19001 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19002 unsigned Opcode = N->getOpcode();
19003 unsigned TargetOpcode;
19004
19005 switch (Opcode) {
19006 default:
19007 llvm_unreachable("Unexpected shift operation");
19008   case ISD::SHL:
19009     TargetOpcode = PPCISD::SHL;
19010     break;
19011   case ISD::SRL:
19012     TargetOpcode = PPCISD::SRL;
19013     break;
19014   case ISD::SRA:
19015     TargetOpcode = PPCISD::SRA;
19016     break;
19017 }
19018
19019 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19020 N1->getOpcode() == ISD::AND)
19021 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19022 if (Mask->getZExtValue() == OpSizeInBits - 1)
19023 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19024
19025 return SDValue();
19026}
19027
19028SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
19029 DAGCombinerInfo &DCI) const {
19030 EVT VT = N->getValueType(0);
19031 assert(VT.isVector() && "Vector type expected.");
19032
19033 unsigned Opc = N->getOpcode();
19034 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
19035 "Unexpected opcode.");
19036
19037 if (!isOperationLegal(Opc, VT))
19038 return SDValue();
19039
19040 EVT EltTy = VT.getScalarType();
19041 unsigned EltBits = EltTy.getSizeInBits();
19042 if (EltTy != MVT::i64 && EltTy != MVT::i32)
19043 return SDValue();
19044
19045 SDValue N1 = N->getOperand(1);
19046 uint64_t SplatBits = 0;
19047 bool AddSplatCase = false;
19048 unsigned OpcN1 = N1.getOpcode();
19049 if (OpcN1 == PPCISD::VADD_SPLAT &&
19051 AddSplatCase = true;
19052 SplatBits = N1.getConstantOperandVal(0);
19053 }
19054
19055 if (!AddSplatCase) {
19056 if (OpcN1 != ISD::BUILD_VECTOR)
19057 return SDValue();
19058
19059 unsigned SplatBitSize;
19060 bool HasAnyUndefs;
19061 APInt APSplatBits, APSplatUndef;
19062 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
19063 bool BVNIsConstantSplat =
19064 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
19065 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
19066 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
19067 return SDValue();
19068 SplatBits = APSplatBits.getZExtValue();
19069 }
19070
19071 SDLoc DL(N);
19072 SDValue N0 = N->getOperand(0);
19073 // PPC vector shifts by word/double look at only the low 5/6 bits of the
19074 // shift vector, which means the max value is 31/63. A shift vector of all
19075 // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
19076 // -16 to 15 range.
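  // (Illustrative: (shl v4i32:x, splat(31)) becomes (PPCISD::SHL x, splat(-1))
  // below, since only the low 5 bits of each i32 shift amount are used.)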
19077 if (SplatBits == (EltBits - 1)) {
19078 unsigned NewOpc;
19079 switch (Opc) {
19080 case ISD::SHL:
19081 NewOpc = PPCISD::SHL;
19082 break;
19083 case ISD::SRL:
19084 NewOpc = PPCISD::SRL;
19085 break;
19086 case ISD::SRA:
19087 NewOpc = PPCISD::SRA;
19088 break;
19089 }
19090 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
19091 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
19092 }
19093
19094 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
19095 return SDValue();
19096
19097 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
19098 // before the BUILD_VECTOR is replaced by a load.
19099 if (EltTy != MVT::i64 || SplatBits != 1)
19100 return SDValue();
19101
19102 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
19103}
19104
19105SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19106 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19107 return Value;
19108
19109 if (N->getValueType(0).isVector())
19110 return combineVectorShift(N, DCI);
19111
19112 SDValue N0 = N->getOperand(0);
19113 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19114 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19115 N0.getOpcode() != ISD::SIGN_EXTEND ||
19116 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19117 N->getValueType(0) != MVT::i64)
19118 return SDValue();
19119
19120 // We can't save an operation here if the value is already extended, and
19121 // the existing shift is easier to combine.
19122 SDValue ExtsSrc = N0.getOperand(0);
19123 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19124 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19125 return SDValue();
19126
19127 SDLoc DL(N0);
19128 SDValue ShiftBy = SDValue(CN1, 0);
19129 // We want the shift amount to be i32 on the extswli, but the shift could
19130 // have an i64.
19131 if (ShiftBy.getValueType() == MVT::i64)
19132 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19133
19134 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19135 ShiftBy);
19136}
19137
19138SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19139 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19140 return Value;
19141
19142 if (N->getValueType(0).isVector())
19143 return combineVectorShift(N, DCI);
19144
19145 return SDValue();
19146}
19147
19148SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19149 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19150 return Value;
19151
19152 if (N->getValueType(0).isVector())
19153 return combineVectorShift(N, DCI);
19154
19155 return SDValue();
19156}
19157
19158// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19159// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19160// When C is zero, the equation (addi Z, -C) can be simplified to Z
19161// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
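// For example, with C == 7: (add X, (zext (setne Z, 7))) becomes
// (addze X, (addic (addi Z, -7), -1).carry).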
19162 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
19163                                  const PPCSubtarget &Subtarget) {
19164 if (!Subtarget.isPPC64())
19165 return SDValue();
19166
19167 SDValue LHS = N->getOperand(0);
19168 SDValue RHS = N->getOperand(1);
19169
19170 auto isZextOfCompareWithConstant = [](SDValue Op) {
19171 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19172 Op.getValueType() != MVT::i64)
19173 return false;
19174
19175 SDValue Cmp = Op.getOperand(0);
19176 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19177 Cmp.getOperand(0).getValueType() != MVT::i64)
19178 return false;
19179
19180 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19181 int64_t NegConstant = 0 - Constant->getSExtValue();
19182 // Due to the limitations of the addi instruction,
19183 // -C is required to be [-32768, 32767].
19184 return isInt<16>(NegConstant);
19185 }
19186
19187 return false;
19188 };
19189
19190 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19191 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19192
19193 // If there is a pattern, canonicalize a zext operand to the RHS.
19194 if (LHSHasPattern && !RHSHasPattern)
19195 std::swap(LHS, RHS);
19196 else if (!LHSHasPattern && !RHSHasPattern)
19197 return SDValue();
19198
19199 SDLoc DL(N);
19200 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19201 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19202 SDValue Cmp = RHS.getOperand(0);
19203 SDValue Z = Cmp.getOperand(0);
19204 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19205 int64_t NegConstant = 0 - Constant->getSExtValue();
19206
19207 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19208 default: break;
19209 case ISD::SETNE: {
19210 // when C == 0
19211 // --> addze X, (addic Z, -1).carry
19212 // /
19213 // add X, (zext(setne Z, C))--
19214 // \ when -32768 <= -C <= 32767 && C != 0
19215 // --> addze X, (addic (addi Z, -C), -1).carry
19216 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19217 DAG.getConstant(NegConstant, DL, MVT::i64));
19218 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19219 SDValue Addc =
19220 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19221 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19222 DAG.getConstant(0, DL, CarryType));
19223 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19224 DAG.getConstant(0, DL, MVT::i64),
19225 SDValue(Addc.getNode(), 1));
19226 }
19227 case ISD::SETEQ: {
19228 // when C == 0
19229 // --> addze X, (subfic Z, 0).carry
19230 // /
19231 // add X, (zext(sete Z, C))--
19232 // \ when -32768 <= -C <= 32767 && C != 0
19233 // --> addze X, (subfic (addi Z, -C), 0).carry
19234 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19235 DAG.getConstant(NegConstant, DL, MVT::i64));
19236 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19237 SDValue Subc =
19238 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19239 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19240 DAG.getConstant(0, DL, CarryType));
19241 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19242 DAG.getConstant(1UL, DL, CarryType));
19243 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19244 DAG.getConstant(0, DL, MVT::i64), Invert);
19245 }
19246 }
19247
19248 return SDValue();
19249}
19250
19251// Transform
19252// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19253// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19254// In this case both C1 and C2 must be known constants.
19255// C1+C2 must fit into a 34 bit signed integer.
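// For example, (add 16, (MAT_PCREL_ADDR GlobalAddr+8)) folds to
// (MAT_PCREL_ADDR GlobalAddr+24), since 24 fits in a signed 34-bit field.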
19256 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
19257                                           const PPCSubtarget &Subtarget) {
19258 if (!Subtarget.isUsingPCRelativeCalls())
19259 return SDValue();
19260
19261 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19262 // If we find that node try to cast the Global Address and the Constant.
19263 SDValue LHS = N->getOperand(0);
19264 SDValue RHS = N->getOperand(1);
19265
19266 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19267 std::swap(LHS, RHS);
19268
19269 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19270 return SDValue();
19271
19272 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19273   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
19274   ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);
19275
19276 // Check that both casts succeeded.
19277 if (!GSDN || !ConstNode)
19278 return SDValue();
19279
19280 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19281 SDLoc DL(GSDN);
19282
19283 // The signed int offset needs to fit in 34 bits.
19284 if (!isInt<34>(NewOffset))
19285 return SDValue();
19286
19287 // The new global address is a copy of the old global address except
19288 // that it has the updated Offset.
19289 SDValue GA =
19290 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19291 NewOffset, GSDN->getTargetFlags());
19292 SDValue MatPCRel =
19293 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19294 return MatPCRel;
19295}
19296
19297SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19298 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19299 return Value;
19300
19301 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19302 return Value;
19303
19304 return SDValue();
19305}
19306
19307// Detect TRUNCATE operations on bitcasts of float128 values.
19308 // What we are looking for here is the situation where we extract a subset
19309 // of bits from a 128-bit float.
19310// This can be of two forms:
19311// 1) BITCAST of f128 feeding TRUNCATE
19312// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19313// The reason this is required is because we do not have a legal i128 type
19314// and so we want to prevent having to store the f128 and then reload part
19315// of it.
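// For example, on a big-endian subtarget (truncate (bitcast f128:x to i128))
// extracts element 1 of (bitcast x to v2i64), while a preceding shift right
// by 64 selects element 0 instead.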
19316SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19317 DAGCombinerInfo &DCI) const {
19318 // If we are using CRBits then try that first.
19319 if (Subtarget.useCRBits()) {
19320 // Check if CRBits did anything and return that if it did.
19321 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19322 return CRTruncValue;
19323 }
19324
19325 SDLoc dl(N);
19326 SDValue Op0 = N->getOperand(0);
19327
19328 // Looking for a truncate of i128 to i64.
19329 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19330 return SDValue();
19331
19332 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19333
19334 // SRL feeding TRUNCATE.
19335 if (Op0.getOpcode() == ISD::SRL) {
19336 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19337 // The right shift has to be by 64 bits.
19338 if (!ConstNode || ConstNode->getZExtValue() != 64)
19339 return SDValue();
19340
19341 // Switch the element number to extract.
19342 EltToExtract = EltToExtract ? 0 : 1;
19343 // Update Op0 past the SRL.
19344 Op0 = Op0.getOperand(0);
19345 }
19346
19347 // BITCAST feeding a TRUNCATE possibly via SRL.
19348 if (Op0.getOpcode() == ISD::BITCAST &&
19349 Op0.getValueType() == MVT::i128 &&
19350 Op0.getOperand(0).getValueType() == MVT::f128) {
19351 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19352 return DCI.DAG.getNode(
19353 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19354 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19355 }
19356 return SDValue();
19357}
19358
19359SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
19360 SelectionDAG &DAG = DCI.DAG;
19361
19362 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
19363 if (!ConstOpOrElement)
19364 return SDValue();
19365
19366 // An imul is usually smaller than the alternative sequence for legal type.
19368 isOperationLegal(ISD::MUL, N->getValueType(0)))
19369 return SDValue();
19370
19371 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
19372 switch (this->Subtarget.getCPUDirective()) {
19373 default:
19374 // TODO: enhance the condition for subtarget before pwr8
19375 return false;
19376 case PPC::DIR_PWR8:
19377 // type mul add shl
19378 // scalar 4 1 1
19379 // vector 7 2 2
19380 return true;
19381 case PPC::DIR_PWR9:
19382 case PPC::DIR_PWR10:
19383 case PPC::DIR_PWR11:
19385 // type mul add shl
19386 // scalar 5 2 2
19387 // vector 7 2 2
19388
19389       // The cycle RATIOs of the related operations are shown in the table above.
19390       // Because mul costs 5 (scalar) / 7 (vector) while add/sub/shl each cost 2
19391       // for both scalar and vector types, 2-instruction patterns (add/sub + shl,
19392       // cost 4) are always profitable; but for 3-instruction patterns such as
19393       // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl cost 6,
19394       // so we should only do it for vector types.
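      // (Illustrative: for a scalar (mul x, -9), sub + add + shl cost 6 cycles
      // versus 5 for the mul, so the decomposition is vector-only here.)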
19395 return IsAddOne && IsNeg ? VT.isVector() : true;
19396 }
19397 };
19398
19399 EVT VT = N->getValueType(0);
19400 SDLoc DL(N);
19401
19402 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19403 bool IsNeg = MulAmt.isNegative();
19404 APInt MulAmtAbs = MulAmt.abs();
19405
19406 if ((MulAmtAbs - 1).isPowerOf2()) {
19407 // (mul x, 2^N + 1) => (add (shl x, N), x)
19408 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
19409
19410 if (!IsProfitable(IsNeg, true, VT))
19411 return SDValue();
19412
19413 SDValue Op0 = N->getOperand(0);
19414 SDValue Op1 =
19415 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19416 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
19417 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
19418
19419 if (!IsNeg)
19420 return Res;
19421
19422 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
19423 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19424 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19425 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19426
19427 if (!IsProfitable(IsNeg, false, VT))
19428 return SDValue();
19429
19430 SDValue Op0 = N->getOperand(0);
19431 SDValue Op1 =
19432 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19433 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
19434
19435 if (!IsNeg)
19436 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
19437 else
19438 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
19439
19440 } else {
19441 return SDValue();
19442 }
19443}
19444
19445// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
19446// in combiner since we need to check SD flags and other subtarget features.
19447SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19448 DAGCombinerInfo &DCI) const {
19449 SDValue N0 = N->getOperand(0);
19450 SDValue N1 = N->getOperand(1);
19451 SDValue N2 = N->getOperand(2);
19452 SDNodeFlags Flags = N->getFlags();
19453 EVT VT = N->getValueType(0);
19454 SelectionDAG &DAG = DCI.DAG;
19455 const TargetOptions &Options = getTargetMachine().Options;
19456 unsigned Opc = N->getOpcode();
19457   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
19458   bool LegalOps = !DCI.isBeforeLegalizeOps();
19459 SDLoc Loc(N);
19460
19461 if (!isOperationLegal(ISD::FMA, VT))
19462 return SDValue();
19463
19464 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19465 // since (fnmsub a b c)=-0 while c-ab=+0.
19466 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19467 return SDValue();
19468
19469 // (fma (fneg a) b c) => (fnmsub a b c)
19470 // (fnmsub (fneg a) b c) => (fma a b c)
19471 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
19472 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
19473
19474 // (fma a (fneg b) c) => (fnmsub a b c)
19475 // (fnmsub a (fneg b) c) => (fma a b c)
19476 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
19477 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
19478
19479 return SDValue();
19480}
19481
19482bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19483   // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
19484 if (!Subtarget.is64BitELFABI())
19485 return false;
19486
19487 // If not a tail call then no need to proceed.
19488 if (!CI->isTailCall())
19489 return false;
19490
19491 // If sibling calls have been disabled and tail-calls aren't guaranteed
19492 // there is no reason to duplicate.
19493 auto &TM = getTargetMachine();
19494 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19495 return false;
19496
19497 // Can't tail call a function called indirectly, or if it has variadic args.
19498 const Function *Callee = CI->getCalledFunction();
19499 if (!Callee || Callee->isVarArg())
19500 return false;
19501
19502 // Make sure the callee and caller calling conventions are eligible for tco.
19503 const Function *Caller = CI->getParent()->getParent();
19504 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19505 CI->getCallingConv()))
19506 return false;
19507
19508 // If the function is local then we have a good chance at tail-calling it
19509 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19510}
19511
19512bool PPCTargetLowering::
19513isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19514 const Value *Mask = AndI.getOperand(1);
19515 // If the mask is suitable for andi. or andis. we should sink the and.
19516 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19517 // Can't handle constants wider than 64-bits.
19518 if (CI->getBitWidth() > 64)
19519 return false;
19520 int64_t ConstVal = CI->getZExtValue();
19521 return isUInt<16>(ConstVal) ||
19522 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
19523 }
19524
19525 // For non-constant masks, we can always use the record-form and.
19526 return true;
19527}
19528
19529/// getAddrModeForFlags - Based on the set of address flags, select the most
19530/// optimal instruction format to match by.
19531PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19532 // This is not a node we should be handling here.
19533 if (Flags == PPC::MOF_None)
19534 return PPC::AM_None;
19535 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19536 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
19537 if ((Flags & FlagSet) == FlagSet)
19538 return PPC::AM_DForm;
19539 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
19540 if ((Flags & FlagSet) == FlagSet)
19541 return PPC::AM_DSForm;
19542 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
19543 if ((Flags & FlagSet) == FlagSet)
19544 return PPC::AM_DQForm;
19545 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
19546 if ((Flags & FlagSet) == FlagSet)
19547 return PPC::AM_PrefixDForm;
19548 // If no other forms are selected, return an X-Form as it is the most
19549 // general addressing mode.
19550 return PPC::AM_XForm;
19551}
19552
19553/// Set alignment flags based on whether or not the Frame Index is aligned.
19554/// Utilized when computing flags for address computation when selecting
19555/// load and store instructions.
19556static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19557 SelectionDAG &DAG) {
19558 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19559 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
19560 if (!FI)
19561 return;
19562   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
19563   unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
19564 // If this is (add $FI, $S16Imm), the alignment flags are already set
19565 // based on the immediate. We just need to clear the alignment flags
19566 // if the FI alignment is weaker.
19567 if ((FrameIndexAlign % 4) != 0)
19568 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19569 if ((FrameIndexAlign % 16) != 0)
19570 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19571 // If the address is a plain FrameIndex, set alignment flags based on
19572 // FI alignment.
19573 if (!IsAdd) {
19574 if ((FrameIndexAlign % 4) == 0)
19575 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19576 if ((FrameIndexAlign % 16) == 0)
19577 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19578 }
19579}
19580
19581/// Given a node, compute flags that are used for address computation when
19582/// selecting load and store instructions. The flags computed are stored in
19583/// FlagSet. This function takes into account whether the node is a constant,
19584 /// an ADD or OR, or neither, and computes the address flags accordingly.
19585static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19586 SelectionDAG &DAG) {
19587 // Set the alignment flags for the node depending on if the node is
19588 // 4-byte or 16-byte aligned.
19589 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19590 if ((Imm & 0x3) == 0)
19591 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19592 if ((Imm & 0xf) == 0)
19593 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19594 };
19595
19596   if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
19597     // All 32-bit constants can be computed as LIS + Disp.
19598 const APInt &ConstImm = CN->getAPIntValue();
19599 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
19600 FlagSet |= PPC::MOF_AddrIsSImm32;
19601 SetAlignFlagsForImm(ConstImm.getZExtValue());
19602 setAlignFlagsForFI(N, FlagSet, DAG);
19603 }
19604 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
19605 FlagSet |= PPC::MOF_RPlusSImm34;
19606 else // Let constant materialization handle large constants.
19607 FlagSet |= PPC::MOF_NotAddNorCst;
19608 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19609 // This address can be represented as an addition of:
19610 // - Register + Imm16 (possibly a multiple of 4/16)
19611 // - Register + Imm34
19612 // - Register + PPCISD::Lo
19613 // - Register + Register
19614 // In any case, we won't have to match this as Base + Zero.
19615 SDValue RHS = N.getOperand(1);
19616     if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
19617       const APInt &ConstImm = CN->getAPIntValue();
19618 if (ConstImm.isSignedIntN(16)) {
19619 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19620 SetAlignFlagsForImm(ConstImm.getZExtValue());
19621 setAlignFlagsForFI(N, FlagSet, DAG);
19622 }
19623 if (ConstImm.isSignedIntN(34))
19624 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19625 else
19626 FlagSet |= PPC::MOF_RPlusR; // Register.
19627 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
19628 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19629 else
19630 FlagSet |= PPC::MOF_RPlusR;
19631 } else { // The address computation is not a constant or an addition.
19632 setAlignFlagsForFI(N, FlagSet, DAG);
19633 FlagSet |= PPC::MOF_NotAddNorCst;
19634 }
19635}
19636
19644
19645 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
19646/// the address flags of the load/store instruction that is to be matched.
19647unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19648 SelectionDAG &DAG) const {
19649 unsigned FlagSet = PPC::MOF_None;
19650
19651 // Compute subtarget flags.
19652 if (!Subtarget.hasP9Vector())
19653 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19654 else
19655 FlagSet |= PPC::MOF_SubtargetP9;
19656
19657 if (Subtarget.hasPrefixInstrs())
19658 FlagSet |= PPC::MOF_SubtargetP10;
19659
19660 if (Subtarget.hasSPE())
19661 FlagSet |= PPC::MOF_SubtargetSPE;
19662
19663 // Check if we have a PCRel node and return early.
19664 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19665 return FlagSet;
19666
19667 // If the node is the paired load/store intrinsics, compute flags for
19668 // address computation and return early.
19669 unsigned ParentOp = Parent->getOpcode();
19670 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19671 (ParentOp == ISD::INTRINSIC_VOID))) {
19672 unsigned ID = Parent->getConstantOperandVal(1);
19673 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19674 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19675 ? Parent->getOperand(2)
19676 : Parent->getOperand(3);
19677 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
19678 FlagSet |= PPC::MOF_Vector;
19679 return FlagSet;
19680 }
19681 }
19682
19683 // Mark this as something we don't want to handle here if it is atomic
19684 // or pre-increment instruction.
19685 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
19686 if (LSB->isIndexed())
19687 return PPC::MOF_None;
19688
19689 // Compute in-memory type flags. This is based on if there are scalars,
19690 // floats or vectors.
19691 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
19692 assert(MN && "Parent should be a MemSDNode!");
19693 EVT MemVT = MN->getMemoryVT();
19694 unsigned Size = MemVT.getSizeInBits();
19695 if (MemVT.isScalarInteger()) {
19696 assert(Size <= 128 &&
19697 "Not expecting scalar integers larger than 16 bytes!");
19698 if (Size < 32)
19699 FlagSet |= PPC::MOF_SubWordInt;
19700 else if (Size == 32)
19701 FlagSet |= PPC::MOF_WordInt;
19702 else
19703 FlagSet |= PPC::MOF_DoubleWordInt;
19704 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19705 if (Size == 128)
19706 FlagSet |= PPC::MOF_Vector;
19707 else if (Size == 256) {
19708 assert(Subtarget.pairedVectorMemops() &&
19709 "256-bit vectors are only available when paired vector memops is "
19710 "enabled!");
19711 FlagSet |= PPC::MOF_Vector;
19712 } else
19713 llvm_unreachable("Not expecting illegal vectors!");
19714 } else { // Floating point type: can be scalar, f128 or vector types.
19715 if (Size == 32 || Size == 64)
19716 FlagSet |= PPC::MOF_ScalarFloat;
19717 else if (MemVT == MVT::f128 || MemVT.isVector())
19718 FlagSet |= PPC::MOF_Vector;
19719 else
19720 llvm_unreachable("Not expecting illegal scalar floats!");
19721 }
19722
19723 // Compute flags for address computation.
19724 computeFlagsForAddressComputation(N, FlagSet, DAG);
19725
19726 // Compute type extension flags.
19727 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
19728 switch (LN->getExtensionType()) {
19729 case ISD::SEXTLOAD:
19730 FlagSet |= PPC::MOF_SExt;
19731 break;
19732 case ISD::EXTLOAD:
19733 case ISD::ZEXTLOAD:
19734 FlagSet |= PPC::MOF_ZExt;
19735 break;
19736 case ISD::NON_EXTLOAD:
19737 FlagSet |= PPC::MOF_NoExt;
19738 break;
19739 }
19740 } else
19741 FlagSet |= PPC::MOF_NoExt;
19742
19743 // For integers, no extension is the same as zero extension.
19744 // We set the extension mode to zero extension so we don't have
19745 // to add separate entries in AddrModesMap for loads and stores.
19746 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19747 FlagSet |= PPC::MOF_ZExt;
19748 FlagSet &= ~PPC::MOF_NoExt;
19749 }
19750
19751 // If we don't have prefixed instructions, 34-bit constants should be
19752 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19753 bool IsNonP1034BitConst =
19754       ((PPC::MOF_NotAddNorCst | PPC::MOF_SubtargetP10 | PPC::MOF_RPlusSImm34) &
19755        FlagSet) == PPC::MOF_RPlusSImm34;
19756 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19757 IsNonP1034BitConst)
19758 FlagSet |= PPC::MOF_NotAddNorCst;
19759
19760 return FlagSet;
19761}
19762
19763/// SelectForceXFormMode - Given the specified address, force it to be
19764/// represented as an indexed [r+r] operation (an XForm instruction).
19765 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
19766                                                        SDValue &Base,
19767 SelectionDAG &DAG) const {
19768
19769   PPC::AddrMode Mode = PPC::AM_XForm;
19770   int16_t ForceXFormImm = 0;
19771 if (provablyDisjointOr(DAG, N) &&
19772 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
19773 Disp = N.getOperand(0);
19774 Base = N.getOperand(1);
19775 return Mode;
19776 }
19777
19778 // If the address is the result of an add, we will utilize the fact that the
19779 // address calculation includes an implicit add. However, we can reduce
19780 // register pressure if we do not materialize a constant just for use as the
19781 // index register. We only get rid of the add if it is not an add of a
19782 // value and a 16-bit signed constant and both have a single use.
19783 if (N.getOpcode() == ISD::ADD &&
19784 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
19785 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
19786 Disp = N.getOperand(0);
19787 Base = N.getOperand(1);
19788 return Mode;
19789 }
19790
19791 // Otherwise, use R0 as the base register.
19792 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19793 N.getValueType());
19794 Base = N;
19795
19796 return Mode;
19797}
19798
19799 bool PPCTargetLowering::splitValueIntoRegisterParts(
19800     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19801 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19802 EVT ValVT = Val.getValueType();
19803 // If we are splitting a scalar integer into f64 parts (i.e. so they
19804 // can be placed into VFRC registers), we need to zero extend and
19805 // bitcast the values. This will ensure the value is placed into a
19806 // VSR using direct moves or stack operations as needed.
19807 if (PartVT == MVT::f64 &&
19808 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
19809 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
19810 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
19811 Parts[0] = Val;
19812 return true;
19813 }
19814 return false;
19815}
19816
19817SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
19818 SelectionDAG &DAG) const {
19819 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19820   TargetLowering::CallLoweringInfo CLI(DAG);
19821   EVT RetVT = Op.getValueType();
19822 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
19823 SDValue Callee =
19824 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
19825 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
19826   TargetLowering::ArgListTy Args;
19827   for (const SDValue &N : Op->op_values()) {
19828 EVT ArgVT = N.getValueType();
19829 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
19830 TargetLowering::ArgListEntry Entry(N, ArgTy);
19831 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
19832 Entry.IsZExt = !Entry.IsSExt;
19833 Args.push_back(Entry);
19834 }
19835
19836 SDValue InChain = DAG.getEntryNode();
19837 SDValue TCChain = InChain;
19838 const Function &F = DAG.getMachineFunction().getFunction();
19839 bool isTailCall =
19840 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
19841 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
19842 if (isTailCall)
19843 InChain = TCChain;
19844 CLI.setDebugLoc(SDLoc(Op))
19845 .setChain(InChain)
19846 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
19847 .setTailCall(isTailCall)
19848 .setSExtResult(SignExtend)
19849 .setZExtResult(!SignExtend)
19851 return TLI.LowerCallTo(CLI).first;
19852}
19853
19854SDValue PPCTargetLowering::lowerLibCallBasedOnType(
19855 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
19856 SelectionDAG &DAG) const {
19857 if (Op.getValueType() == MVT::f32)
19858 return lowerToLibCall(LibCallFloatName, Op, DAG);
19859
19860 if (Op.getValueType() == MVT::f64)
19861 return lowerToLibCall(LibCallDoubleName, Op, DAG);
19862
19863 return SDValue();
19864}
19865
19866bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
19867 SDNodeFlags Flags = Op.getNode()->getFlags();
19868 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
19869 Flags.hasNoNaNs() && Flags.hasNoInfs();
19870}
19871
19872bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
19873 return Op.getNode()->getFlags().hasApproximateFuncs();
19874}
19875
19876bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
19878}
19879
19880SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
19881 const char *LibCallFloatName,
19882 const char *LibCallDoubleNameFinite,
19883 const char *LibCallFloatNameFinite,
19884 SDValue Op,
19885 SelectionDAG &DAG) const {
19886 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
19887 return SDValue();
19888
19889 if (!isLowringToMASSFiniteSafe(Op))
19890 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
19891 DAG);
19892
19893 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
19894 LibCallDoubleNameFinite, Op, DAG);
19895}
19896
19897SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
19898 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
19899 "__xl_powf_finite", Op, DAG);
19900}
19901
19902SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
19903 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
19904 "__xl_sinf_finite", Op, DAG);
19905}
19906
19907SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
19908 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
19909 "__xl_cosf_finite", Op, DAG);
19910}
19911
19912SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
19913 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
19914 "__xl_logf_finite", Op, DAG);
19915}
19916
19917SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
19918 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
19919 "__xl_log10f_finite", Op, DAG);
19920}
19921
19922SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
19923 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
19924 "__xl_expf_finite", Op, DAG);
19925}
19926
19927// If we happen to match to an aligned D-Form, check if the Frame Index is
19928// adequately aligned. If it is not, reset the mode to match to X-Form.
19929static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
19930                                     PPC::AddrMode &Mode) {
19931   if (!isa<FrameIndexSDNode>(N))
19932     return;
19933 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
19934       (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
19935     Mode = PPC::AM_XForm;
19936 }
19937
19938 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
19939/// compute the address flags of the node, get the optimal address mode based
19940/// on the flags, and set the Base and Disp based on the address mode.
19941 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
19942                                                         SDValue N, SDValue &Disp,
19943 SDValue &Base,
19944 SelectionDAG &DAG,
19945 MaybeAlign Align) const {
19946 SDLoc DL(Parent);
19947
19948 // Compute the address flags.
19949 unsigned Flags = computeMOFlags(Parent, N, DAG);
19950
19951 // Get the optimal address mode based on the Flags.
19952 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
19953
19954 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
19955 // Select an X-Form load if it is not.
19956 setXFormForUnalignedFI(N, Flags, Mode);
19957
19958 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
19959 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
19960 assert(Subtarget.isUsingPCRelativeCalls() &&
19961 "Must be using PC-Relative calls when a valid PC-Relative node is "
19962 "present!");
19963 Mode = PPC::AM_PCRel;
19964 }
19965
19966 // Set Base and Disp accordingly depending on the address mode.
19967 switch (Mode) {
19968 case PPC::AM_DForm:
19969 case PPC::AM_DSForm:
19970 case PPC::AM_DQForm: {
19971 // This is a register plus a 16-bit immediate. The base will be the
19972 // register and the displacement will be the immediate unless it
19973 // isn't sufficiently aligned.
19974 if (Flags & PPC::MOF_RPlusSImm16) {
19975 SDValue Op0 = N.getOperand(0);
19976 SDValue Op1 = N.getOperand(1);
19977 int16_t Imm = Op1->getAsZExtVal();
19978 if (!Align || isAligned(*Align, Imm)) {
19979 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
19980        Base = Op0;
19981        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
19982          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19983          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19984        }
19985 break;
19986 }
19987 }
19988 // This is a register plus the @lo relocation. The base is the register
19989 // and the displacement is the global address.
19990    else if (Flags & PPC::MOF_RPlusLo) {
19991      Disp = N.getOperand(1).getOperand(0); // The global address.
19992      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
19993             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
19994             Disp.getOpcode() == ISD::TargetConstantPool ||
19995             Disp.getOpcode() == ISD::TargetJumpTable);
19996      Base = N.getOperand(0);
19997      break;
19998    }
19999 // This is a constant address at most 32 bits. The base will be
20000 // zero or load-immediate-shifted and the displacement will be
20001 // the low 16 bits of the address.
20002 else if (Flags & PPC::MOF_AddrIsSImm32) {
20003 auto *CN = cast<ConstantSDNode>(N);
20004 EVT CNType = CN->getValueType(0);
20005 uint64_t CNImm = CN->getZExtValue();
20006 // If this address fits entirely in a 16-bit sext immediate field, codegen
20007 // this as "d, 0".
20008 int16_t Imm;
20009 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20010 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20011 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20012 CNType);
20013 break;
20014 }
20015 // Handle 32-bit sext immediate with LIS + Addr mode.
20016 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20017 (!Align || isAligned(*Align, CNImm))) {
20018 int32_t Addr = (int32_t)CNImm;
20019 // Otherwise, break this down into LIS + Disp.
20020 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20021 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20022 MVT::i32);
20023 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20024 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
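      // Worked example (illustrative address): for CNImm = 0x12348000 the low
      // half truncates to (int16_t)0x8000 == -32768, so Disp = -32768 and
      // Base = (0x12348000 - (-32768)) >> 16 = 0x1235; LIS 0x1235 then
      // materializes 0x12350000 and the -32768 displacement brings the sum
      // back to 0x12348000.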
20025 break;
20026 }
20027 }
20028    // Otherwise, the PPC::MOF_NotAdd flag is set. Load/Store is Non-foldable.
20029    Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
20030    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
20031      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20032      fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20033    } else
20034 Base = N;
20035 break;
20036 }
20037 case PPC::AM_PrefixDForm: {
20038 int64_t Imm34 = 0;
20039 unsigned Opcode = N.getOpcode();
20040 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20041 (isIntS34Immediate(N.getOperand(1), Imm34))) {
20042      // N is an Add/OR Node, and its operand is a 34-bit signed immediate.
20043 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20044 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20045 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20046 else
20047 Base = N.getOperand(0);
20048 } else if (isIntS34Immediate(N, Imm34)) {
20049 // The address is a 34-bit signed immediate.
20050 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20051 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20052 }
20053 break;
20054 }
20055 case PPC::AM_PCRel: {
20056 // When selecting PC-Relative instructions, "Base" is not utilized as
20057 // we select the address as [PC+imm].
20058 Disp = N;
20059 break;
20060 }
20061 case PPC::AM_None:
20062 break;
20063 default: { // By default, X-Form is always available to be selected.
20064    // When a frame index is not aligned, we also match by XForm.
20065    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
20066    Base = FI ? N : N.getOperand(1);
20067 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20068 N.getValueType())
20069 : N.getOperand(0);
20070 break;
20071 }
20072 }
20073 return Mode;
20074}
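// A compact summary of what each selected mode means for the final
// instruction shape, using common load mnemonics as a sketch:
//   D-form        -> lwz rT, d(rB), 16-bit signed d
//   DS-form       -> ld rT, ds(rB), ds a multiple of 4
//   DQ-form       -> lxv vT, dq(rB), dq a multiple of 16
//   prefix D-form -> pld rT, d34(rB), 34-bit signed d34
//   PC-relative   -> pld rT, sym@PCREL
//   X-form        -> ldx rT, rA, rB, offset held in a register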
20075
20076CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
20077                                                 bool Return,
20078                                                 bool IsVarArg) const {
20079 switch (CC) {
20080 case CallingConv::Cold:
20081 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20082 default:
20083 return CC_PPC64_ELF;
20084 }
20085}
20086
20087bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
20088  return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20089}
20090
20091TargetLowering::AtomicExpansionKind
20092PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20093  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20094  if (shouldInlineQuadwordAtomics() && Size == 128)
20095    return AtomicExpansionKind::MaskedIntrinsic;
20096
20097  switch (AI->getOperation()) {
20098  case AtomicRMWInst::UIncWrap:
20099  case AtomicRMWInst::UDecWrap:
20100  case AtomicRMWInst::USubCond:
20101  case AtomicRMWInst::USubSat:
20102    return AtomicExpansionKind::CmpXChg;
20103  default:
20104    return TargetLowering::shouldExpandAtomicRMWInIR(AI);
20105  }
20106
20107  llvm_unreachable("unreachable atomicrmw operation");
20108}
20109
20110TargetLowering::AtomicExpansionKind
20111PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
20112  unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
20113  if (shouldInlineQuadwordAtomics() && Size == 128)
20114    return AtomicExpansionKind::MaskedIntrinsic;
20115  return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
20116}
20117
20118static Intrinsic::ID
20119getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
20120  switch (BinOp) {
20121 default:
20122    llvm_unreachable("Unexpected AtomicRMW BinOp");
20123  case AtomicRMWInst::Xchg:
20124    return Intrinsic::ppc_atomicrmw_xchg_i128;
20125 case AtomicRMWInst::Add:
20126 return Intrinsic::ppc_atomicrmw_add_i128;
20127 case AtomicRMWInst::Sub:
20128 return Intrinsic::ppc_atomicrmw_sub_i128;
20129 case AtomicRMWInst::And:
20130 return Intrinsic::ppc_atomicrmw_and_i128;
20131 case AtomicRMWInst::Or:
20132 return Intrinsic::ppc_atomicrmw_or_i128;
20133 case AtomicRMWInst::Xor:
20134    return Intrinsic::ppc_atomicrmw_xor_i128;
20135  case AtomicRMWInst::Nand:
20136    return Intrinsic::ppc_atomicrmw_nand_i128;
20137 }
20138}
20139
20140Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
20141    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20142 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20143 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20144 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20145 Type *ValTy = Incr->getType();
20146 assert(ValTy->getPrimitiveSizeInBits() == 128);
20147 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20148 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20149 Value *IncrHi =
20150 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20151  Value *LoHi = Builder.CreateIntrinsic(
20152      getIntrinsicForAtomicRMWBinOp128(AI->getOperation()),
20153      {AlignedAddr, IncrLo, IncrHi});
20154 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20155 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20156 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20157 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20158 return Builder.CreateOr(
20159 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20160}
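// Shape of the expansion above, sketched for 'atomicrmw add ptr %p, i128 %v':
// %v is split into 64-bit halves with trunc and lshr+trunc, the halves are
// passed to the ppc_atomicrmw_add_i128 intrinsic together with the aligned
// address, and the returned {lo, hi} pair is reassembled into an i128 with
// zext, shl 64 and or.  The cmpxchg path below follows the same pattern with
// separate compare and new values plus explicit leading and trailing fences.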
20161
20162Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20163    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20164 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20165 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20166 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20167 Type *ValTy = CmpVal->getType();
20168 assert(ValTy->getPrimitiveSizeInBits() == 128);
20169 Function *IntCmpXchg =
20170 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20171 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20172 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20173 Value *CmpHi =
20174 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20175 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20176 Value *NewHi =
20177 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20178 emitLeadingFence(Builder, CI, Ord);
20179 Value *LoHi =
20180 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20181 emitTrailingFence(Builder, CI, Ord);
20182 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20183 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20184 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20185 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20186 return Builder.CreateOr(
20187 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20188}
20189
20190bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
20191  return Subtarget.useCRBits();
20192}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
static bool Enabled
Definition Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
bool isDenormal() const
Definition APFloat.h:1432
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
APInt abs() const
Get the absolute value.
Definition APInt.h:1796
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1397
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1723
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:207
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:363
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
arg_iterator arg_begin()
Definition Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
size_t arg_size() const
Definition Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:191
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:132
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1078
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
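For orientation, here is a minimal sketch of how these SelectionDAG helpers compose inside a target combine; the fold shown (rewriting a compare against -1 as a compare of the bitwise NOT against zero) is purely illustrative, and buildNotIsZero is a hypothetical helper, not a function from this file.
// Illustrative only: given X, build setcc((xor X, -1), 0, seteq), i.e. "X == -1".
static SDValue buildNotIsZero(SDValue X, const SDLoc &DL, SelectionDAG &DAG) {
  EVT VT = X.getValueType();
  SDValue NotX = DAG.getNOT(DL, X, VT);                    // (xor X, -1)
  SDValue Zero = DAG.getConstant(0, DL, VT);
  EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), VT);         // target's setcc result type
  return DAG.getSetCC(DL, CCVT, NotX, Zero, ISD::SETEQ);
}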
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
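As a small hedged example of the BUILD_VECTOR helpers above, the snippet below materializes a v4i32 splat of a 32-bit constant; makeSplat4xi32 is a hypothetical name used only for illustration.
// Sketch: splat a 32-bit constant across all four lanes of a v4i32 BUILD_VECTOR.
static SDValue makeSplat4xi32(uint32_t Val, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue Elt = DAG.getConstant(Val, DL, MVT::i32);
  return DAG.getSplatBuildVector(MVT::v4i32, DL, Elt); // BUILD_VECTOR Elt, Elt, Elt, Elt
}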
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
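A hedged sketch of the kind of query these known-bits helpers enable in a DAG combine: if every bit cleared by an AND mask is already known to be zero in the operand, the AND is redundant. This is a generic example, not a specific combine performed by this file.
// Sketch: drop a redundant AND when the masked-off bits are provably already zero.
static SDValue simplifyRedundantAnd(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::AND && "expected an ISD::AND node");
  SDValue LHS = N->getOperand(0);
  auto *MaskC = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!MaskC)
    return SDValue();
  APInt ClearedBits = ~MaskC->getAPIntValue();   // bits the AND would clear
  if (DAG.MaskedValueIsZero(LHS, ClearedBits))
    return LHS;                                  // those bits are already zero
  return SDValue();
}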
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
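For context, a rough sketch of how these TargetLowering configuration hooks are typically invoked from a target's lowering constructor; the class names, register class, and choices below are placeholders rather than the actual PPC configuration.
// Hypothetical target lowering constructor (illustrative, not the real PPC setup).
MyTargetLowering::MyTargetLowering(const TargetMachine &TM, const MySubtarget &STI)
    : TargetLowering(TM) {
  addRegisterClass(MVT::i32, &MyNS::GPRCRegClass);        // assumed register class
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);   // legalize via generic expansion
  setOperationAction(ISD::BSWAP, MVT::i32, Custom);       // handled in LowerOperation
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);        // no native truncating FP store
  setMinimumJumpTableEntries(64);                         // tune switch lowering threshold
  computeRegisterProperties(STI.getRegisterInfo());       // derive register properties
}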
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of the largest number of comparisons to generate a BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:181
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:258
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ TargetConstantPool
Definition ISDOpcodes.h:184
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ TargetExternalSymbol
Definition ISDOpcodes.h:185
@ TargetJumpTable
Definition ISDOpcodes.h:183
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:180
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:181
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:133
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:200
@ MO_TPREL_HA
Definition PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:113
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:140
@ MO_TPREL_LO
Definition PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:160
@ MO_HA
Definition PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ SETBC
SETBC - The ISA 3.1 (P10) SETBC instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ SETBCR
SETBCR - The ISA 3.1 (P10) SETBCR instruction.
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - splatting memory load instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ VSRQ
VSRQ - The ISA 3.1 (P10) Vector Shift right quadword instruction.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ ADDC
These nodes represent PPC arithmetic operations with carry.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
@ Define
Register definition.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
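As a quick illustration (values are made up, not taken from this file), isInt<N> is the kind of check a backend applies before folding a displacement into a signed 16-bit immediate field:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// PPC D-form memory accesses carry a signed 16-bit displacement.
static_assert(llvm::isInt<16>(-32768), "fits in a signed 16-bit field");
static_assert(!llvm::isInt<16>(32768), "one past the signed 16-bit maximum");
// cf. isIntS34Immediate below, used for the 34-bit immediates of prefixed forms.
static_assert(llvm::isInt<34>(INT64_C(1) << 30), "fits in a signed 34-bit field");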
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count the number of 0's from the least significant bit upward, stopping at the first 1.
Definition bit.h:202
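A small self-contained illustration of how this pairs with isPowerOf2_64 above: a power-of-two size can be turned into a shift amount with countr_zero (the value 16 is arbitrary).

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  uint64_t EltSize = 16;                        // e.g. a 16-byte element group
  assert(llvm::isPowerOf2_64(EltSize));
  unsigned Shift = llvm::countr_zero(EltSize);  // 4 == log2(16)
  assert((UINT64_C(1) << Shift) == EltSize);
  return 0;
}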
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
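For example (hypothetical values), the range wrappers read naturally when checking a list of operands:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  llvm::SmallVector<int, 4> Elts = {4, 8, 16};
  bool AllPow2 = llvm::all_of(Elts, [](int V) { return llvm::isPowerOf2_32(V); });
  bool AnyWide = llvm::any_of(Elts, [](int V) { return V > 8; });
  assert(AllPow2 && AnyWide);
  return 0;
}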
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
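Hi_32 and Lo_32 are the usual way to split a 64-bit constant into the two halves that get materialized separately; a minimal sketch with an arbitrary value:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Imm = UINT64_C(0x123456789ABCDEF0);
  uint32_t Hi = llvm::Hi_32(Imm);  // 0x12345678
  uint32_t Lo = llvm::Lo_32(Imm);  // 0x9ABCDEF0
  assert(((uint64_t)Hi << 32 | Lo) == Imm);
  return 0;
}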
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
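A trivial usage sketch (the values are made up) combining format with the errs stream:

#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::errs() << llvm::format("mask = 0x%08x, MB = %u, ME = %u\n",
                               0x00FFFF00u, 8u, 23u);
  return 0;
}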
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
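For instance, bit_cast gives the raw encoding of a double without violating strict aliasing; the constant below is simply the IEEE-754 encoding of 1.0:

#include "llvm/ADT/bit.h"
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Bits = llvm::bit_cast<uint64_t>(1.0);
  assert(Bits == UINT64_C(0x3FF0000000000000));
  assert(llvm::bit_cast<double>(Bits) == 1.0);
  return 0;
}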
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
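A short sketch of the alignment helpers seen in this listing (isAligned appears earlier, alignTo here); the numbers are arbitrary:

#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  llvm::Align A(16);
  assert(llvm::isAligned(A, 32));      // 32 is a multiple of 16
  assert(!llvm::isAligned(A, 20));
  assert(llvm::alignTo(20, A) == 32);  // round 20 up to the next multiple of 16
  return 0;
}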
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
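These two predicates (isReleaseOrStronger appears earlier in this list) are typically consulted when deciding whether an atomic access needs surrounding fences; a minimal check of their behaviour:

#include "llvm/Support/AtomicOrdering.h"
#include <cassert>

int main() {
  using llvm::AtomicOrdering;
  assert(llvm::isAcquireOrStronger(AtomicOrdering::SequentiallyConsistent));
  assert(!llvm::isAcquireOrStronger(AtomicOrdering::Monotonic));
  assert(llvm::isReleaseOrStronger(AtomicOrdering::Release));
  assert(!llvm::isReleaseOrStronger(AtomicOrdering::Acquire));
  return 0;
}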
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N-bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
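SignExtend32 is the standard way to widen a 16-bit immediate field back to its signed value; two worked cases (compile-time only):

#include "llvm/Support/MathExtras.h"

static_assert(llvm::SignExtend32<16>(0xFFFF) == -1, "all-ones low half is -1");
static_assert(llvm::SignExtend32<16>(0x7FFF) == 32767, "positive values pass through");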
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
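isa, cast and dyn_cast are the LLVM-style RTTI used throughout this file; a self-contained illustration on an IR constant:

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include <cassert>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Value *V = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
  assert(isa<ConstantInt>(V));                 // query only
  ConstantInt *CI = cast<ConstantInt>(V);      // asserts if the type were wrong
  assert(CI->getSExtValue() == 42);
  assert(dyn_cast<ConstantFP>(V) == nullptr);  // graceful failure path
  return 0;
}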
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
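The helper itself is file-local, so the sketch below is only an illustrative reimplementation of the simple (non-wrapping) case. It reports MB and ME as IBM-style bit positions (bit 0 = MSB) of the first and last 1, which is how rlwinm-style masks are described.

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

// Hypothetical helper, not the in-tree function.
static bool isSimpleRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
  if (Val == 0 || !llvm::isShiftedMask_32(Val))
    return false;
  MB = llvm::countl_zero(Val);      // first 1, counted from the MSB
  ME = 31 - llvm::countr_zero(Val); // last 1, counted from the MSB
  return true;
}

int main() {
  unsigned MB, ME;
  assert(isSimpleRunOfOnes(0x00FFFF00u, MB, ME) && MB == 8 && ME == 23);
  assert(!isSimpleRunOfOnes(0x00FF00F0u, MB, ME)); // two separate runs of 1s
  return 0;
}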
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:198
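For example, a signed 16-bit displacement that must also be a multiple of 4 (the shape of PPC DS-form offsets) can be expressed as isShiftedInt<14, 2>; the checks below are compile-time only:

#include "llvm/Support/MathExtras.h"

static_assert(llvm::isShiftedInt<14, 2>(32764), "fits in 16 bits and is a multiple of 4");
static_assert(!llvm::isShiftedInt<14, 2>(32766), "not a multiple of 4");
static_assert(!llvm::isShiftedInt<14, 2>(40000), "does not fit in 16 signed bits");
static_assert(llvm::isShiftedUInt<6, 2>(252), "unsigned variant: 6 bits shifted left by 2");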
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
LLVM_ABI std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
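The EVT queries above are easiest to see on a concrete simple type; a minimal sketch using v4f32 (every value follows directly from the type):

#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;

int main() {
  EVT VT = MVT::v4f32;
  assert(VT.isVector() && VT.isFloatingPoint() && VT.isSimple());
  assert(VT.getVectorNumElements() == 4);
  assert(VT.getScalarSizeInBits() == 32);
  assert(VT.getFixedSizeInBits() == 128);
  assert(VT.getVectorElementType() == MVT::f32);
  assert(VT.changeVectorElementTypeToInteger() == MVT::v4i32);
  return 0;
}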
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
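A minimal demonstration of the three KnownBits members listed here (the bit pattern 0xA5 is arbitrary):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

int main() {
  KnownBits Known(8);
  Known.One = APInt(8, 0xA5);
  Known.Zero = ~Known.One;            // every bit now has a known value
  assert(Known.isConstant());
  assert(Known.getConstant() == 0xA5);
  Known.resetAll();                   // back to "nothing known"
  assert(!Known.isConstant());
  return 0;
}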
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
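A small hedged sketch of how these factory functions compose; MF and FI stand in for a MachineFunction and a frame index that a real lowering routine would already have on hand:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// Hypothetical helper: describe the second 4-byte word of a stack slot.
static MachinePointerInfo secondWordOfSlot(MachineFunction &MF, int FI) {
  MachinePointerInfo Slot = MachinePointerInfo::getFixedStack(MF, FI);
  return Slot.getWithOffset(4);
}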
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
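The setters above are normally chained. Below is a schematic fragment (not a complete function, and not copied from this file) of the generic TargetLowering pattern for emitting a runtime-library call from inside a lowering hook, where DAG, dl, Chain, Callee, RetTy and Args are assumed to already be in scope:

// Schematic fragment only -- assumes it sits inside a TargetLowering member.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
    .setTailCall(false)
    .setSExtResult(true);
std::pair<SDValue, SDValue> Call = LowerCallTo(CLI); // {result value, out chain}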
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.