LLVM 22.0.0git
PPCISelLowering.cpp
Go to the documentation of this file.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
148 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
171
172// FIXME: Remove this once the bug has been fixed!
174
176 const PPCSubtarget &STI)
177 : TargetLowering(TM, STI), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting load and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186 const MVT RegVT = Subtarget.getScalarIntVT();
187
188 // Set up the register classes.
189 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
190 if (!useSoftFloat()) {
191 if (hasSPE()) {
192 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
193 // EFPU2 APU only supports f32
194 if (!Subtarget.hasEFPU2())
195 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
196 } else {
197 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
198 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
199 }
200 }
201
204
205 // PowerPC uses addo_carry,subo_carry to propagate carry.
208
209 // On P10, the default lowering generates better code using the
210 // setbc instruction.
211 if (!Subtarget.hasP10Vector()) {
214 if (isPPC64) {
217 }
218 }
219
220 // Match BITREVERSE to customized fast code sequence in the td file.
223
224 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
225 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
226
227 // Custom lower inline assembly to check for special registers.
228 setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
229 setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
230
231 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
232 for (MVT VT : MVT::integer_valuetypes()) {
235 }
236
237 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
238 setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
239
240 if (Subtarget.isISA3_0()) {
241 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
242 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
243 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
244 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
245 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
246 } else {
247 // No extending loads from f16 or HW conversions back and forth.
248 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
249 setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
250 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
251 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
252 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
253 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
254 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
255 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
256 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
257 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
258 }
259
260 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
261
262 // PowerPC has pre-inc load and store's.
273 if (!Subtarget.hasSPE()) {
278 }
279
280 if (Subtarget.useCRBits()) {
282
283 if (isPPC64 || Subtarget.hasFPCVT()) {
288
290 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
292 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
293
298
300 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
302 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
303 } else {
308 }
309
310 // PowerPC does not support direct load/store of condition registers.
311 setOperationAction(ISD::LOAD, MVT::i1, Custom);
312 setOperationAction(ISD::STORE, MVT::i1, Custom);
313
314 // FIXME: Remove this once the ANDI glue bug is fixed:
315 if (ANDIGlueBug)
317
318 for (MVT VT : MVT::integer_valuetypes()) {
321 setTruncStoreAction(VT, MVT::i1, Expand);
322 }
323
324 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
325 }
326
327 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
328 // PPC (the libcall is not available).
333
334 // We do not currently implement these libm ops for PowerPC.
335 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
336 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
337 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
338 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
339 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
340 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
341
342 // PowerPC has no SREM/UREM instructions unless we are on P9
343 // On P9 we may use a hardware instruction to compute the remainder.
344 // When the result of both the remainder and the division is required it is
345 // more efficient to compute the remainder from the result of the division
346 // rather than use the remainder instruction. The instructions are legalized
347 // directly because the DivRemPairsPass performs the transformation at the IR
348 // level.
349 if (Subtarget.isISA3_0()) {
354 } else {
359 }
360
361 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
370
371 // Handle constrained floating-point operations of scalar.
372 // TODO: Handle SPE specific operation.
378
383
384 if (!Subtarget.hasSPE()) {
387 }
388
389 if (Subtarget.hasVSX()) {
392 }
393
394 if (Subtarget.hasFSQRT()) {
397 }
398
399 if (Subtarget.hasFPRND()) {
404
409 }
410
411 // We don't support sin/cos/sqrt/fmod/pow
412 setOperationAction(ISD::FSIN , MVT::f64, Expand);
413 setOperationAction(ISD::FCOS , MVT::f64, Expand);
414 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
416 setOperationAction(ISD::FPOW , MVT::f64, Expand);
417 setOperationAction(ISD::FSIN , MVT::f32, Expand);
418 setOperationAction(ISD::FCOS , MVT::f32, Expand);
419 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
421 setOperationAction(ISD::FPOW , MVT::f32, Expand);
422
423 // MASS transformation for LLVM intrinsics with replicating fast-math flag
424 // to be consistent to PPCGenScalarMASSEntries pass
425 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
426 setOperationAction(ISD::FSIN , MVT::f64, Custom);
427 setOperationAction(ISD::FCOS , MVT::f64, Custom);
428 setOperationAction(ISD::FPOW , MVT::f64, Custom);
429 setOperationAction(ISD::FLOG, MVT::f64, Custom);
430 setOperationAction(ISD::FLOG10, MVT::f64, Custom);
431 setOperationAction(ISD::FEXP, MVT::f64, Custom);
432 setOperationAction(ISD::FSIN , MVT::f32, Custom);
433 setOperationAction(ISD::FCOS , MVT::f32, Custom);
434 setOperationAction(ISD::FPOW , MVT::f32, Custom);
435 setOperationAction(ISD::FLOG, MVT::f32, Custom);
436 setOperationAction(ISD::FLOG10, MVT::f32, Custom);
437 setOperationAction(ISD::FEXP, MVT::f32, Custom);
438 }
439
440 if (Subtarget.hasSPE()) {
443 } else {
444 setOperationAction(ISD::FMA , MVT::f64, Legal);
445 setOperationAction(ISD::FMA , MVT::f32, Legal);
447 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
448 }
449
450 if (Subtarget.hasSPE())
451 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
452
453 // If we're enabling GP optimizations, use hardware square root
454 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
455 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
456
457 if (!Subtarget.hasFSQRT() &&
458 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
459 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
460
461 if (Subtarget.hasFCPSGN()) {
464 } else {
467 }
468
469 if (Subtarget.hasFPRND()) {
470 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
471 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
472 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
473 setOperationAction(ISD::FROUND, MVT::f64, Legal);
474
475 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
476 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
477 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
478 setOperationAction(ISD::FROUND, MVT::f32, Legal);
479 }
480
481 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
482 // instruction xxbrd to speed up scalar BSWAP64.
483 if (Subtarget.isISA3_1()) {
486 } else {
489 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
490 }
491
492 // CTPOP or CTTZ were introduced in P8/P9 respectively
493 if (Subtarget.isISA3_0()) {
494 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
495 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
496 } else {
497 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
498 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
499 }
500
501 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
504 } else {
507 }
508
509 // PowerPC does not have ROTR
512
513 if (!Subtarget.useCRBits()) {
514 // PowerPC does not have Select
519 }
520
521 // PowerPC wants to turn select_cc of FP into fsel when possible.
524
525 // PowerPC wants to optimize integer setcc a bit
526 if (!Subtarget.useCRBits())
528
529 if (Subtarget.hasFPU()) {
533
537 }
538
539 // PowerPC does not have BRCOND which requires SetCC
540 if (!Subtarget.useCRBits())
541 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
542
543 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
544
545 if (Subtarget.hasSPE()) {
546 // SPE has built-in conversions
553
554 // SPE supports signaling compare of f32/f64.
557 } else {
558 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
561
562 // PowerPC does not have [U|S]INT_TO_FP
567 }
568
569 if (Subtarget.hasDirectMove() && isPPC64) {
570 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
571 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
572 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
573 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
574
583 } else {
584 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
585 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
586 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
587 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
588 }
589
590 // We cannot sextinreg(i1). Expand to shifts.
592
593 // Custom handling for PowerPC ucmp instruction
595 setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
596
597 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
598 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
599 // support continuation, user-level threading, and etc.. As a result, no
600 // other SjLj exception interfaces are implemented and please don't build
601 // your own exception handling based on them.
602 // LLVM/Clang supports zero-cost DWARF exception handling.
605
606 // We want to legalize GlobalAddress and ConstantPool nodes into the
607 // appropriate instructions to materialize the address.
618
619 // TRAP is legal.
620 setOperationAction(ISD::TRAP, MVT::Other, Legal);
621
622 // TRAMPOLINE is custom lowered.
623 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
624 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
625
626 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
627 setOperationAction(ISD::VASTART , MVT::Other, Custom);
628
629 if (Subtarget.is64BitELFABI()) {
630 // VAARG always uses double-word chunks, so promote anything smaller.
631 setOperationAction(ISD::VAARG, MVT::i1, Promote);
632 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
633 setOperationAction(ISD::VAARG, MVT::i8, Promote);
634 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
635 setOperationAction(ISD::VAARG, MVT::i16, Promote);
636 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
637 setOperationAction(ISD::VAARG, MVT::i32, Promote);
638 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
639 setOperationAction(ISD::VAARG, MVT::Other, Expand);
640 } else if (Subtarget.is32BitELFABI()) {
641 // VAARG is custom lowered with the 32-bit SVR4 ABI.
642 setOperationAction(ISD::VAARG, MVT::Other, Custom);
643 setOperationAction(ISD::VAARG, MVT::i64, Custom);
644 } else
645 setOperationAction(ISD::VAARG, MVT::Other, Expand);
646
647 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
648 if (Subtarget.is32BitELFABI())
649 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
650 else
651 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
652
653 // Use the default implementation.
654 setOperationAction(ISD::VAEND , MVT::Other, Expand);
655 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
656 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
657 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
658 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
659 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
660 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
663
664 if (Subtarget.isISA3_0() && isPPC64) {
665 setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
666 setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
667 setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
668 setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
669 setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
670 setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
671 setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
672 setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
673 }
674
675 // We want to custom lower some of our intrinsics.
681
682 // To handle counter-based loop conditions.
685
690
691 // Comparisons that require checking two conditions.
692 if (Subtarget.hasSPE()) {
697 }
710
713
714 if (Subtarget.has64BitSupport()) {
715 // They also have instructions for converting between i64 and fp.
724 // This is just the low 32 bits of a (signed) fp->i64 conversion.
725 // We cannot do this with Promote because i64 is not a legal type.
728
729 if (Subtarget.hasLFIWAX() || isPPC64) {
732 }
733 } else {
734 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
735 if (Subtarget.hasSPE()) {
738 } else {
741 }
742 }
743
744 // With the instructions enabled under FPCVT, we can do everything.
745 if (Subtarget.hasFPCVT()) {
746 if (Subtarget.has64BitSupport()) {
755 }
756
765 }
766
767 if (Subtarget.use64BitRegs()) {
768 // 64-bit PowerPC implementations can support i64 types directly
769 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
770 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
772 // 64-bit PowerPC wants to expand i128 shifts itself.
776 } else {
777 // 32-bit PowerPC wants to expand i64 shifts itself.
781 }
782
783 // PowerPC has better expansions for funnel shifts than the generic
784 // TargetLowering::expandFunnelShift.
785 if (Subtarget.has64BitSupport()) {
788 }
791
792 if (Subtarget.hasVSX()) {
793 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
794 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
795 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
796 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
799 }
800
801 if (Subtarget.hasAltivec()) {
802 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
807 }
808 // First set operation action for all vector types to expand. Then we
809 // will selectively turn on ones that can be effectively codegen'd.
811 // add/sub are legal for all supported vector VT's.
814
815 // For v2i64, these are only valid with P8Vector. This is corrected after
816 // the loop.
817 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
822 }
823 else {
828 }
829
830 if (Subtarget.hasVSX()) {
831 setOperationAction(ISD::FMAXNUM, VT, Legal);
832 setOperationAction(ISD::FMINNUM, VT, Legal);
833 }
834
835 // Vector instructions introduced in P8
836 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
839 }
840 else {
843 }
844
845 // Vector instructions introduced in P9
846 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
848 else
850
851 // We promote all shuffles to v16i8.
853 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
854
855 // We promote all non-typed operations to v4i32.
857 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
859 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
861 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
862 setOperationAction(ISD::LOAD , VT, Promote);
863 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
865 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
868 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
869 setOperationAction(ISD::STORE, VT, Promote);
870 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
871
872 // No other operations are legal.
880 setOperationAction(ISD::FNEG, VT, Expand);
881 setOperationAction(ISD::FSQRT, VT, Expand);
882 setOperationAction(ISD::FLOG, VT, Expand);
883 setOperationAction(ISD::FLOG10, VT, Expand);
884 setOperationAction(ISD::FLOG2, VT, Expand);
885 setOperationAction(ISD::FEXP, VT, Expand);
886 setOperationAction(ISD::FEXP2, VT, Expand);
887 setOperationAction(ISD::FSIN, VT, Expand);
888 setOperationAction(ISD::FCOS, VT, Expand);
889 setOperationAction(ISD::FABS, VT, Expand);
890 setOperationAction(ISD::FFLOOR, VT, Expand);
891 setOperationAction(ISD::FCEIL, VT, Expand);
892 setOperationAction(ISD::FTRUNC, VT, Expand);
893 setOperationAction(ISD::FRINT, VT, Expand);
894 setOperationAction(ISD::FLDEXP, VT, Expand);
895 setOperationAction(ISD::FNEARBYINT, VT, Expand);
906 setOperationAction(ISD::FPOW, VT, Expand);
911
912 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
913 setTruncStoreAction(VT, InnerVT, Expand);
916 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
917 }
918 }
920 if (!Subtarget.hasP8Vector()) {
921 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
922 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
923 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
924 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
925 }
926
927 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
928 // with merges, splats, etc.
930
931 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
932 // are cheap, so handle them before they get expanded to scalar.
938
939 setOperationAction(ISD::AND , MVT::v4i32, Legal);
940 setOperationAction(ISD::OR , MVT::v4i32, Legal);
941 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
942 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
944 Subtarget.useCRBits() ? Legal : Expand);
945 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
954 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
955 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
956 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
957 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
958
959 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
960 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
961 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
962 if (Subtarget.hasAltivec())
963 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
965 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
966 if (Subtarget.hasP8Altivec())
967 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
968
969 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
970 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
971 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
972 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
973
974 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
975 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
976
977 if (Subtarget.hasVSX()) {
978 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
979 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
981 }
982
983 if (Subtarget.hasP8Altivec())
984 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
985 else
986 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
987
988 if (Subtarget.isISA3_1()) {
989 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
990 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
991 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
992 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
993 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
994 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
995 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
996 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
997 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
998 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
999 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
1000 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
1001 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
1002 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
1003 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
1004 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
1005 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
1006 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
1007 }
1008
1009 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1010 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1011
1014 // LE is P8+/64-bit so direct moves are supported and these operations
1015 // are legal. The custom transformation requires 64-bit since we need a
1016 // pair of stores that will cover a 128-bit load for P10.
1017 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1021 }
1022
1027
1028 // Altivec does not contain unordered floating-point compare instructions
1029 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1030 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1031 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1032 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1033
1034 if (Subtarget.hasVSX()) {
1037 if (Subtarget.hasP8Vector()) {
1040 }
1041 if (Subtarget.hasDirectMove() && isPPC64) {
1050 }
1052
1053 // The nearbyint variants are not allowed to raise the inexact exception
1054 // so we can only code-gen them with fpexcept.ignore.
1057
1058 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1059 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1060 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1061 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1062 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1063 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1064 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1065 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1066
1067 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1068 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1069 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1070 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1071 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1072
1073 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1074 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1075
1076 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1077 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1078
1079 // Share the Altivec comparison restrictions.
1080 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1081 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1082 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1083 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1084
1085 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1086 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1087
1089
1090 if (Subtarget.hasP8Vector())
1091 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1092
1093 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1094
1095 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1096 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1097 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1098
1099 if (Subtarget.hasP8Altivec()) {
1100 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1101 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1102 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1103
1104 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1105 // SRL, but not for SRA because of the instructions available:
1106 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1107 // doing
1108 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1109 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1110 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1111
1112 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1113 }
1114 else {
1115 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1116 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1117 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1118
1119 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1120
1121 // VSX v2i64 only supports non-arithmetic operations.
1122 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1123 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1124 }
1125
1126 if (Subtarget.isISA3_1())
1127 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1128 else
1129 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1130
1131 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1132 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1133 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1134 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1135
1137
1146
1147 // Custom handling for partial vectors of integers converted to
1148 // floating point. We already have optimal handling for v2i32 through
1149 // the DAG combine, so those aren't necessary.
1166
1167 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1168 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1169 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1170 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1173
1176
1177 // Handle constrained floating-point operations of vector.
1178 // The predictor is `hasVSX` because altivec instruction has
1179 // no exception but VSX vector instruction has.
1193
1207
1208 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1209 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1210
1211 for (MVT FPT : MVT::fp_valuetypes())
1212 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1213
1214 // Expand the SELECT to SELECT_CC
1216
1217 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1218 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1219
1220 // No implementation for these ops for PowerPC.
1221 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
1222 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1223 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1224 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1225 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1226 setOperationAction(ISD::FREM, MVT::f128, Expand);
1227 }
1228
1229 if (Subtarget.hasP8Altivec()) {
1230 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1231 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1232 }
1233
1234 if (Subtarget.hasP9Vector()) {
1237
1238 // Test data class instructions store results in CR bits.
1239 if (Subtarget.useCRBits()) {
1244 }
1245
1246 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1247 // SRL, but not for SRA because of the instructions available:
1248 // VS{RL} and VS{RL}O.
1249 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1250 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1251 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1252
1253 setOperationAction(ISD::FADD, MVT::f128, Legal);
1254 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1255 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1256 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1257 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1258
1259 setOperationAction(ISD::FMA, MVT::f128, Legal);
1266
1267 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1268 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1269 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1270 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1271 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1272 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1273
1276 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1277
1278 // Handle constrained floating-point operations of fp128
1294 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1295 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1296 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1297 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1298 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1299 } else if (Subtarget.hasVSX()) {
1300 setOperationAction(ISD::LOAD, MVT::f128, Promote);
1301 setOperationAction(ISD::STORE, MVT::f128, Promote);
1302
1303 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1304 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1305
1306 // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1307 // fp_to_uint and int_to_fp.
1310
1311 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1312 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1313 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1314 setOperationAction(ISD::FABS, MVT::f128, Expand);
1315 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1316 setOperationAction(ISD::FMA, MVT::f128, Expand);
1318
1319 // Expand the fp_extend if the target type is fp128.
1320 setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1322
1323 // Expand the fp_round if the source type is fp128.
1324 for (MVT VT : {MVT::f32, MVT::f64}) {
1327 }
1328
1332 setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1333
1334 // Lower following f128 select_cc pattern:
1335 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1337
1338 // We need to handle f128 SELECT_CC with integer result type.
1340 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1341 }
1342
1343 if (Subtarget.hasP9Altivec()) {
1344 if (Subtarget.isISA3_1()) {
1349 } else {
1352 }
1360
1361 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1362 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1363 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1364 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1365 }
1366
1367 if (Subtarget.hasP10Vector()) {
1369 }
1370 }
1371
1372 if (Subtarget.pairedVectorMemops()) {
1373 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1374 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1375 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1376 }
1377 if (Subtarget.hasMMA()) {
1378 if (Subtarget.isISAFuture()) {
1379 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1380 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1381 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1382 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1383 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1384 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1385 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1386 } else {
1387 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1388 }
1389 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1390 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1392 }
1393
1394 if (Subtarget.has64BitSupport())
1395 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1396
1397 if (Subtarget.isISA3_1())
1398 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1399
1400 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1401
1402 if (!isPPC64) {
1403 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1404 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1405 }
1406
1408 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1409 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1411 }
1412
1414
1415 if (Subtarget.hasAltivec()) {
1416 // Altivec instructions set fields to all zeros or all ones.
1418 }
1419
1422 else if (isPPC64)
1424 else
1426
1427 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1428
1429 // We have target-specific dag combine patterns for the following nodes:
1432 if (Subtarget.hasFPCVT())
1434 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1435 if (Subtarget.useCRBits())
1436 setTargetDAGCombine(ISD::BRCOND);
1439
1441
1443
1444 if (Subtarget.useCRBits()) {
1446 }
1447
1448 // With 32 condition bits, we don't need to sink (and duplicate) compares
1449 // aggressively in CodeGenPrep.
1450 if (Subtarget.useCRBits()) {
1452 }
1453
1454 // TODO: The default entry number is set to 64. This stops most jump table
1455 // generation on PPC. But it is good for current PPC HWs because the indirect
1456 // branch instruction mtctr to the jump table may lead to bad branch predict.
1457 // Re-evaluate this value on future HWs that can do better with mtctr.
1459
1460 // The default minimum of largest number in a BitTest cluster is 3.
1462
1464 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1465
1466 auto CPUDirective = Subtarget.getCPUDirective();
1467 switch (CPUDirective) {
1468 default: break;
1469 case PPC::DIR_970:
1470 case PPC::DIR_A2:
1471 case PPC::DIR_E500:
1472 case PPC::DIR_E500mc:
1473 case PPC::DIR_E5500:
1474 case PPC::DIR_PWR4:
1475 case PPC::DIR_PWR5:
1476 case PPC::DIR_PWR5X:
1477 case PPC::DIR_PWR6:
1478 case PPC::DIR_PWR6X:
1479 case PPC::DIR_PWR7:
1480 case PPC::DIR_PWR8:
1481 case PPC::DIR_PWR9:
1482 case PPC::DIR_PWR10:
1483 case PPC::DIR_PWR11:
1487 break;
1488 }
1489
1490 if (Subtarget.enableMachineScheduler())
1492 else
1494
1496
1497 // The Freescale cores do better with aggressive inlining of memcpy and
1498 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1499 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1500 MaxStoresPerMemset = 32;
1502 MaxStoresPerMemcpy = 32;
1506 } else if (CPUDirective == PPC::DIR_A2) {
1507 // The A2 also benefits from (very) aggressive inlining of memcpy and
1508 // friends. The overhead of a the function call, even when warm, can be
1509 // over one hundred cycles.
1510 MaxStoresPerMemset = 128;
1511 MaxStoresPerMemcpy = 128;
1512 MaxStoresPerMemmove = 128;
1513 MaxLoadsPerMemcmp = 128;
1514 } else {
1517 }
1518
1519 // Enable generation of STXVP instructions by default for mcpu=future.
1520 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1521 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1522 DisableAutoPairedVecSt = false;
1523
1524 IsStrictFPEnabled = true;
1525
1526 // Let the subtarget (CPU) decide if a predictable select is more expensive
1527 // than the corresponding branch. This information is used in CGP to decide
1528 // when to convert selects into branches.
1529 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1530
1532}
1533
1534// *********************************** NOTE ************************************
1535// For selecting load and store instructions, the addressing modes are defined
1536// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1537// patterns to match the load and store instructions.
1538//
1539// The TD definitions for the addressing modes correspond to their respective
1540// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1541// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1542// address mode flags of a particular node. Afterwards, the computed address
1543// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1544// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1545// accordingly, based on the preferred addressing mode.
1546//
1547// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1548// MemOpFlags contains all the possible flags that can be used to compute the
1549// optimal addressing mode for load and store instructions.
1550// AddrMode contains all the possible load and store addressing modes available
1551// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1552//
1553// When adding new load and store instructions, it is possible that new address
1554// flags may need to be added into MemOpFlags, and a new addressing mode will
1555// need to be added to AddrMode. An entry of the new addressing mode (consisting
1556// of the minimal and main distinguishing address flags for the new load/store
1557// instructions) will need to be added into initializeAddrModeMap() below.
1558// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1559// need to be updated to account for selecting the optimal addressing mode.
1560// *****************************************************************************
1561/// Initialize the map that relates the different addressing modes of the load
1562/// and store instructions to a set of flags. This ensures the load/store
1563/// instruction is correctly matched during instruction selection.
// Populate AddrModesMap, mapping each PPC addressing mode (D-Form, DS-Form,
// DQ-Form, prefixed D-Form) to the MemOpFlags combinations that select it
// during instruction selection (see the NOTE block above).
// NOTE(review): the MOF_* flag-set initializer lines appear to have been
// lost in extraction (only the per-instruction comments survive) -- restore
// them from the upstream file before compiling.
1564void PPCTargetLowering::initializeAddrModeMap() {
1565  AddrModesMap[PPC::AM_DForm] = {
1566      // LWZ, STW
1571      // LBZ, LHZ, STB, STH
1576      // LHA
1581      // LFS, LFD, STFS, STFD
1586  };
1587  AddrModesMap[PPC::AM_DSForm] = {
1588      // LWA
1592      // LD, STD
1596      // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1600  };
1601  AddrModesMap[PPC::AM_DQForm] = {
1602      // LXV, STXV
1606  };
1607  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1609  // TODO: Add mapping for quadword load/store.
1610}
1611
1612/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1613/// the desired ByVal argument alignment.
1614static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1615 if (MaxAlign == MaxMaxAlign)
1616 return;
1617 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1618 if (MaxMaxAlign >= 32 &&
1619 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1620 MaxAlign = Align(32);
1621 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1622 MaxAlign < 16)
1623 MaxAlign = Align(16);
1624 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1625 Align EltAlign;
1626 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1627 if (EltAlign > MaxAlign)
1628 MaxAlign = EltAlign;
1629 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1630 for (auto *EltTy : STy->elements()) {
1631 Align EltAlign;
1632 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1633 if (EltAlign > MaxAlign)
1634 MaxAlign = EltAlign;
1635 if (MaxAlign == MaxMaxAlign)
1636 break;
1637 }
1638 }
1639}
1640
1641/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1642/// function arguments in the caller parameter area.
1644 const DataLayout &DL) const {
1645 // 16byte and wider vectors are passed on 16byte boundary.
1646 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1647 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1648 if (Subtarget.hasAltivec())
1649 getMaxByValAlign(Ty, Alignment, Align(16));
1650 return Alignment;
1651}
1652
1654 return Subtarget.useSoftFloat();
1655}
1656
1658 return Subtarget.hasSPE();
1659}
1660
1662 return VT.isScalarInteger();
1663}
1664
1666 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1667 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1668 return false;
1669
1670 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1671 if (VTy->getScalarType()->isIntegerTy()) {
1672 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1673 if (ElemSizeInBits == 32) {
1674 Index = Subtarget.isLittleEndian() ? 2 : 1;
1675 return true;
1676 }
1677 if (ElemSizeInBits == 64) {
1678 Index = Subtarget.isLittleEndian() ? 1 : 0;
1679 return true;
1680 }
1681 }
1682 }
1683 return false;
1684}
1685
1687 EVT VT) const {
1688 if (!VT.isVector())
1689 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1690
1692}
1693
1695 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1696 return true;
1697}
1698
1699//===----------------------------------------------------------------------===//
1700// Node matching predicates, for use by the tblgen matching code.
1701//===----------------------------------------------------------------------===//
1702
1703/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1706 return CFP->getValueAPF().isZero();
1707 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1708 // Maybe this has already been legalized into the constant pool?
1709 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1710 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1711 return CFP->getValueAPF().isZero();
1712 }
1713 return false;
1714}
1715
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Shuffle-mask entries encode "undef" as a negative value.
  if (Op < 0)
    return true;
  return Op == Val;
}
1721
1722/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1723/// VPKUHUM instruction.
1724/// The ShuffleKind distinguishes between big-endian operations with
1725/// two different inputs (0), either-endian operations with two identical
1726/// inputs (1), and little-endian operations with two different inputs (2).
1727/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1729 SelectionDAG &DAG) {
1730 bool IsLE = DAG.getDataLayout().isLittleEndian();
1731 if (ShuffleKind == 0) {
1732 if (IsLE)
1733 return false;
1734 for (unsigned i = 0; i != 16; ++i)
1735 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1736 return false;
1737 } else if (ShuffleKind == 2) {
1738 if (!IsLE)
1739 return false;
1740 for (unsigned i = 0; i != 16; ++i)
1741 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1742 return false;
1743 } else if (ShuffleKind == 1) {
1744 unsigned j = IsLE ? 0 : 1;
1745 for (unsigned i = 0; i != 8; ++i)
1746 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1747 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1748 return false;
1749 }
1750 return true;
1751}
1752
1753/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1754/// VPKUWUM instruction.
1755/// The ShuffleKind distinguishes between big-endian operations with
1756/// two different inputs (0), either-endian operations with two identical
1757/// inputs (1), and little-endian operations with two different inputs (2).
1758/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1760 SelectionDAG &DAG) {
1761 bool IsLE = DAG.getDataLayout().isLittleEndian();
1762 if (ShuffleKind == 0) {
1763 if (IsLE)
1764 return false;
1765 for (unsigned i = 0; i != 16; i += 2)
1766 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1767 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1768 return false;
1769 } else if (ShuffleKind == 2) {
1770 if (!IsLE)
1771 return false;
1772 for (unsigned i = 0; i != 16; i += 2)
1773 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1774 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1775 return false;
1776 } else if (ShuffleKind == 1) {
1777 unsigned j = IsLE ? 0 : 2;
1778 for (unsigned i = 0; i != 8; i += 2)
1779 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1780 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1781 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1782 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1783 return false;
1784 }
1785 return true;
1786}
1787
1788/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1789/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1790/// current subtarget.
1791///
1792/// The ShuffleKind distinguishes between big-endian operations with
1793/// two different inputs (0), either-endian operations with two identical
1794/// inputs (1), and little-endian operations with two different inputs (2).
1795/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1797 SelectionDAG &DAG) {
1798 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1799 if (!Subtarget.hasP8Vector())
1800 return false;
1801
1802 bool IsLE = DAG.getDataLayout().isLittleEndian();
1803 if (ShuffleKind == 0) {
1804 if (IsLE)
1805 return false;
1806 for (unsigned i = 0; i != 16; i += 4)
1807 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1808 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1809 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1810 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1811 return false;
1812 } else if (ShuffleKind == 2) {
1813 if (!IsLE)
1814 return false;
1815 for (unsigned i = 0; i != 16; i += 4)
1816 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1817 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1818 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1819 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1820 return false;
1821 } else if (ShuffleKind == 1) {
1822 unsigned j = IsLE ? 0 : 4;
1823 for (unsigned i = 0; i != 8; i += 4)
1824 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1825 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1826 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1827 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1828 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1829 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1830 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1831 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1832 return false;
1833 }
1834 return true;
1835}
1836
1837/// isVMerge - Common function, used to match vmrg* shuffles.
1838///
1839static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1840 unsigned LHSStart, unsigned RHSStart) {
1841 if (N->getValueType(0) != MVT::v16i8)
1842 return false;
1843 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1844 "Unsupported merge size!");
1845
1846 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1847 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1848 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1849 LHSStart+j+i*UnitSize) ||
1850 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1851 RHSStart+j+i*UnitSize))
1852 return false;
1853 }
1854 return true;
1855}
1856
1857/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1858/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1859/// The ShuffleKind distinguishes between big-endian merges with two
1860/// different inputs (0), either-endian merges with two identical inputs (1),
1861/// and little-endian merges with two different inputs (2). For the latter,
1862/// the input operands are swapped (see PPCInstrAltivec.td).
1864 unsigned ShuffleKind, SelectionDAG &DAG) {
1865 if (DAG.getDataLayout().isLittleEndian()) {
1866 if (ShuffleKind == 1) // unary
1867 return isVMerge(N, UnitSize, 0, 0);
1868 else if (ShuffleKind == 2) // swapped
1869 return isVMerge(N, UnitSize, 0, 16);
1870 else
1871 return false;
1872 } else {
1873 if (ShuffleKind == 1) // unary
1874 return isVMerge(N, UnitSize, 8, 8);
1875 else if (ShuffleKind == 0) // normal
1876 return isVMerge(N, UnitSize, 8, 24);
1877 else
1878 return false;
1879 }
1880}
1881
1882/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1883/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1884/// The ShuffleKind distinguishes between big-endian merges with two
1885/// different inputs (0), either-endian merges with two identical inputs (1),
1886/// and little-endian merges with two different inputs (2). For the latter,
1887/// the input operands are swapped (see PPCInstrAltivec.td).
1889 unsigned ShuffleKind, SelectionDAG &DAG) {
1890 if (DAG.getDataLayout().isLittleEndian()) {
1891 if (ShuffleKind == 1) // unary
1892 return isVMerge(N, UnitSize, 8, 8);
1893 else if (ShuffleKind == 2) // swapped
1894 return isVMerge(N, UnitSize, 8, 24);
1895 else
1896 return false;
1897 } else {
1898 if (ShuffleKind == 1) // unary
1899 return isVMerge(N, UnitSize, 0, 0);
1900 else if (ShuffleKind == 0) // normal
1901 return isVMerge(N, UnitSize, 0, 16);
1902 else
1903 return false;
1904 }
1905}
1906
1907/**
1908 * Common function used to match vmrgew and vmrgow shuffles
1909 *
1910 * The indexOffset determines whether to look for even or odd words in
1911 * the shuffle mask. This is based on the endianness of the target
1912 * machine.
1913 * - Little Endian:
1914 * - Use offset of 0 to check for odd elements
1915 * - Use offset of 4 to check for even elements
1916 * - Big Endian:
1917 * - Use offset of 0 to check for even elements
1918 * - Use offset of 4 to check for odd elements
1919 * A detailed description of the vector element ordering for little endian and
1920 * big endian can be found at
1921 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1922 * Targeting your applications - what little endian and big endian IBM XL C/C++
1923 * compiler differences mean to you
1924 *
1925 * The mask to the shuffle vector instruction specifies the indices of the
1926 * elements from the two input vectors to place in the result. The elements are
1927 * numbered in array-access order, starting with the first vector. These vectors
1928 * are always of type v16i8, thus each vector will contain 16 elements of size
1929 * 8. More info on the shuffle vector can be found in the
1930 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1931 * Language Reference.
1932 *
1933 * The RHSStartValue indicates whether the same input vectors are used (unary)
1934 * or two different input vectors are used, based on the following:
1935 * - If the instruction uses the same vector for both inputs, the range of the
1936 * indices will be 0 to 15. In this case, the RHSStart value passed should
1937 * be 0.
1938 * - If the instruction has two different vectors then the range of the
1939 * indices will be 0 to 31. In this case, the RHSStart value passed should
1940 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1941 * to 31 specify elements in the second vector).
1942 *
1943 * \param[in] N The shuffle vector SD Node to analyze
1944 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1945 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1946 * vector to the shuffle_vector instruction
1947 * \return true iff this shuffle vector represents an even or odd word merge
1948 */
1949static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1950 unsigned RHSStartValue) {
1951 if (N->getValueType(0) != MVT::v16i8)
1952 return false;
1953
1954 for (unsigned i = 0; i < 2; ++i)
1955 for (unsigned j = 0; j < 4; ++j)
1956 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1957 i*RHSStartValue+j+IndexOffset) ||
1958 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1959 i*RHSStartValue+j+IndexOffset+8))
1960 return false;
1961 return true;
1962}
1963
1964/**
1965 * Determine if the specified shuffle mask is suitable for the vmrgew or
1966 * vmrgow instructions.
1967 *
1968 * \param[in] N The shuffle vector SD Node to analyze
1969 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1970 * \param[in] ShuffleKind Identify the type of merge:
1971 * - 0 = big-endian merge with two different inputs;
1972 * - 1 = either-endian merge with two identical inputs;
1973 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1974 * little-endian merges).
1975 * \param[in] DAG The current SelectionDAG
1976 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow merge
1977 */
1979 unsigned ShuffleKind, SelectionDAG &DAG) {
1980 if (DAG.getDataLayout().isLittleEndian()) {
1981 unsigned indexOffset = CheckEven ? 4 : 0;
1982 if (ShuffleKind == 1) // Unary
1983 return isVMerge(N, indexOffset, 0);
1984 else if (ShuffleKind == 2) // swapped
1985 return isVMerge(N, indexOffset, 16);
1986 else
1987 return false;
1988 }
1989 else {
1990 unsigned indexOffset = CheckEven ? 0 : 4;
1991 if (ShuffleKind == 1) // Unary
1992 return isVMerge(N, indexOffset, 0);
1993 else if (ShuffleKind == 0) // Normal
1994 return isVMerge(N, indexOffset, 16);
1995 else
1996 return false;
1997 }
1998 return false;
1999}
2000
2001/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2002/// amount, otherwise return -1.
2003/// The ShuffleKind distinguishes between big-endian operations with two
2004/// different inputs (0), either-endian operations with two identical inputs
2005/// (1), and little-endian operations with two different inputs (2). For the
2006/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2007int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2008                             SelectionDAG &DAG) {
  // Only v16i8 shuffles can be a vsldoi; reject anything else.
2009  if (N->getValueType(0) != MVT::v16i8)
2010    return -1;
2011
  // NOTE(review): extraction dropped source line 2012 here; judging by the
  // uses below it declared SVOp as the ShuffleVectorSDNode cast of N --
  // confirm against the upstream file.
2013
2014  // Find the first non-undef value in the shuffle mask.
2015  unsigned i;
2016  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2017    /*search*/;
2018
2019  if (i == 16) return -1;  // all undef.
2020
2021  // Otherwise, check to see if the rest of the elements are consecutively
2022  // numbered from this value.
2023  unsigned ShiftAmt = SVOp->getMaskElt(i);
2024  if (ShiftAmt < i) return -1;
2025
  // Normalize so ShiftAmt is the shift implied at mask position 0.
2026  ShiftAmt -= i;
2027  bool isLE = DAG.getDataLayout().isLittleEndian();
2028
2029  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2030    // Check the rest of the elements to see if they are consecutive.
2031    for (++i; i != 16; ++i)
2032      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2033        return -1;
2034  } else if (ShuffleKind == 1) {
2035    // Check the rest of the elements to see if they are consecutive.
  // The unary form reads both halves from one input, so indices wrap
  // around within 16 bytes (hence the & 15).
2036    for (++i; i != 16; ++i)
2037      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2038        return -1;
2039  } else
2040    return -1;
2041
  // On little-endian targets the shift runs in the opposite direction.
2042  if (isLE)
2043    ShiftAmt = 16 - ShiftAmt;
2044
2045  return ShiftAmt;
2046}
2047
2048/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2049/// specifies a splat of a single element that is suitable for input to
2050/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2052 EVT VT = N->getValueType(0);
2053 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2054 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2055
2056 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2057 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2058
2059 // The consecutive indices need to specify an element, not part of two
2060 // different elements. So abandon ship early if this isn't the case.
2061 if (N->getMaskElt(0) % EltSize != 0)
2062 return false;
2063
2064 // This is a splat operation if each element of the permute is the same, and
2065 // if the value doesn't reference the second vector.
2066 unsigned ElementBase = N->getMaskElt(0);
2067
2068 // FIXME: Handle UNDEF elements too!
2069 if (ElementBase >= 16)
2070 return false;
2071
2072 // Check that the indices are consecutive, in the case of a multi-byte element
2073 // splatted with a v16i8 mask.
2074 for (unsigned i = 1; i != EltSize; ++i)
2075 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2076 return false;
2077
2078 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2079 // An UNDEF element is a sequence of UNDEF bytes.
2080 if (N->getMaskElt(i) < 0) {
2081 for (unsigned j = 1; j != EltSize; ++j)
2082 if (N->getMaskElt(i + j) >= 0)
2083 return false;
2084 } else
2085 for (unsigned j = 0; j != EltSize; ++j)
2086 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2087 return false;
2088 }
2089 return true;
2090}
2091
2092/// Check that the mask is shuffling N byte elements. Within each N byte
2093/// element of the mask, the indices could be either in increasing or
2094/// decreasing order as long as they are consecutive.
2095/// \param[in] N the shuffle vector SD Node to analyze
2096/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2097/// Word/DoubleWord/QuadWord).
2098/// \param[in] StepLen the delta indices number among the N byte element, if
2099/// the mask is in increasing/decreasing order then it is 1/-1.
2100/// \return true iff the mask is shuffling N byte elements.
2101static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2102 int StepLen) {
2103 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2104 "Unexpected element width.");
2105 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2106
2107 unsigned NumOfElem = 16 / Width;
2108 unsigned MaskVal[16]; // Width is never greater than 16
2109 for (unsigned i = 0; i < NumOfElem; ++i) {
2110 MaskVal[0] = N->getMaskElt(i * Width);
2111 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2112 return false;
2113 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2114 return false;
2115 }
2116
2117 for (unsigned int j = 1; j < Width; ++j) {
2118 MaskVal[j] = N->getMaskElt(i * Width + j);
2119 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2120 return false;
2121 }
2122 }
2123 }
2124
2125 return true;
2126}
2127
2128bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2129 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2130 if (!isNByteElemShuffleMask(N, 4, 1))
2131 return false;
2132
2133 // Now we look at mask elements 0,4,8,12
2134 unsigned M0 = N->getMaskElt(0) / 4;
2135 unsigned M1 = N->getMaskElt(4) / 4;
2136 unsigned M2 = N->getMaskElt(8) / 4;
2137 unsigned M3 = N->getMaskElt(12) / 4;
2138 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2139 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2140
2141 // Below, let H and L be arbitrary elements of the shuffle mask
2142 // where H is in the range [4,7] and L is in the range [0,3].
2143 // H, 1, 2, 3 or L, 5, 6, 7
2144 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2145 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2146 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2147 InsertAtByte = IsLE ? 12 : 0;
2148 Swap = M0 < 4;
2149 return true;
2150 }
2151 // 0, H, 2, 3 or 4, L, 6, 7
2152 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2153 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2154 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2155 InsertAtByte = IsLE ? 8 : 4;
2156 Swap = M1 < 4;
2157 return true;
2158 }
2159 // 0, 1, H, 3 or 4, 5, L, 7
2160 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2161 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2162 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2163 InsertAtByte = IsLE ? 4 : 8;
2164 Swap = M2 < 4;
2165 return true;
2166 }
2167 // 0, 1, 2, H or 4, 5, 6, L
2168 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2169 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2170 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2171 InsertAtByte = IsLE ? 0 : 12;
2172 Swap = M3 < 4;
2173 return true;
2174 }
2175
2176 // If both vector operands for the shuffle are the same vector, the mask will
2177 // contain only elements from the first one and the second one will be undef.
2178 if (N->getOperand(1).isUndef()) {
2179 ShiftElts = 0;
2180 Swap = true;
2181 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2182 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2183 InsertAtByte = IsLE ? 12 : 0;
2184 return true;
2185 }
2186 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2187 InsertAtByte = IsLE ? 8 : 4;
2188 return true;
2189 }
2190 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2191 InsertAtByte = IsLE ? 4 : 8;
2192 return true;
2193 }
2194 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2195 InsertAtByte = IsLE ? 0 : 12;
2196 return true;
2197 }
2198 }
2199
2200 return false;
2201}
2202
// NOTE(review): the opening line of this definition (its name and leading
// parameters) is missing from this excerpt. From the body it is a
// word-granularity (4-byte element) shuffle-mask matcher that reports a
// rotate amount in ShiftElts and whether the inputs must be swapped —
// confirm the exact signature against upstream PPCISelLowering.cpp.
2204 bool &Swap, bool IsLE) {
2205 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2206 // Ensure each byte index of the word is consecutive.
2207 if (!isNByteElemShuffleMask(N, 4, 1))
2208 return false;
2209
2210 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2211 unsigned M0 = N->getMaskElt(0) / 4;
2212 unsigned M1 = N->getMaskElt(4) / 4;
2213 unsigned M2 = N->getMaskElt(8) / 4;
2214 unsigned M3 = N->getMaskElt(12) / 4;
2215
2216 // If both vector operands for the shuffle are the same vector, the mask will
2217 // contain only elements from the first one and the second one will be undef.
2218 if (N->getOperand(1).isUndef()) {
2219 assert(M0 < 4 && "Indexing into an undef vector?");
// Single-input case: the four word indices must rotate consecutively mod 4.
2220 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2221 return false;
2222
// LE counts the shift from the opposite end of the register than BE does.
2223 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2224 Swap = false;
2225 return true;
2226 }
2227
2228 // Ensure each word index of the ShuffleVector Mask is consecutive.
2229 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2230 return false;
2231
2232 if (IsLE) {
2233 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2234 // Input vectors don't need to be swapped if the leading element
2235 // of the result is one of the 3 left elements of the second vector
2236 // (or if there is no shift to be done at all).
2237 Swap = false;
2238 ShiftElts = (8 - M0) % 8;
2239 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2240 // Input vectors need to be swapped if the leading element
2241 // of the result is one of the 3 left elements of the first vector
2242 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2243 Swap = true;
2244 ShiftElts = (4 - M0) % 4;
2245 }
2246
2247 return true;
2248 } else { // BE
2249 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2250 // Input vectors don't need to be swapped if the leading element
2251 // of the result is one of the 4 elements of the first vector.
2252 Swap = false;
2253 ShiftElts = M0;
2254 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2255 // Input vectors need to be swapped if the leading element
2256 // of the result is one of the 4 elements of the right vector.
2257 Swap = true;
2258 ShiftElts = M0 - 4;
2259 }
2260
2261 return true;
2262 }
2263}
2264
// NOTE(review): the signature line of this helper is missing from this
// excerpt. The body checks a v16i8 shuffle mask made of Width-byte elements
// where each lane begins with mask element i + Width - 1 (a byte-reversal
// style pattern) — confirm the name and parameter list against upstream.
2266 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2267
// The mask must be made of consecutive Width-byte elements from either input.
2268 if (!isNByteElemShuffleMask(N, Width, -1))
2269 return false;
2270
// Each Width-byte lane must start with the last byte of a Width-wide element.
2271 for (int i = 0; i < 16; i += Width)
2272 if (N->getMaskElt(i) != i + Width - 1)
2273 return false;
2274
2275 return true;
2276}
2277
2281
2285
2289
2293
2294/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2295/// if the inputs to the instruction should be swapped and set \p DM to the
2296/// value for the immediate.
2297/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2298/// AND element 0 of the result comes from the first input (LE) or second input
2299/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2300/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2301/// mask.
// NOTE(review): the opening line of this definition (function name and first
// parameters) is missing from this excerpt; the preceding doc comment
// identifies it as the XXPERMDI mask matcher that sets DM (0-3) and Swap.
2303 bool &Swap, bool IsLE) {
2304 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2305
2306 // Ensure each byte index of the double word is consecutive.
2307 if (!isNByteElemShuffleMask(N, 8, 1))
2308 return false;
2309
// M0/M1 are the doubleword indices (0-3 across both inputs) selected by the
// first and second 8-byte lanes of the result.
2310 unsigned M0 = N->getMaskElt(0) / 8;
2311 unsigned M1 = N->getMaskElt(8) / 8;
2312 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2313
2314 // If both vector operands for the shuffle are the same vector, the mask will
2315 // contain only elements from the first one and the second one will be undef.
2316 if (N->getOperand(1).isUndef()) {
2317 if ((M0 | M1) < 2) {
// LE inverts the doubleword numbering when forming the immediate.
2318 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2319 Swap = false;
2320 return true;
2321 } else
2322 return false;
2323 }
2324
2325 if (IsLE) {
2326 if (M0 > 1 && M1 < 2) {
2327 Swap = false;
2328 } else if (M0 < 2 && M1 > 1) {
// Renumber the doublewords as if the two inputs had been exchanged.
2329 M0 = (M0 + 2) % 4;
2330 M1 = (M1 + 2) % 4;
2331 Swap = true;
2332 } else
2333 return false;
2334
2335 // Note: if control flow comes here that means Swap is already set above
2336 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2337 return true;
2338 } else { // BE
2339 if (M0 < 2 && M1 > 1) {
2340 Swap = false;
2341 } else if (M0 > 1 && M1 < 2) {
2342 M0 = (M0 + 2) % 4;
2343 M1 = (M1 + 2) % 4;
2344 Swap = true;
2345 } else
2346 return false;
2347
2348 // Note: if control flow comes here that means Swap is already set above
2349 DM = (M0 << 1) + (M1 & 1);
2350 return true;
2351 }
2352}
2353
2354
2355/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2356/// appropriate for PPC mnemonics (which have a big endian bias - namely
2357/// elements are counted from the left of the vector register).
2358unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2359 SelectionDAG &DAG) {
// NOTE(review): the declaration of SVOp (presumably
// cast<ShuffleVectorSDNode>(N), original line 2360) is missing from this
// excerpt — confirm against upstream before relying on this listing.
2361 assert(isSplatShuffleMask(SVOp, EltSize));
2362 EVT VT = SVOp->getValueType(0);
2363
// v2i64/v2f64: only two lanes, so LE simply mirrors the single mask element.
2364 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2365 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2366 : SVOp->getMaskElt(0);
2367
// General case: convert the LE lane number to the BE-biased mnemonic index.
2368 if (DAG.getDataLayout().isLittleEndian())
2369 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2370 else
2371 return SVOp->getMaskElt(0) / EltSize;
2372}
2373
2374/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2375/// by using a vspltis[bhw] instruction of the specified element size, return
2376/// the constant being splatted. The ByteSize field indicates the number of
2377/// bytes of each element [124] -> [bhw].
// NOTE(review): the signature line of PPC::get_VSPLTI_elt (original line
// 2378) is missing from this excerpt; the preceding doc comment names it and
// describes the ByteSize parameter ([124] -> [bhw]).
2379 SDValue OpVal;
2380
2381 // If ByteSize of the splat is bigger than the element size of the
2382 // build_vector, then we have a case where we are checking for a splat where
2383 // multiple elements of the buildvector are folded together into a single
2384 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2385 unsigned EltSize = 16/N->getNumOperands();
2386 if (EltSize < ByteSize) {
2387 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2388 SDValue UniquedVals[4];
2389 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2390
2391 // See if all of the elements in the buildvector agree across.
2392 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2393 if (N->getOperand(i).isUndef()) continue;
2394 // If the element isn't a constant, bail fully out.
2395 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2396
// Bucket element i by its position within one logical splat chunk.
2397 if (!UniquedVals[i&(Multiple-1)].getNode())
2398 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2399 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2400 return SDValue(); // no match.
2401 }
2402
2403 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2404 // either constant or undef values that are identical for each chunk. See
2405 // if these chunks can form into a larger vspltis*.
2406
2407 // Check to see if all of the leading entries are either 0 or -1. If
2408 // neither, then this won't fit into the immediate field.
2409 bool LeadingZero = true;
2410 bool LeadingOnes = true;
2411 for (unsigned i = 0; i != Multiple-1; ++i) {
2412 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2413
2414 LeadingZero &= isNullConstant(UniquedVals[i]);
2415 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2416 }
2417 // Finally, check the least significant entry.
2418 if (LeadingZero) {
2419 if (!UniquedVals[Multiple-1].getNode())
2420 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2421 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2422 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2423 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2424 }
2425 if (LeadingOnes) {
2426 if (!UniquedVals[Multiple-1].getNode())
2427 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2428 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2429 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2430 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2431 }
2432
2433 return SDValue();
2434 }
2435
2436 // Check to see if this buildvec has a single non-undef value in its elements.
2437 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2438 if (N->getOperand(i).isUndef()) continue;
2439 if (!OpVal.getNode())
2440 OpVal = N->getOperand(i);
2441 else if (OpVal != N->getOperand(i))
2442 return SDValue();
2443 }
2444
2445 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2446
2447 unsigned ValSizeInBytes = EltSize;
2448 uint64_t Value = 0;
2449 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2450 Value = CN->getZExtValue();
2451 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2452 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
// Reinterpret the f32 splat value as its 32-bit integer bit pattern.
2453 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2454 }
2455
2456 // If the splat value is larger than the element value, then we can never do
2457 // this splat. The only case that we could fit the replicated bits into our
2458 // immediate field for would be zero, and we prefer to use vxor for it.
2459 if (ValSizeInBytes < ByteSize) return SDValue();
2460
2461 // If the element value is larger than the splat value, check if it consists
2462 // of a repeated bit pattern of size ByteSize.
2463 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2464 return SDValue();
2465
2466 // Properly sign extend the value.
2467 int MaskVal = SignExtend32(Value, ByteSize * 8);
2468
2469 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2470 if (MaskVal == 0) return SDValue();
2471
2472 // Finally, if this value fits in a 5 bit sext field, return it
2473 if (SignExtend32<5>(MaskVal) == MaskVal)
2474 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2475 return SDValue();
2476}
2477
2478//===----------------------------------------------------------------------===//
2479// Addressing Mode Selection
2480//===----------------------------------------------------------------------===//
2481
2482/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2483/// or 64-bit immediate, and if the value can be accurately represented as a
2484/// sign extension from a 16-bit value. If so, this returns true and the
2485/// immediate.
2486bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2487 if (!isa<ConstantSDNode>(N))
2488 return false;
2489
2490 Imm = (int16_t)N->getAsZExtVal();
2491 if (N->getValueType(0) == MVT::i32)
2492 return Imm == (int32_t)N->getAsZExtVal();
2493 else
2494 return Imm == (int64_t)N->getAsZExtVal();
2495}
// SDValue overload: delegates to the SDNode form above. NOTE(review): the
// signature line of this overload is missing from this excerpt.
2497 return isIntS16Immediate(Op.getNode(), Imm);
2498}
2499
2500/// Used when computing address flags for selecting loads and stores.
2501/// If we have an OR, check if the LHS and RHS are provably disjoint.
2502/// An OR of two provably disjoint values is equivalent to an ADD.
2503/// Most PPC load/store instructions compute the effective address as a sum,
2504/// so doing this conversion is useful.
2505static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2506 if (N.getOpcode() != ISD::OR)
2507 return false;
2508 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2509 if (!LHSKnown.Zero.getBoolValue())
2510 return false;
2511 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2512 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2513}
2514
2515/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2516/// be represented as an indexed [r+r] operation.
// NOTE(review): the first line of this definition (the method name and first
// parameters) is missing from this excerpt; the preceding doc comment names
// it SelectAddressEVXRegReg.
2518 SDValue &Index,
2519 SelectionDAG &DAG) const {
// If any user of this address is an f64 memory access, split the address
// into its two add operands so it can be selected as indexed [r+r].
2520 for (SDNode *U : N->users()) {
2521 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2522 if (Memop->getMemoryVT() == MVT::f64) {
2523 Base = N.getOperand(0);
2524 Index = N.getOperand(1);
2525 return true;
2526 }
2527 }
2528 }
2529 return false;
2530}
2531
2532/// isIntS34Immediate - This method tests if value of node given can be
2533/// accurately represented as a sign extension from a 34-bit value. If so,
2534/// this returns true and the immediate.
2535bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2536 if (!isa<ConstantSDNode>(N))
2537 return false;
2538
2539 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2540 return isInt<34>(Imm);
2541}
// SDValue overload: delegates to the SDNode form above. NOTE(review): the
// signature line of this overload is missing from this excerpt.
2543 return isIntS34Immediate(Op.getNode(), Imm);
2544}
2545
2546/// SelectAddressRegReg - Given the specified addressed, check to see if it
2547/// can be represented as an indexed [r+r] operation. Returns false if it
2548/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2549/// non-zero and N can be represented by a base register plus a signed 16-bit
2550/// displacement, make a more precise judgement by checking (displacement % \p
2551/// EncodingAlignment).
// NOTE(review): the first line of this definition (the method name) and the
// condition guarding the first early return (original lines 2552 and 2557)
// are missing from this excerpt — confirm against upstream.
2553 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2554 MaybeAlign EncodingAlignment) const {
2555 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2556 // a [pc+imm].
2558 return false;
2559
2560 int16_t Imm = 0;
2561 if (N.getOpcode() == ISD::ADD) {
2562 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2563 // SPE load/store can only handle 8-bit offsets.
2564 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2565 return true;
// Prefer [r+imm] when the offset fits the (possibly aligned) 16-bit field.
2566 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2567 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2568 return false; // r+i
2569 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2570 return false; // r+i
2571
2572 Base = N.getOperand(0);
2573 Index = N.getOperand(1);
2574 return true;
2575 } else if (N.getOpcode() == ISD::OR) {
2576 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2577 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2578 return false; // r+i can fold it if we can.
2579
2580 // If this is an or of disjoint bitfields, we can codegen this as an add
2581 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2582 // disjoint.
2583 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2584
2585 if (LHSKnown.Zero.getBoolValue()) {
2586 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2587 // If all of the bits are known zero on the LHS or RHS, the add won't
2588 // carry.
2589 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2590 Base = N.getOperand(0);
2591 Index = N.getOperand(1);
2592 return true;
2593 }
2594 }
2595 }
2596
2597 return false;
2598}
2599
2600// If we happen to be doing an i64 load or store into a stack slot that has
2601// less than a 4-byte alignment, then the frame-index elimination may need to
2602// use an indexed load or store instruction (because the offset may not be a
2603// multiple of 4). The extra register needed to hold the offset comes from the
2604// register scavenger, and it is possible that the scavenger will need to use
2605// an emergency spill slot. As a result, we need to make sure that a spill slot
2606// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2607// stack slot.
2608static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2609 // FIXME: This does not handle the LWA case.
2610 if (VT != MVT::i64)
2611 return;
2612
2613 // NOTE: We'll exclude negative FIs here, which come from argument
2614 // lowering, because there are no known test cases triggering this problem
2615 // using packed structures (or similar). We can remove this exclusion if
2616 // we find such a test case. The reason why this is so test-case driven is
2617 // because this entire 'fixup' is only to prevent crashes (from the
2618 // register scavenger) on not-really-valid inputs. For example, if we have:
2619 // %a = alloca i1
2620 // %b = bitcast i1* %a to i64*
2621 // store i64* a, i64 b
2622 // then the store should really be marked as 'align 1', but is not. If it
2623 // were marked as 'align 1' then the indexed form would have been
2624 // instruction-selected initially, and the problem this 'fixup' is preventing
2625 // won't happen regardless.
2626 if (FrameIdx < 0)
2627 return;
2628
// NOTE(review): the declaration of MF (presumably obtained from DAG's
// MachineFunction, original line 2629) is missing from this excerpt.
2630 MachineFrameInfo &MFI = MF.getFrameInfo();
2631
// Objects already aligned to 4 bytes never need the indexed form.
2632 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2633 return;
2634
// Record that a spill slot may be needed for the register scavenger.
2635 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2636 FuncInfo->setHasNonRISpills();
2637}
2638
2639/// Returns true if the address N can be represented by a base register plus
2640/// a signed 16-bit displacement [r+imm], and if it is not better
2641/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2642/// displacements that are multiples of that value.
// NOTE(review): several lines of this definition are missing from this
// excerpt (the method name, the PC-relative guard condition, the extra
// PPCISD::Lo argument checks around line 2675, and the frame-index test
// before line 2739) — confirm against upstream PPCISelLowering.cpp.
2644 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2645 MaybeAlign EncodingAlignment) const {
2646 // FIXME dl should come from parent load or store, not from address
2647 SDLoc dl(N);
2648
2649 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2650 // a [pc+imm].
2652 return false;
2653
2654 // If this can be more profitably realized as r+r, fail.
2655 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2656 return false;
2657
2658 if (N.getOpcode() == ISD::ADD) {
2659 int16_t imm = 0;
2660 if (isIntS16Immediate(N.getOperand(1), imm) &&
2661 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2662 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2663 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2664 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2665 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2666 } else {
2667 Base = N.getOperand(0);
2668 }
2669 return true; // [r+i]
2670 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2671 // Match LOAD (ADD (X, Lo(G))).
2672 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2673 "Cannot handle constant offsets yet!");
2674 Disp = N.getOperand(1).getOperand(0); // The global address.
2679 Base = N.getOperand(0);
2680 return true; // [&g+r]
2681 }
2682 } else if (N.getOpcode() == ISD::OR) {
2683 int16_t imm = 0;
2684 if (isIntS16Immediate(N.getOperand(1), imm) &&
2685 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2686 // If this is an or of disjoint bitfields, we can codegen this as an add
2687 // (for better address arithmetic) if the LHS and RHS of the OR are
2688 // provably disjoint.
2689 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2690
2691 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2692 // If all of the bits are known zero on the LHS or RHS, the add won't
2693 // carry.
2694 if (FrameIndexSDNode *FI =
2695 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2696 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2697 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2698 } else {
2699 Base = N.getOperand(0);
2700 }
2701 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2702 return true;
2703 }
2704 }
2705 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2706 // Loading from a constant address.
2707
2708 // If this address fits entirely in a 16-bit sext immediate field, codegen
2709 // this as "d, 0"
2710 int16_t Imm;
2711 if (isIntS16Immediate(CN, Imm) &&
2712 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2713 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
// The zero register gives a base of 0, so the displacement is the address.
2714 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2715 CN->getValueType(0));
2716 return true;
2717 }
2718
2719 // Handle 32-bit sext immediates with LIS + addr mode.
2720 if ((CN->getValueType(0) == MVT::i32 ||
2721 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2722 (!EncodingAlignment ||
2723 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2724 int Addr = (int)CN->getZExtValue();
2725
2726 // Otherwise, break this down into an LIS + disp.
2727 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2728
2729 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2730 MVT::i32);
2731 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2732 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2733 return true;
2734 }
2735 }
2736
// Fallback: [r+0] with the whole expression as the base register.
2737 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2739 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2740 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2741 } else
2742 Base = N;
2743 return true; // [r+0]
2744}
2745
2746/// Similar to the 16-bit case but for instructions that take a 34-bit
2747/// displacement field (prefixed loads/stores).
// NOTE(review): the first line of this definition (the method name and the
// leading N/Disp parameters) is missing from this excerpt; the preceding
// comment describes it as the 34-bit displacement (prefixed) variant.
2749 SDValue &Base,
2750 SelectionDAG &DAG) const {
2751 // Only on 64-bit targets.
2752 if (N.getValueType() != MVT::i64)
2753 return false;
2754
2755 SDLoc dl(N);
2756 int64_t Imm = 0;
2757
// ADD with a 34-bit constant RHS becomes base + 34-bit displacement.
2758 if (N.getOpcode() == ISD::ADD) {
2759 if (!isIntS34Immediate(N.getOperand(1), Imm))
2760 return false;
2761 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2762 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2763 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2764 else
2765 Base = N.getOperand(0);
2766 return true;
2767 }
2768
2769 if (N.getOpcode() == ISD::OR) {
2770 if (!isIntS34Immediate(N.getOperand(1), Imm))
2771 return false;
2772 // If this is an or of disjoint bitfields, we can codegen this as an add
2773 // (for better address arithmetic) if the LHS and RHS of the OR are
2774 // provably disjoint.
2775 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2776 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2777 return false;
2778 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2779 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2780 else
2781 Base = N.getOperand(0);
2782 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2783 return true;
2784 }
2785
2786 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2787 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2788 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2789 return true;
2790 }
2791
2792 return false;
2793}
2794
2795/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2796/// represented as an indexed [r+r] operation.
// NOTE(review): the first line of this definition (the method name and the
// N/Base parameters) is missing from this excerpt; the preceding doc comment
// names it SelectAddressRegRegOnly.
2798 SDValue &Index,
2799 SelectionDAG &DAG) const {
2800 // Check to see if we can easily represent this as an [r+r] address. This
2801 // will fail if it thinks that the address is more profitably represented as
2802 // reg+imm, e.g. where imm = 0.
2803 if (SelectAddressRegReg(N, Base, Index, DAG))
2804 return true;
2805
2806 // If the address is the result of an add, we will utilize the fact that the
2807 // address calculation includes an implicit add. However, we can reduce
2808 // register pressure if we do not materialize a constant just for use as the
2809 // index register. We only get rid of the add if it is not an add of a
2810 // value and a 16-bit signed constant and both have a single use.
2811 int16_t imm = 0;
2812 if (N.getOpcode() == ISD::ADD &&
2813 (!isIntS16Immediate(N.getOperand(1), imm) ||
2814 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2815 Base = N.getOperand(0);
2816 Index = N.getOperand(1);
2817 return true;
2818 }
2819
2820 // Otherwise, do it the hard way, using R0 as the base register.
2821 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2822 N.getValueType());
2823 Index = N;
2824 return true;
2825}
2826
2827template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2828 Ty *PCRelCand = dyn_cast<Ty>(N);
2829 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2830}
2831
2832/// Returns true if this address is a PC Relative address.
2833/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2834/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
// NOTE(review): the signature line and the chain of isValidPCRelNode<...>
// checks feeding the second "return true" (original lines 2835 and
// 2840-2843) are missing from this excerpt — confirm against upstream.
2836 // This is a materialize PC Relative node. Always select this as PC Relative.
2837 Base = N;
2838 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2839 return true;
2844 return true;
2845 return false;
2846}
2847
2848/// Returns true if we should use a direct load into vector instruction
2849/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2850static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2851
2852 // If there are any other uses other than scalar to vector, then we should
2853 // keep it as a scalar load -> direct move pattern to prevent multiple
2854 // loads.
// NOTE(review): the declaration of LD (presumably a LoadSDNode dyn_cast of
// N, original line 2855) is missing from this excerpt.
2856 if (!LD)
2857 return false;
2858
// Only specific scalar memory types have a direct-to-vector load form, and
// the smaller ones need newer vector subtarget features.
2859 EVT MemVT = LD->getMemoryVT();
2860 if (!MemVT.isSimple())
2861 return false;
2862 switch(MemVT.getSimpleVT().SimpleTy) {
2863 case MVT::i64:
2864 break;
2865 case MVT::i32:
2866 if (!ST.hasP8Vector())
2867 return false;
2868 break;
2869 case MVT::i16:
2870 case MVT::i8:
2871 if (!ST.hasP9Vector())
2872 return false;
2873 break;
2874 default:
2875 return false;
2876 }
2877
2878 SDValue LoadedVal(N, 0);
2879 if (!LoadedVal.hasOneUse())
2880 return false;
2881
// Every use of the loaded value must feed a scalar_to_vector, otherwise a
// gpr copy of the value is needed anyway.
2882 for (SDUse &Use : LD->uses())
2883 if (Use.getResNo() == 0 &&
2884 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2885 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2886 return false;
2887
2888 return true;
2889}
2890
2891/// getPreIndexedAddressParts - returns true by value, base pointer and
2892/// offset pointer and addressing mode by reference if the node's address
2893/// can be legally represented as pre-indexed load / store address.
// NOTE(review): several lines of this definition are missing from this
// excerpt (the method name, the frame-index check before line 2935, the
// std::swap of Base/Offset after line 2942, and part of the condition at
// line 2967) — confirm against upstream PPCISelLowering.cpp.
2895 SDValue &Offset,
2897 SelectionDAG &DAG) const {
2898 if (DisablePPCPreinc) return false;
2899
2900 bool isLoad = true;
2901 SDValue Ptr;
2902 EVT VT;
2903 Align Alignment;
2904 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2905 Ptr = LD->getBasePtr();
2906 VT = LD->getMemoryVT();
2907 Alignment = LD->getAlign();
2908 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2909 Ptr = ST->getBasePtr();
2910 VT = ST->getMemoryVT();
2911 Alignment = ST->getAlign();
2912 isLoad = false;
2913 } else
2914 return false;
2915
2916 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2917 // instructions because we can fold these into a more efficient instruction
2918 // instead, (such as LXSD).
2919 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2920 return false;
2921 }
2922
2923 // PowerPC doesn't have preinc load/store instructions for vectors
2924 if (VT.isVector())
2925 return false;
2926
2927 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2928 // Common code will reject creating a pre-inc form if the base pointer
2929 // is a frame index, or if N is a store and the base pointer is either
2930 // the same as or a predecessor of the value being stored. Check for
2931 // those situations here, and try with swapped Base/Offset instead.
2932 bool Swap = false;
2933
2935 Swap = true;
2936 else if (!isLoad) {
2937 SDValue Val = cast<StoreSDNode>(N)->getValue();
2938 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2939 Swap = true;
2940 }
2941
2942 if (Swap)
2944
2945 AM = ISD::PRE_INC;
2946 return true;
2947 }
2948
2949 // LDU/STU can only handle immediates that are a multiple of 4.
2950 if (VT != MVT::i64) {
2951 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
2952 return false;
2953 } else {
2954 // LDU/STU need an address with at least 4-byte alignment.
2955 if (Alignment < Align(4))
2956 return false;
2957
2958 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2959 return false;
2960 }
2961
2962 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2963 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2964 // sext i32 to i64 when addr mode is r+i.
2965 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2966 LD->getExtensionType() == ISD::SEXTLOAD &&
2968 return false;
2969 }
2970
2971 AM = ISD::PRE_INC;
2972 return true;
2973}
2974
2975//===----------------------------------------------------------------------===//
2976// LowerOperation implementation
2977//===----------------------------------------------------------------------===//
2978
2979/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2980/// and LoOpFlags to the target MO flags.
2981static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2982 unsigned &HiOpFlags, unsigned &LoOpFlags,
2983 const GlobalValue *GV = nullptr) {
2984 HiOpFlags = PPCII::MO_HA;
2985 LoOpFlags = PPCII::MO_LO;
2986
2987 // Don't use the pic base if not in PIC relocation model.
2988 if (IsPIC) {
2989 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
2990 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
2991 }
2992}
2993
2994static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2995 SelectionDAG &DAG) {
2996 SDLoc DL(HiPart);
2997 EVT PtrVT = HiPart.getValueType();
2998 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2999
3000 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3001 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3002
3003 // With PIC, the first instruction is actually "GR+hi(&G)".
3004 if (isPIC)
3005 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3006 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3007
3008 // Generate non-pic code that has direct accesses to the constant pool.
3009 // The address of the global is just (hi(&g)+lo(&g)).
3010 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3011}
3012
// Marks the function as using the TOC base pointer. NOTE(review): the
// signature line (presumably taking the MachineFunction MF, original line
// 3013) is missing from this excerpt.
3014 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3015 FuncInfo->setUsesTOCBasePtr();
3016}
3017
3021
3022SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3023 SDValue GA) const {
// Emit a TOC_ENTRY load of GA relative to the TOC pointer register: X2 on
// 64-bit, R2 on 32-bit AIX, otherwise the computed global base register.
3024 EVT VT = Subtarget.getScalarIntVT();
3025 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3026 : Subtarget.isAIXABI()
3027 ? DAG.getRegister(PPC::R2, VT)
3028 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3029 SDValue Ops[] = { GA, Reg };
// NOTE(review): the trailing arguments of this call (original lines
// 3032-3033, likely alignment/MachineMemOperand flags) are missing from
// this excerpt.
3030 return DAG.getMemIntrinsicNode(
3031 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3034}
3035
3036SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3037 SelectionDAG &DAG) const {
3038 EVT PtrVT = Op.getValueType();
3039 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3040 const Constant *C = CP->getConstVal();
3041
3042 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3043 // The actual address of the GlobalValue is stored in the TOC.
3044 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3045 if (Subtarget.isUsingPCRelativeCalls()) {
3046 SDLoc DL(CP);
3047 EVT Ty = getPointerTy(DAG.getDataLayout());
3048 SDValue ConstPool = DAG.getTargetConstantPool(
3049 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3050 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3051 }
3052 setUsesTOCBasePtr(DAG);
3053 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3054 return getTOCEntry(DAG, SDLoc(CP), GA);
3055 }
3056
3057 unsigned MOHiFlag, MOLoFlag;
3058 bool IsPIC = isPositionIndependent();
3059 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3060
3061 if (IsPIC && Subtarget.isSVR4ABI()) {
3062 SDValue GA =
3063 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3064 return getTOCEntry(DAG, SDLoc(CP), GA);
3065 }
3066
3067 SDValue CPIHi =
3068 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3069 SDValue CPILo =
3070 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3071 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3072}
3073
3074// For 64-bit PowerPC, prefer the more compact relative encodings.
3075// This trades 32 bits per jump table entry for one or two instructions
3076// on the jump site.
3083
3086 return false;
3087 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3088 return true;
3090}
3091
3093 SelectionDAG &DAG) const {
3094 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3096
3097 switch (getTargetMachine().getCodeModel()) {
3098 case CodeModel::Small:
3099 case CodeModel::Medium:
3101 default:
3102 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3104 }
3105}
3106
3107const MCExpr *
3109 unsigned JTI,
3110 MCContext &Ctx) const {
3111 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3113
3114 switch (getTargetMachine().getCodeModel()) {
3115 case CodeModel::Small:
3116 case CodeModel::Medium:
3118 default:
3119 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3120 }
3121}
3122
3123SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3124 EVT PtrVT = Op.getValueType();
// NOTE(review): the declaration of JT (presumably a JumpTableSDNode cast of
// Op, original line 3125) is missing from this excerpt.
3126
3127 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3128 if (Subtarget.isUsingPCRelativeCalls()) {
3129 SDLoc DL(JT);
3130 EVT Ty = getPointerTy(DAG.getDataLayout());
3131 SDValue GA =
3132 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3133 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3134 return MatAddr;
3135 }
3136
3137 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3138 // The actual address of the GlobalValue is stored in the TOC.
3139 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3140 setUsesTOCBasePtr(DAG);
3141 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3142 return getTOCEntry(DAG, SDLoc(JT), GA);
3143 }
3144
3145 unsigned MOHiFlag, MOLoFlag;
3146 bool IsPIC = isPositionIndependent();
3147 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3148
// 32-bit SVR4 PIC goes through a TOC entry. NOTE(review): the flag argument
// of this getTargetJumpTable call (original line 3151) is missing here.
3149 if (IsPIC && Subtarget.isSVR4ABI()) {
3150 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3152 return getTOCEntry(DAG, SDLoc(GA), GA);
3153 }
3154
// Otherwise synthesize the address from an HA/LO relocation pair.
3155 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3156 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3157 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3158}
3159
// Lower ISD::BlockAddress with the same strategy ladder as jump tables:
// PC-relative materialization, TOC entry (64-bit ELF / AIX), .got entry
// (32-bit PIC ELF), or a Hi/Lo label pair.
// NOTE(review): listing number 3171 is missing — the flag argument of the
// PC-relative getTargetBlockAddress call was dropped by the extraction;
// verify against upstream before editing.
3160 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3161                                              SelectionDAG &DAG) const {
3162   EVT PtrVT = Op.getValueType();
3163   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3164   const BlockAddress *BA = BASDN->getBlockAddress();
3165
3166   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3167   if (Subtarget.isUsingPCRelativeCalls()) {
3168     SDLoc DL(BASDN);
3169     EVT Ty = getPointerTy(DAG.getDataLayout());
3170     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3172     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3173     return MatAddr;
3174   }
3175
3176   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3177   // The actual BlockAddress is stored in the TOC.
3178   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3179     setUsesTOCBasePtr(DAG);
3180     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3181     return getTOCEntry(DAG, SDLoc(BASDN), GA);
3182   }
3183
3184   // 32-bit position-independent ELF stores the BlockAddress in the .got.
3185   if (Subtarget.is32BitELFABI() && isPositionIndependent())
3186     return getTOCEntry(
3187         DAG, SDLoc(BASDN),
3188         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3189
3190   unsigned MOHiFlag, MOLoFlag;
3191   bool IsPIC = isPositionIndependent();
3192   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3193   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3194   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3195   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3196}
3197
3198SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3199 SelectionDAG &DAG) const {
3200 if (Subtarget.isAIXABI())
3201 return LowerGlobalTLSAddressAIX(Op, DAG);
3202
3203 return LowerGlobalTLSAddressLinux(Op, DAG);
3204}
3205
// NOTE(review): listing numbers jump (3208, 3216, 3218, 3241 missing) — the
// signature line, the getInfo<PPCFunctionInfo>() initializer, the TLSGV set
// declaration, and the init-done setter were dropped by the extraction;
// verify against upstream before editing.
3206 /// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3207 /// and then apply the update.
3209                                          SelectionDAG &DAG,
3210                                          const TargetMachine &TM) {
3211   // Initialize TLS model opt setting lazily:
3212   // (1) Use initial-exec for single TLS var references within current function.
3213   // (2) Use local-dynamic for multiple TLS var references within current
3214   // function.
3215   PPCFunctionInfo *FuncInfo =
3217   if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3219     // Iterate over all instructions within current function, collect all TLS
3220     // global variables (global variables taken as the first parameter to
3221     // Intrinsic::threadlocal_address).
3222     const Function &Func = DAG.getMachineFunction().getFunction();
3223     for (const BasicBlock &BB : Func)
3224       for (const Instruction &I : BB)
3225         if (I.getOpcode() == Instruction::Call)
3226           if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3227             if (Function *CF = CI->getCalledFunction())
3228               if (CF->isDeclaration() &&
3229                   CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3230                 if (const GlobalValue *GV =
3231                         dyn_cast<GlobalValue>(I.getOperand(0))) {
3232                   TLSModel::Model GVModel = TM.getTLSModel(GV);
3233                   if (GVModel == TLSModel::LocalDynamic)
3234                     TLSGV.insert(GV);
3235                 }
3236
3237     unsigned TLSGVCnt = TLSGV.size();
3238     LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3239     if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3240       FuncInfo->setAIXFuncUseTLSIEForLD();
3242   }
3243
3244   if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3245     LLVM_DEBUG(
3246         dbgs() << DAG.getMachineFunction().getName()
3247                << " function is using the TLS-IE model for TLS-LD access.\n");
3248     Model = TLSModel::InitialExec;
3249   }
3250}
3251
// Lower TLS global addresses on AIX. All models go through TOC entries:
// local/initial-exec add the TOC-loaded offset to the thread pointer (R13 on
// 64-bit, .__get_tpointer's R3 result on 32-bit), local-dynamic combines a
// per-variable offset with the module handle (_$TLSML), and general-dynamic
// pairs a variable-offset TOC entry with a region-handle TOC entry.
// NOTE(review): listing numbers jump (3263, 3267, 3309, 3349, 3353, 3372
// missing) — among them the TLS model query and the
// updateForAIXShLibTLSModelOpt call were dropped by the extraction; verify
// against upstream before editing.
3252 SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3253                                                     SelectionDAG &DAG) const {
3254   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3255
3256   if (DAG.getTarget().useEmulatedTLS())
3257     report_fatal_error("Emulated TLS is not yet supported on AIX");
3258
3259   SDLoc dl(GA);
3260   const GlobalValue *GV = GA->getGlobal();
3261   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3262   bool Is64Bit = Subtarget.isPPC64();
3264
3265   // Apply update to the TLS model.
3266   if (Subtarget.hasAIXShLibTLSModelOpt())
3268
3269   // TLS variables are accessed through TOC entries.
3270   // To support this, set the DAG to use the TOC base pointer.
3271   setUsesTOCBasePtr(DAG);
3272
3273   bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3274
3275   if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3276     bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3277     bool HasAIXSmallTLSGlobalAttr = false;
3278     SDValue VariableOffsetTGA =
3279         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3280     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3281     SDValue TLSReg;
3282
3283     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3284       if (GVar->hasAttribute("aix-small-tls"))
3285         HasAIXSmallTLSGlobalAttr = true;
3286
3287     if (Is64Bit) {
3288       // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3289       // involves a load of the variable offset (from the TOC), followed by an
3290       // add of the loaded variable offset to R13 (the thread pointer).
3291       // This code sequence looks like:
3292       //    ld reg1,var[TC](2)
3293       //    add reg2, reg1, r13     // r13 contains the thread pointer
3294       TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3295
3296       // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3297       // global variable attribute, produce a faster access sequence for
3298       // local-exec TLS variables where the offset from the TLS base is encoded
3299       // as an immediate operand.
3300       //
3301       // We only utilize the faster local-exec access sequence when the TLS
3302       // variable has a size within the policy limit. We treat types that are
3303       // not sized or are empty as being over the policy size limit.
3304       if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3305           IsTLSLocalExecModel) {
3306         Type *GVType = GV->getValueType();
3307         if (GVType->isSized() && !GVType->isEmptyTy() &&
3308             GV->getDataLayout().getTypeAllocSize(GVType) <=
3310           return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3311       }
3312     } else {
3313       // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3314       // involves loading the variable offset from the TOC, generating a call to
3315       // .__get_tpointer to get the thread pointer (which will be in R3), and
3316       // adding the two together:
3317       //    lwz reg1,var[TC](2)
3318       //    bla .__get_tpointer
3319       //    add reg2, reg1, r3
3320       TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3321
3322       // We do not implement the 32-bit version of the faster access sequence
3323       // for local-exec that is controlled by the -maix-small-local-exec-tls
3324       // option, or the "aix-small-tls" global variable attribute.
3325       if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3326         report_fatal_error("The small-local-exec TLS access sequence is "
3327                            "currently only supported on AIX (64-bit mode).");
3328     }
3329     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3330   }
3331
3332   if (Model == TLSModel::LocalDynamic) {
3333     bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3334
3335     // We do not implement the 32-bit version of the faster access sequence
3336     // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3337     if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3338       report_fatal_error("The small-local-dynamic TLS access sequence is "
3339                          "currently only supported on AIX (64-bit mode).");
3340
3341     // For local-dynamic on AIX, we need to generate one TOC entry for each
3342     // variable offset, and a single module-handle TOC entry for the entire
3343     // file.
3344
3345     SDValue VariableOffsetTGA =
3346         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3347     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3348
3350     GlobalVariable *TLSGV =
3351         dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3352             StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3354     assert(TLSGV && "Not able to create GV for _$TLSML.");
3355     SDValue ModuleHandleTGA =
3356         DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3357     SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3358     SDValue ModuleHandle =
3359         DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3360
3361     // With the -maix-small-local-dynamic-tls option, produce a faster access
3362     // sequence for local-dynamic TLS variables where the offset from the
3363     // module-handle is encoded as an immediate operand.
3364     //
3365     // We only utilize the faster local-dynamic access sequence when the TLS
3366     // variable has a size within the policy limit. We treat types that are
3367     // not sized or are empty as being over the policy size limit.
3368     if (HasAIXSmallLocalDynamicTLS) {
3369       Type *GVType = GV->getValueType();
3370       if (GVType->isSized() && !GVType->isEmptyTy() &&
3371           GV->getDataLayout().getTypeAllocSize(GVType) <=
3373         return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3374                            ModuleHandle);
3375     }
3376
3377     return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3378   }
3379
3380   // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3381   // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3382   // need to generate two TOC entries, one for the variable offset, one for the
3383   // region handle. The global address for the TOC entry of the region handle is
3384   // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3385   // entry of the variable offset is created with MO_TLSGD_FLAG.
3386   SDValue VariableOffsetTGA =
3387       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3388   SDValue RegionHandleTGA =
3389       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3390   SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3391   SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3392   return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3393                      RegionHandle);
3394}
3395
// Lower TLS global addresses for Linux/ELF, one branch per TLS model
// (local-exec, initial-exec, general-dynamic, local-dynamic), each with a
// PC-relative fast path when the subtarget supports PC-relative addressing.
// NOTE(review): listing numbers jump (3420, 3427, 3429, 3439, 3471, 3495
// missing) — several MO_* flag arguments and one getTargetGlobalAddress line
// were dropped by the extraction; verify against upstream before editing.
3396 SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3397                                                       SelectionDAG &DAG) const {
3398   // FIXME: TLS addresses currently use medium model code sequences,
3399   // which is the most useful form. Eventually support for small and
3400   // large models could be added if users need it, at the cost of
3401   // additional complexity.
3402   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3403   if (DAG.getTarget().useEmulatedTLS())
3404     return LowerToTLSEmulatedModel(GA, DAG);
3405
3406   SDLoc dl(GA);
3407   const GlobalValue *GV = GA->getGlobal();
3408   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3409   bool is64bit = Subtarget.isPPC64();
3410   const Module *M = DAG.getMachineFunction().getFunction().getParent();
3411   PICLevel::Level picLevel = M->getPICLevel();
3412
3413   const TargetMachine &TM = getTargetMachine();
3414   TLSModel::Model Model = TM.getTLSModel(GV);
3415
3416   if (Model == TLSModel::LocalExec) {
3417     if (Subtarget.isUsingPCRelativeCalls()) {
3418       SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3419       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3421       SDValue MatAddr =
3422           DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3423       return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3424     }
3425
3426     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3428     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3430     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3431                              : DAG.getRegister(PPC::R2, MVT::i32);
3432
3433     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3434     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3435   }
3436
3437   if (Model == TLSModel::InitialExec) {
3438     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3440         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3441     SDValue TGATLS = DAG.getTargetGlobalAddress(
3442         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3443     SDValue TPOffset;
3444     if (IsPCRel) {
3445       SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3446       TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3447                              MachinePointerInfo());
3448     } else {
3449       SDValue GOTPtr;
3450       if (is64bit) {
3451         setUsesTOCBasePtr(DAG);
3452         SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3453         GOTPtr =
3454             DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3455       } else {
3456         if (!TM.isPositionIndependent())
3457           GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3458         else if (picLevel == PICLevel::SmallPIC)
3459           GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3460         else
3461           GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3462       }
3463       TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3464     }
3465     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3466   }
3467
3468   if (Model == TLSModel::GeneralDynamic) {
3469     if (Subtarget.isUsingPCRelativeCalls()) {
3470       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3472       return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3473     }
3474
3475     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3476     SDValue GOTPtr;
3477     if (is64bit) {
3478       setUsesTOCBasePtr(DAG);
3479       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3480       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3481                            GOTReg, TGA);
3482     } else {
3483       if (picLevel == PICLevel::SmallPIC)
3484         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3485       else
3486         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3487     }
3488     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3489                        GOTPtr, TGA, TGA);
3490   }
3491
3492   if (Model == TLSModel::LocalDynamic) {
3493     if (Subtarget.isUsingPCRelativeCalls()) {
3494       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3496       SDValue MatPCRel =
3497           DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3498       return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3499     }
3500
3501     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3502     SDValue GOTPtr;
3503     if (is64bit) {
3504       setUsesTOCBasePtr(DAG);
3505       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3506       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3507                            GOTReg, TGA);
3508     } else {
3509       if (picLevel == PICLevel::SmallPIC)
3510         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3511       else
3512         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3513     }
3514     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3515                                   PtrVT, GOTPtr, TGA, TGA);
3516     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3517                                       PtrVT, TLSAddr, TGA);
3518     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3519   }
3520
3521   llvm_unreachable("Unknown TLS model!");
3522}
3523
// Lower ISD::GlobalAddress: PC-relative materialization (with a GOT-indirect
// load variant), a TOC entry for 64-bit ELF / AIX, a GOT-style TOC entry for
// 32-bit SVR4 PIC, and a Hi/Lo pair otherwise.
// NOTE(review): listing numbers jump (3536, 3538, 3545, 3561 missing) — the
// condition guarding the load-from-GOT branch and two MO_* flag arguments
// were dropped by the extraction; verify against upstream before editing.
3524 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3525                                               SelectionDAG &DAG) const {
3526   EVT PtrVT = Op.getValueType();
3527   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3528   SDLoc DL(GSDN);
3529   const GlobalValue *GV = GSDN->getGlobal();
3530
3531   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3532   // The actual address of the GlobalValue is stored in the TOC.
3533   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3534     if (Subtarget.isUsingPCRelativeCalls()) {
3535       EVT Ty = getPointerTy(DAG.getDataLayout());
3537         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3539         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3540         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3541                                    MachinePointerInfo());
3542         return Load;
3543       } else {
3544         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3546         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3547       }
3548     }
3549     setUsesTOCBasePtr(DAG);
3550     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3551     return getTOCEntry(DAG, DL, GA);
3552   }
3553
3554   unsigned MOHiFlag, MOLoFlag;
3555   bool IsPIC = isPositionIndependent();
3556   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3557
3558   if (IsPIC && Subtarget.isSVR4ABI()) {
3559     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3560                                             GSDN->getOffset(),
3562     return getTOCEntry(DAG, DL, GA);
3563   }
3564
3565   SDValue GAHi =
3566       DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3567   SDValue GALo =
3568       DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3569
3570   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3571}
3572
3573SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3574 bool IsStrict = Op->isStrictFPOpcode();
3575 ISD::CondCode CC =
3576 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3577 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3578 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3579 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3580 EVT LHSVT = LHS.getValueType();
3581 SDLoc dl(Op);
3582
3583 // Soften the setcc with libcall if it is fp128.
3584 if (LHSVT == MVT::f128) {
3585 assert(!Subtarget.hasP9Vector() &&
3586 "SETCC for f128 is already legal under Power9!");
3587 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3588 Op->getOpcode() == ISD::STRICT_FSETCCS);
3589 if (RHS.getNode())
3590 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3591 DAG.getCondCode(CC));
3592 if (IsStrict)
3593 return DAG.getMergeValues({LHS, Chain}, dl);
3594 return LHS;
3595 }
3596
3597 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3598
3599 if (Op.getValueType() == MVT::v2i64) {
3600 // When the operands themselves are v2i64 values, we need to do something
3601 // special because VSX has no underlying comparison operations for these.
3602 if (LHS.getValueType() == MVT::v2i64) {
3603 // Equality can be handled by casting to the legal type for Altivec
3604 // comparisons, everything else needs to be expanded.
3605 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3606 return SDValue();
3607 SDValue SetCC32 = DAG.getSetCC(
3608 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3609 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3610 int ShuffV[] = {1, 0, 3, 2};
3611 SDValue Shuff =
3612 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3613 return DAG.getBitcast(MVT::v2i64,
3614 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3615 dl, MVT::v4i32, Shuff, SetCC32));
3616 }
3617
3618 // We handle most of these in the usual way.
3619 return Op;
3620 }
3621
3622 // If we're comparing for equality to zero, expose the fact that this is
3623 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3624 // fold the new nodes.
3625 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3626 return V;
3627
3628 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3629 // Leave comparisons against 0 and -1 alone for now, since they're usually
3630 // optimized. FIXME: revisit this when we can custom lower all setcc
3631 // optimizations.
3632 if (C->isAllOnes() || C->isZero())
3633 return SDValue();
3634 }
3635
3636 // If we have an integer seteq/setne, turn it into a compare against zero
3637 // by xor'ing the rhs with the lhs, which is faster than setting a
3638 // condition register, reading it back out, and masking the correct bit. The
3639 // normal approach here uses sub to do this instead of xor. Using xor exposes
3640 // the result to other bit-twiddling opportunities.
3641 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3642 EVT VT = Op.getValueType();
3643 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3644 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3645 }
3646 return SDValue();
3647}
3648
3649SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3650 SDNode *Node = Op.getNode();
3651 EVT VT = Node->getValueType(0);
3652 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3653 SDValue InChain = Node->getOperand(0);
3654 SDValue VAListPtr = Node->getOperand(1);
3655 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3656 SDLoc dl(Node);
3657
3658 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3659
3660 // gpr_index
3661 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3662 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3663 InChain = GprIndex.getValue(1);
3664
3665 if (VT == MVT::i64) {
3666 // Check if GprIndex is even
3667 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3668 DAG.getConstant(1, dl, MVT::i32));
3669 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3670 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3671 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3672 DAG.getConstant(1, dl, MVT::i32));
3673 // Align GprIndex to be even if it isn't
3674 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3675 GprIndex);
3676 }
3677
3678 // fpr index is 1 byte after gpr
3679 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3680 DAG.getConstant(1, dl, MVT::i32));
3681
3682 // fpr
3683 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3684 FprPtr, MachinePointerInfo(SV), MVT::i8);
3685 InChain = FprIndex.getValue(1);
3686
3687 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3688 DAG.getConstant(8, dl, MVT::i32));
3689
3690 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3691 DAG.getConstant(4, dl, MVT::i32));
3692
3693 // areas
3694 SDValue OverflowArea =
3695 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3696 InChain = OverflowArea.getValue(1);
3697
3698 SDValue RegSaveArea =
3699 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3700 InChain = RegSaveArea.getValue(1);
3701
3702 // select overflow_area if index > 8
3703 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3704 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3705
3706 // adjustment constant gpr_index * 4/8
3707 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3708 VT.isInteger() ? GprIndex : FprIndex,
3709 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3710 MVT::i32));
3711
3712 // OurReg = RegSaveArea + RegConstant
3713 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3714 RegConstant);
3715
3716 // Floating types are 32 bytes into RegSaveArea
3717 if (VT.isFloatingPoint())
3718 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3719 DAG.getConstant(32, dl, MVT::i32));
3720
3721 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3722 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3723 VT.isInteger() ? GprIndex : FprIndex,
3724 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3725 MVT::i32));
3726
3727 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3728 VT.isInteger() ? VAListPtr : FprPtr,
3729 MachinePointerInfo(SV), MVT::i8);
3730
3731 // determine if we should load from reg_save_area or overflow_area
3732 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3733
3734 // increase overflow_area by 4/8 if gpr/fpr > 8
3735 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3736 DAG.getConstant(VT.isInteger() ? 4 : 8,
3737 dl, MVT::i32));
3738
3739 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3740 OverflowAreaPlusN);
3741
3742 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3743 MachinePointerInfo(), MVT::i32);
3744
3745 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3746}
3747
3748SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3749 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3750
3751 // We have to copy the entire va_list struct:
3752 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3753 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3754 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3755 false, true, /*CI=*/nullptr, std::nullopt,
3756 MachinePointerInfo(), MachinePointerInfo());
3757}
3758
3759SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3760 SelectionDAG &DAG) const {
3761 return Op.getOperand(0);
3762}
3763
// Scan an inline-asm node's register operands for LR/LR8; if the asm touches
// the link register, record that an LR store is required. Always returns Op
// unchanged (this lowering only updates function info).
// NOTE(review): listing numbers jump (3792-3794 and 3797-3799 missing) — the
// case labels of the switch on Flags.getKind() were dropped by the
// extraction; verify against upstream before editing.
3764 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3765   MachineFunction &MF = DAG.getMachineFunction();
3766   PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3767
3768   assert((Op.getOpcode() == ISD::INLINEASM ||
3769           Op.getOpcode() == ISD::INLINEASM_BR) &&
3770          "Expecting Inline ASM node.");
3771
3772   // If an LR store is already known to be required then there is not point in
3773   // checking this ASM as well.
3774   if (MFI.isLRStoreRequired())
3775     return Op;
3776
3777   // Inline ASM nodes have an optional last operand that is an incoming Flag of
3778   // type MVT::Glue. We want to ignore this last operand if that is the case.
3779   unsigned NumOps = Op.getNumOperands();
3780   if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3781     --NumOps;
3782
3783   // Check all operands that may contain the LR.
3784   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3785     const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3786     unsigned NumVals = Flags.getNumOperandRegisters();
3787     ++i; // Skip the ID value.
3788
3789     switch (Flags.getKind()) {
3790     default:
3791       llvm_unreachable("Bad flags!");
3795       i += NumVals;
3796       break;
3800       for (; NumVals; --NumVals, ++i) {
3801         Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3802         if (Reg != PPC::LR && Reg != PPC::LR8)
3803           continue;
3804         MFI.setLRStoreRequired();
3805         return Op;
3806       }
3807       break;
3808     }
3809     }
3810   }
3811
3812   return Op;
3813}
3814
// Lower INIT_TRAMPOLINE. On AIX, build a trampoline descriptor in place by
// copying the entry point and TOC pointer out of the function descriptor and
// storing the nest argument as the environment pointer; elsewhere, emit a
// call to the __trampoline_setup runtime helper.
// NOTE(review): listing numbers jump (3832-3833, 3878, 3886, 3898 missing) —
// the true branch of the MMOFlags conditional, the TokenFactor declaration,
// the ArgListTy declaration, and one setLibCallee argument line were dropped
// by the extraction; verify against upstream before editing.
3815 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3816                                                 SelectionDAG &DAG) const {
3817   SDValue Chain = Op.getOperand(0);
3818   SDValue Trmp = Op.getOperand(1); // trampoline
3819   SDValue FPtr = Op.getOperand(2); // nested function
3820   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3821   SDLoc dl(Op);
3822
3823   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3824
3825   if (Subtarget.isAIXABI()) {
3826     // On AIX we create a trampoline descriptor by combining the
3827     // entry point and TOC from the global descriptor (FPtr) with the
3828     // nest argument as the environment pointer.
3829     uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3830     MaybeAlign PointerAlign(PointerSize);
3831     auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3834                         : MachineMemOperand::MONone;
3835
3836     uint64_t TOCPointerOffset = 1 * PointerSize;
3837     uint64_t EnvPointerOffset = 2 * PointerSize;
3838     SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
3839     SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
3840
3841     const Value *TrampolineAddr =
3842         cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3843     const Function *Func =
3844         cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
3845
3846     SDValue OutChains[3];
3847
3848     // Copy the entry point address from the global descriptor to the
3849     // trampoline buffer.
3850     SDValue LoadEntryPoint =
3851         DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
3852                     PointerAlign, MMOFlags);
3853     SDValue EPLoadChain = LoadEntryPoint.getValue(1);
3854     OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
3855                                 MachinePointerInfo(TrampolineAddr, 0));
3856
3857     // Copy the TOC pointer from the global descriptor to the trampoline
3858     // buffer.
3859     SDValue TOCFromDescriptorPtr =
3860         DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
3861     SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
3862                                  MachinePointerInfo(Func, TOCPointerOffset),
3863                                  PointerAlign, MMOFlags);
3864     SDValue TrampolineTOCPointer =
3865         DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
3866     SDValue TOCLoadChain = TOCReg.getValue(1);
3867     OutChains[1] =
3868         DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
3869                      MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
3870
3871     // Store the nest argument into the environment pointer in the trampoline
3872     // buffer.
3873     SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
3874     OutChains[2] =
3875         DAG.getStore(Chain, dl, Nest, EnvPointer,
3876                      MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
3877
3879         DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
3880     return TokenFactor;
3881   }
3882
3883   bool isPPC64 = (PtrVT == MVT::i64);
3884   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3885
3887   Args.emplace_back(Trmp, IntPtrTy);
3888   // TrampSize == (isPPC64 ? 48 : 40);
3889   Args.emplace_back(
3890       DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
3891       IntPtrTy);
3892   Args.emplace_back(FPtr, IntPtrTy);
3893   Args.emplace_back(Nest, IntPtrTy);
3894
3895   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3896   TargetLowering::CallLoweringInfo CLI(DAG);
3897   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3899       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3900
3901   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3902   return CallResult.second;
3903}
3904
/// LowerVASTART - Lower the llvm.va_start intrinsic.
///
/// On PPC64 and AIX, va_list is a simple pointer, so va_start just stores the
/// address of the vararg save area. On 32-bit SVR4, va_list is a four-field
/// struct that must be initialized field by field (see layout below).
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Pointer-sized step: advances from overflow_arg_area to reg_save_area.
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  // Step from the fpr byte (offset 1) up to overflow_arg_area, i.e. the
  // remaining bytes of the first pointer-sized slot.
  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  // Step from the gpr byte (offset 0) to the fpr byte (offset 1).
  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  // Each store is chained on the previous one so they are emitted in order.
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}
3988
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX. F1-F13 are the thirteen floating-point argument
/// registers; they are consumed in order as FP arguments are assigned.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
3994
3995/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3996/// the stack.
3997static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3998 unsigned PtrByteSize) {
3999 unsigned ArgSize = ArgVT.getStoreSize();
4000 if (Flags.isByVal())
4001 ArgSize = Flags.getByValSize();
4002
4003 // Round up to multiples of the pointer size, except for array members,
4004 // which are always packed.
4005 if (!Flags.isInConsecutiveRegs())
4006 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4007
4008 return ArgSize;
4009}
4010
4011/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4012/// on the stack.
4014 ISD::ArgFlagsTy Flags,
4015 unsigned PtrByteSize) {
4016 Align Alignment(PtrByteSize);
4017
4018 // Altivec parameters are padded to a 16 byte boundary.
4019 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4020 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4021 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4022 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4023 Alignment = Align(16);
4024
4025 // ByVal parameters are aligned as requested.
4026 if (Flags.isByVal()) {
4027 auto BVAlign = Flags.getNonZeroByValAlign();
4028 if (BVAlign > PtrByteSize) {
4029 if (BVAlign.value() % PtrByteSize != 0)
4031 "ByVal alignment is not a multiple of the pointer size");
4032
4033 Alignment = BVAlign;
4034 }
4035 }
4036
4037 // Array members are always packed to their original alignment.
4038 if (Flags.isInConsecutiveRegs()) {
4039 // If the array member was split into multiple registers, the first
4040 // needs to be aligned to the size of the full type. (Except for
4041 // ppcf128, which is only aligned as its f64 components.)
4042 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4043 Alignment = Align(OrigVT.getStoreSize());
4044 else
4045 Alignment = Align(ArgVT.getStoreSize());
4046 }
4047
4048 return Alignment;
4049}
4050
4051/// CalculateStackSlotUsed - Return whether this argument will use its
4052/// stack slot (instead of being passed in registers). ArgOffset,
4053/// AvailableFPRs, and AvailableVRs must hold the current argument
4054/// position, and will be updated to account for this argument.
4055static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4056 unsigned PtrByteSize, unsigned LinkageSize,
4057 unsigned ParamAreaSize, unsigned &ArgOffset,
4058 unsigned &AvailableFPRs,
4059 unsigned &AvailableVRs) {
4060 bool UseMemory = false;
4061
4062 // Respect alignment of argument on the stack.
4063 Align Alignment =
4064 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4065 ArgOffset = alignTo(ArgOffset, Alignment);
4066 // If there's no space left in the argument save area, we must
4067 // use memory (this check also catches zero-sized arguments).
4068 if (ArgOffset >= LinkageSize + ParamAreaSize)
4069 UseMemory = true;
4070
4071 // Allocate argument on the stack.
4072 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4073 if (Flags.isInConsecutiveRegsLast())
4074 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4075 // If we overran the argument save area, we must use memory
4076 // (this check catches arguments passed partially in memory)
4077 if (ArgOffset > LinkageSize + ParamAreaSize)
4078 UseMemory = true;
4079
4080 // However, if the argument is actually passed in an FPR or a VR,
4081 // we don't use memory after all.
4082 if (!Flags.isByVal()) {
4083 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4084 if (AvailableFPRs > 0) {
4085 --AvailableFPRs;
4086 return false;
4087 }
4088 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4089 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4090 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4091 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4092 if (AvailableVRs > 0) {
4093 --AvailableVRs;
4094 return false;
4095 }
4096 }
4097
4098 return UseMemory;
4099}
4100
4101/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4102/// ensure minimum alignment required for target.
4104 unsigned NumBytes) {
4105 return alignTo(NumBytes, Lowering->getStackAlign());
4106}
4107
// Dispatch formal-argument lowering to the implementation for the ABI in
// effect: AIX, 64-bit ELF (ELFv1/ELFv2), or 32-bit ELF SVR4. Exactly one
// of the three applies for any subtarget.
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  // The only remaining possibility.
  assert(Subtarget.is32BitELFABI());
  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}
4122
4123SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4124 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4125 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4126 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4127
4128 // 32-bit SVR4 ABI Stack Frame Layout:
4129 // +-----------------------------------+
4130 // +--> | Back chain |
4131 // | +-----------------------------------+
4132 // | | Floating-point register save area |
4133 // | +-----------------------------------+
4134 // | | General register save area |
4135 // | +-----------------------------------+
4136 // | | CR save word |
4137 // | +-----------------------------------+
4138 // | | VRSAVE save word |
4139 // | +-----------------------------------+
4140 // | | Alignment padding |
4141 // | +-----------------------------------+
4142 // | | Vector register save area |
4143 // | +-----------------------------------+
4144 // | | Local variable space |
4145 // | +-----------------------------------+
4146 // | | Parameter list area |
4147 // | +-----------------------------------+
4148 // | | LR save word |
4149 // | +-----------------------------------+
4150 // SP--> +--- | Back chain |
4151 // +-----------------------------------+
4152 //
4153 // Specifications:
4154 // System V Application Binary Interface PowerPC Processor Supplement
4155 // AltiVec Technology Programming Interface Manual
4156
4157 MachineFunction &MF = DAG.getMachineFunction();
4158 MachineFrameInfo &MFI = MF.getFrameInfo();
4159 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4160
4161 EVT PtrVT = getPointerTy(MF.getDataLayout());
4162 // Potential tail calls could cause overwriting of argument stack slots.
4163 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4164 (CallConv == CallingConv::Fast));
4165 const Align PtrAlign(4);
4166
4167 // Assign locations to all of the incoming arguments.
4169 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4170 *DAG.getContext());
4171
4172 // Reserve space for the linkage area on the stack.
4173 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4174 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4175 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4176
4177 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4178 CCValAssign &VA = ArgLocs[i];
4179
4180 // Arguments stored in registers.
4181 if (VA.isRegLoc()) {
4182 const TargetRegisterClass *RC;
4183 EVT ValVT = VA.getValVT();
4184
4185 switch (ValVT.getSimpleVT().SimpleTy) {
4186 default:
4187 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4188 case MVT::i1:
4189 case MVT::i32:
4190 RC = &PPC::GPRCRegClass;
4191 break;
4192 case MVT::f32:
4193 if (Subtarget.hasP8Vector())
4194 RC = &PPC::VSSRCRegClass;
4195 else if (Subtarget.hasSPE())
4196 RC = &PPC::GPRCRegClass;
4197 else
4198 RC = &PPC::F4RCRegClass;
4199 break;
4200 case MVT::f64:
4201 if (Subtarget.hasVSX())
4202 RC = &PPC::VSFRCRegClass;
4203 else if (Subtarget.hasSPE())
4204 // SPE passes doubles in GPR pairs.
4205 RC = &PPC::GPRCRegClass;
4206 else
4207 RC = &PPC::F8RCRegClass;
4208 break;
4209 case MVT::v16i8:
4210 case MVT::v8i16:
4211 case MVT::v4i32:
4212 RC = &PPC::VRRCRegClass;
4213 break;
4214 case MVT::v4f32:
4215 RC = &PPC::VRRCRegClass;
4216 break;
4217 case MVT::v2f64:
4218 case MVT::v2i64:
4219 RC = &PPC::VRRCRegClass;
4220 break;
4221 }
4222
4223 SDValue ArgValue;
4224 // Transform the arguments stored in physical registers into
4225 // virtual ones.
4226 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4227 assert(i + 1 < e && "No second half of double precision argument");
4228 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4229 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4230 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4231 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4232 if (!Subtarget.isLittleEndian())
4233 std::swap (ArgValueLo, ArgValueHi);
4234 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4235 ArgValueHi);
4236 } else {
4237 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4238 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4239 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4240 if (ValVT == MVT::i1)
4241 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4242 }
4243
4244 InVals.push_back(ArgValue);
4245 } else {
4246 // Argument stored in memory.
4247 assert(VA.isMemLoc());
4248
4249 // Get the extended size of the argument type in stack
4250 unsigned ArgSize = VA.getLocVT().getStoreSize();
4251 // Get the actual size of the argument type
4252 unsigned ObjSize = VA.getValVT().getStoreSize();
4253 unsigned ArgOffset = VA.getLocMemOffset();
4254 // Stack objects in PPC32 are right justified.
4255 ArgOffset += ArgSize - ObjSize;
4256 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4257
4258 // Create load nodes to retrieve arguments from the stack.
4259 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4260 InVals.push_back(
4261 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4262 }
4263 }
4264
4265 // Assign locations to all of the incoming aggregate by value arguments.
4266 // Aggregates passed by value are stored in the local variable space of the
4267 // caller's stack frame, right above the parameter list area.
4268 SmallVector<CCValAssign, 16> ByValArgLocs;
4269 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4270 ByValArgLocs, *DAG.getContext());
4271
4272 // Reserve stack space for the allocations in CCInfo.
4273 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4274
4275 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4276
4277 // Area that is at least reserved in the caller of this function.
4278 unsigned MinReservedArea = CCByValInfo.getStackSize();
4279 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4280
4281 // Set the size that is at least reserved in caller of this function. Tail
4282 // call optimized function's reserved stack space needs to be aligned so that
4283 // taking the difference between two stack areas will result in an aligned
4284 // stack.
4285 MinReservedArea =
4286 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4287 FuncInfo->setMinReservedArea(MinReservedArea);
4288
4290
4291 // If the function takes variable number of arguments, make a frame index for
4292 // the start of the first vararg value... for expansion of llvm.va_start.
4293 if (isVarArg) {
4294 static const MCPhysReg GPArgRegs[] = {
4295 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4296 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4297 };
4298 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4299
4300 static const MCPhysReg FPArgRegs[] = {
4301 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4302 PPC::F8
4303 };
4304 unsigned NumFPArgRegs = std::size(FPArgRegs);
4305
4306 if (useSoftFloat() || hasSPE())
4307 NumFPArgRegs = 0;
4308
4309 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4310 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4311
4312 // Make room for NumGPArgRegs and NumFPArgRegs.
4313 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4314 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4315
4317 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4318
4319 FuncInfo->setVarArgsFrameIndex(
4320 MFI.CreateStackObject(Depth, Align(8), false));
4321 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4322
4323 // The fixed integer arguments of a variadic function are stored to the
4324 // VarArgsFrameIndex on the stack so that they may be loaded by
4325 // dereferencing the result of va_next.
4326 for (MCPhysReg GPArgReg : GPArgRegs) {
4327 // Get an existing live-in vreg, or add a new one.
4328 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4329 if (!VReg)
4330 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4331
4332 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4333 SDValue Store =
4334 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4335 MemOps.push_back(Store);
4336 // Increment the address by four for the next argument to store
4337 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4338 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4339 }
4340
4341 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4342 // is set.
4343 // The double arguments are stored to the VarArgsFrameIndex
4344 // on the stack.
4345 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4346 // Get an existing live-in vreg, or add a new one.
4347 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4348 if (!VReg)
4349 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4350
4351 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4352 SDValue Store =
4353 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4354 MemOps.push_back(Store);
4355 // Increment the address by eight for the next argument to store
4356 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4357 PtrVT);
4358 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4359 }
4360 }
4361
4362 if (!MemOps.empty())
4363 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4364
4365 return Chain;
4366}
4367
4368// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4369// value to MVT::i64 and then truncate to the correct register size.
4370SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4371 EVT ObjectVT, SelectionDAG &DAG,
4372 SDValue ArgVal,
4373 const SDLoc &dl) const {
4374 if (Flags.isSExt())
4375 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4376 DAG.getValueType(ObjectVT));
4377 else if (Flags.isZExt())
4378 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4379 DAG.getValueType(ObjectVT));
4380
4381 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4382}
4383
4384SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4385 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4386 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4387 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4388 // TODO: add description of PPC stack frame format, or at least some docs.
4389 //
4390 bool isELFv2ABI = Subtarget.isELFv2ABI();
4391 bool isLittleEndian = Subtarget.isLittleEndian();
4392 MachineFunction &MF = DAG.getMachineFunction();
4393 MachineFrameInfo &MFI = MF.getFrameInfo();
4394 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4395
4396 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4397 "fastcc not supported on varargs functions");
4398
4399 EVT PtrVT = getPointerTy(MF.getDataLayout());
4400 // Potential tail calls could cause overwriting of argument stack slots.
4401 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4402 (CallConv == CallingConv::Fast));
4403 unsigned PtrByteSize = 8;
4404 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4405
4406 static const MCPhysReg GPR[] = {
4407 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4408 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4409 };
4410 static const MCPhysReg VR[] = {
4411 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4412 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4413 };
4414
4415 const unsigned Num_GPR_Regs = std::size(GPR);
4416 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4417 const unsigned Num_VR_Regs = std::size(VR);
4418
4419 // Do a first pass over the arguments to determine whether the ABI
4420 // guarantees that our caller has allocated the parameter save area
4421 // on its stack frame. In the ELFv1 ABI, this is always the case;
4422 // in the ELFv2 ABI, it is true if this is a vararg function or if
4423 // any parameter is located in a stack slot.
4424
4425 bool HasParameterArea = !isELFv2ABI || isVarArg;
4426 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4427 unsigned NumBytes = LinkageSize;
4428 unsigned AvailableFPRs = Num_FPR_Regs;
4429 unsigned AvailableVRs = Num_VR_Regs;
4430 for (const ISD::InputArg &In : Ins) {
4431 if (In.Flags.isNest())
4432 continue;
4433
4434 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4435 LinkageSize, ParamAreaSize, NumBytes,
4436 AvailableFPRs, AvailableVRs))
4437 HasParameterArea = true;
4438 }
4439
4440 // Add DAG nodes to load the arguments or copy them out of registers. On
4441 // entry to a function on PPC, the arguments start after the linkage area,
4442 // although the first ones are often in registers.
4443
4444 unsigned ArgOffset = LinkageSize;
4445 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4448 unsigned CurArgIdx = 0;
4449 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4450 SDValue ArgVal;
4451 bool needsLoad = false;
4452 EVT ObjectVT = Ins[ArgNo].VT;
4453 EVT OrigVT = Ins[ArgNo].ArgVT;
4454 unsigned ObjSize = ObjectVT.getStoreSize();
4455 unsigned ArgSize = ObjSize;
4456 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4457 if (Ins[ArgNo].isOrigArg()) {
4458 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4459 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4460 }
4461 // We re-align the argument offset for each argument, except when using the
4462 // fast calling convention, when we need to make sure we do that only when
4463 // we'll actually use a stack slot.
4464 unsigned CurArgOffset;
4465 Align Alignment;
4466 auto ComputeArgOffset = [&]() {
4467 /* Respect alignment of argument on the stack. */
4468 Alignment =
4469 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4470 ArgOffset = alignTo(ArgOffset, Alignment);
4471 CurArgOffset = ArgOffset;
4472 };
4473
4474 if (CallConv != CallingConv::Fast) {
4475 ComputeArgOffset();
4476
4477 /* Compute GPR index associated with argument offset. */
4478 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4479 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4480 }
4481
4482 // FIXME the codegen can be much improved in some cases.
4483 // We do not have to keep everything in memory.
4484 if (Flags.isByVal()) {
4485 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4486
4487 if (CallConv == CallingConv::Fast)
4488 ComputeArgOffset();
4489
4490 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4491 ObjSize = Flags.getByValSize();
4492 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4493 // Empty aggregate parameters do not take up registers. Examples:
4494 // struct { } a;
4495 // union { } b;
4496 // int c[0];
4497 // etc. However, we have to provide a place-holder in InVals, so
4498 // pretend we have an 8-byte item at the current address for that
4499 // purpose.
4500 if (!ObjSize) {
4501 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4502 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4503 InVals.push_back(FIN);
4504 continue;
4505 }
4506
4507 // Create a stack object covering all stack doublewords occupied
4508 // by the argument. If the argument is (fully or partially) on
4509 // the stack, or if the argument is fully in registers but the
4510 // caller has allocated the parameter save anyway, we can refer
4511 // directly to the caller's stack frame. Otherwise, create a
4512 // local copy in our own frame.
4513 int FI;
4514 if (HasParameterArea ||
4515 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4516 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4517 else
4518 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4519 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4520
4521 // Handle aggregates smaller than 8 bytes.
4522 if (ObjSize < PtrByteSize) {
4523 // The value of the object is its address, which differs from the
4524 // address of the enclosing doubleword on big-endian systems.
4525 SDValue Arg = FIN;
4526 if (!isLittleEndian) {
4527 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4528 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4529 }
4530 InVals.push_back(Arg);
4531
4532 if (GPR_idx != Num_GPR_Regs) {
4533 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4534 FuncInfo->addLiveInAttr(VReg, Flags);
4535 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4536 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4537 SDValue Store =
4538 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4539 MachinePointerInfo(&*FuncArg), ObjType);
4540 MemOps.push_back(Store);
4541 }
4542 // Whether we copied from a register or not, advance the offset
4543 // into the parameter save area by a full doubleword.
4544 ArgOffset += PtrByteSize;
4545 continue;
4546 }
4547
4548 // The value of the object is its address, which is the address of
4549 // its first stack doubleword.
4550 InVals.push_back(FIN);
4551
4552 // Store whatever pieces of the object are in registers to memory.
4553 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4554 if (GPR_idx == Num_GPR_Regs)
4555 break;
4556
4557 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4558 FuncInfo->addLiveInAttr(VReg, Flags);
4559 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4560 SDValue Addr = FIN;
4561 if (j) {
4562 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4563 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4564 }
4565 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4566 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4567 SDValue Store =
4568 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4569 MachinePointerInfo(&*FuncArg, j), ObjType);
4570 MemOps.push_back(Store);
4571 ++GPR_idx;
4572 }
4573 ArgOffset += ArgSize;
4574 continue;
4575 }
4576
4577 switch (ObjectVT.getSimpleVT().SimpleTy) {
4578 default: llvm_unreachable("Unhandled argument type!");
4579 case MVT::i1:
4580 case MVT::i32:
4581 case MVT::i64:
4582 if (Flags.isNest()) {
4583 // The 'nest' parameter, if any, is passed in R11.
4584 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4585 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4586
4587 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4588 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4589
4590 break;
4591 }
4592
4593 // These can be scalar arguments or elements of an integer array type
4594 // passed directly. Clang may use those instead of "byval" aggregate
4595 // types to avoid forcing arguments to memory unnecessarily.
4596 if (GPR_idx != Num_GPR_Regs) {
4597 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4598 FuncInfo->addLiveInAttr(VReg, Flags);
4599 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4600
4601 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4602 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4603 // value to MVT::i64 and then truncate to the correct register size.
4604 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4605 } else {
4606 if (CallConv == CallingConv::Fast)
4607 ComputeArgOffset();
4608
4609 needsLoad = true;
4610 ArgSize = PtrByteSize;
4611 }
4612 if (CallConv != CallingConv::Fast || needsLoad)
4613 ArgOffset += 8;
4614 break;
4615
4616 case MVT::f32:
4617 case MVT::f64:
4618 // These can be scalar arguments or elements of a float array type
4619 // passed directly. The latter are used to implement ELFv2 homogenous
4620 // float aggregates.
4621 if (FPR_idx != Num_FPR_Regs) {
4622 unsigned VReg;
4623
4624 if (ObjectVT == MVT::f32)
4625 VReg = MF.addLiveIn(FPR[FPR_idx],
4626 Subtarget.hasP8Vector()
4627 ? &PPC::VSSRCRegClass
4628 : &PPC::F4RCRegClass);
4629 else
4630 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4631 ? &PPC::VSFRCRegClass
4632 : &PPC::F8RCRegClass);
4633
4634 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4635 ++FPR_idx;
4636 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4637 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4638 // once we support fp <-> gpr moves.
4639
4640 // This can only ever happen in the presence of f32 array types,
4641 // since otherwise we never run out of FPRs before running out
4642 // of GPRs.
4643 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4644 FuncInfo->addLiveInAttr(VReg, Flags);
4645 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4646
4647 if (ObjectVT == MVT::f32) {
4648 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4649 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4650 DAG.getConstant(32, dl, MVT::i32));
4651 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4652 }
4653
4654 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4655 } else {
4656 if (CallConv == CallingConv::Fast)
4657 ComputeArgOffset();
4658
4659 needsLoad = true;
4660 }
4661
4662 // When passing an array of floats, the array occupies consecutive
4663 // space in the argument area; only round up to the next doubleword
4664 // at the end of the array. Otherwise, each float takes 8 bytes.
4665 if (CallConv != CallingConv::Fast || needsLoad) {
4666 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4667 ArgOffset += ArgSize;
4668 if (Flags.isInConsecutiveRegsLast())
4669 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4670 }
4671 break;
4672 case MVT::v4f32:
4673 case MVT::v4i32:
4674 case MVT::v8i16:
4675 case MVT::v16i8:
4676 case MVT::v2f64:
4677 case MVT::v2i64:
4678 case MVT::v1i128:
4679 case MVT::f128:
4680 // These can be scalar arguments or elements of a vector array type
4681 // passed directly. The latter are used to implement ELFv2 homogenous
4682 // vector aggregates.
4683 if (VR_idx != Num_VR_Regs) {
4684 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4685 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4686 ++VR_idx;
4687 } else {
4688 if (CallConv == CallingConv::Fast)
4689 ComputeArgOffset();
4690 needsLoad = true;
4691 }
4692 if (CallConv != CallingConv::Fast || needsLoad)
4693 ArgOffset += 16;
4694 break;
4695 }
4696
4697 // We need to load the argument to a virtual register if we determined
4698 // above that we ran out of physical registers of the appropriate type.
4699 if (needsLoad) {
4700 if (ObjSize < ArgSize && !isLittleEndian)
4701 CurArgOffset += ArgSize - ObjSize;
4702 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4703 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4704 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4705 }
4706
4707 InVals.push_back(ArgVal);
4708 }
4709
4710 // Area that is at least reserved in the caller of this function.
4711 unsigned MinReservedArea;
4712 if (HasParameterArea)
4713 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4714 else
4715 MinReservedArea = LinkageSize;
4716
4717 // Set the size that is at least reserved in caller of this function. Tail
4718 // call optimized functions' reserved stack space needs to be aligned so that
4719 // taking the difference between two stack areas will result in an aligned
4720 // stack.
4721 MinReservedArea =
4722 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4723 FuncInfo->setMinReservedArea(MinReservedArea);
4724
4725 // If the function takes variable number of arguments, make a frame index for
4726 // the start of the first vararg value... for expansion of llvm.va_start.
4727 // On ELFv2ABI spec, it writes:
4728 // C programs that are intended to be *portable* across different compilers
4729 // and architectures must use the header file <stdarg.h> to deal with variable
4730 // argument lists.
4731 if (isVarArg && MFI.hasVAStart()) {
4732 int Depth = ArgOffset;
4733
4734 FuncInfo->setVarArgsFrameIndex(
4735 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4736 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4737
4738 // If this function is vararg, store any remaining integer argument regs
4739 // to their spots on the stack so that they may be loaded by dereferencing
4740 // the result of va_next.
4741 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4742 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4743 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4744 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4745 SDValue Store =
4746 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4747 MemOps.push_back(Store);
4748 // Increment the address by four for the next argument to store
4749 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4750 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4751 }
4752 }
4753
4754 if (!MemOps.empty())
4755 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4756
4757 return Chain;
4758}
4759
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
///
/// \param DAG        Selection DAG of the calling function.
/// \param isTailCall True when the call is actually tail-called; when false
///                   no adjustment is required and 0 is returned.
/// \param ParamSize  Size in bytes of the outgoing parameter area the tail
///                   call requires.
/// \returns The delta (may be negative) to apply to the stack pointer.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  // Compare the caller's reserved parameter area to what the callee needs; a
  // negative difference means the stack must grow for the tail call.
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}
4776
4777static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4778
/// Return true when it can be proven that \p Caller and the callee named by
/// \p CalleeGV use the same TOC base, so no TOC save/restore (or trailing
/// nop) is required around a call between them. Any uncertainty answers
/// false for correctness.
static bool callsShareTOCBase(const Function *Caller,
                              const GlobalValue *CalleeGV,
                              const TargetMachine &TM) {
  // It does not make sense to call callsShareTOCBase() with a caller that
  // is PC Relative since PC Relative callers do not have a TOC.
#ifndef NDEBUG
  const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
  assert(!STICaller->isUsingPCRelativeCalls() &&
         "PC Relative callers do not have a TOC and cannot share a TOC Base");
#endif

  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
  // don't have enough information to determine if the caller and callee share
  // the same TOC base, so we have to pessimistically assume they don't for
  // correctness.
  if (!CalleeGV)
    return false;

  // If the callee is preemptable, then the static linker will use a plt-stub
  // which saves the toc to the stack, and needs a nop after the call
  // instruction to convert to a toc-restore.
  if (!TM.shouldAssumeDSOLocal(CalleeGV))
    return false;

  // Functions with PC Relative enabled may clobber the TOC in the same DSO.
  // We may need a TOC restore in the situation where the caller requires a
  // valid TOC but the callee is PC Relative and does not.
  const Function *F = dyn_cast<Function>(CalleeGV);
  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);

  // If we have an Alias we can try to get the function from there.
  if (Alias) {
    const GlobalObject *GlobalObj = Alias->getAliaseeObject();
    F = dyn_cast<Function>(GlobalObj);
  }

  // If we still have no valid function pointer we do not have enough
  // information to determine if the callee uses PC Relative calls so we must
  // assume that it does.
  if (!F)
    return false;

  // If the callee uses PC Relative we cannot guarantee that the callee won't
  // clobber the TOC of the caller and so we must assume that the two
  // functions do not share a TOC base.
  const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
  if (STICallee->isUsingPCRelativeCalls())
    return false;

  // If the GV is not a strong definition then we need to assume it can be
  // replaced by another function at link time. The function that replaces
  // it may not share the same TOC as the caller since the callee may be
  // replaced by a PC Relative version of the same function.
  if (!CalleeGV->isStrongDefinitionForLinker())
    return false;

  // The medium and large code models are expected to provide a sufficiently
  // large TOC to provide all data addressing needs of a module with a
  // single TOC.
  if (CodeModel::Medium == TM.getCodeModel() ||
      return true;

  // Any explicitly-specified sections and section prefixes must also match.
  // Also, if we're using -ffunction-sections, then each function is always in
  // a different section (the same is true for COMDAT functions).
  if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
      Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
    return false;
  if (const auto *F = dyn_cast<Function>(CalleeGV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
      return false;
  }

  return true;
}
4855
/// Return true if any outgoing argument in \p Outs would have to be passed
/// in the stack parameter save area — i.e. it does not fit in the GPRs,
/// FPRs and VRs the 64-bit ELF ABI makes available for arguments.
static bool
    const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.is64BitELFABI());

  // Parameter-passing resources defined by the 64-bit ELF ABI.
  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = std::size(GPR);
  const unsigned NumFPRs = 13; // F1-F13 are available for arguments.
  const unsigned NumVRs = std::size(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  // Simulate register assignment for each outgoing argument; stop as soon
  // as one would have to spill into the parameter save area.
  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
                               LinkageSize, ParamAreaSize, NumBytes,
                               AvailableFPRs, AvailableVRs))
      return true;
  }
  return false;
}
4892
4893static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4894 if (CB.arg_size() != CallerFn->arg_size())
4895 return false;
4896
4897 auto CalleeArgIter = CB.arg_begin();
4898 auto CalleeArgEnd = CB.arg_end();
4899 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4900
4901 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4902 const Value* CalleeArg = *CalleeArgIter;
4903 const Value* CallerArg = &(*CallerArgIter);
4904 if (CalleeArg == CallerArg)
4905 continue;
4906
4907 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4908 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4909 // }
4910 // 1st argument of callee is undef and has the same type as caller.
4911 if (CalleeArg->getType() == CallerArg->getType() &&
4912 isa<UndefValue>(CalleeArg))
4913 continue;
4914
4915 return false;
4916 }
4917
4918 return true;
4919}
4920
// Returns true if TCO is possible between the callers and callees
// calling conventions. Only the C and Fast conventions are candidates, and
// a fastcc caller may only tail-call a fastcc callee.
static bool
    CallingConv::ID CalleeCC) {
  // Tail calls are possible with fastcc and ccc.
  auto isTailCallableCC = [] (CallingConv::ID CC){
    return CC == CallingConv::C || CC == CallingConv::Fast;
  };
  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
    return false;

  // We can safely tail call both fastcc and ccc callees from a c calling
  // convention caller. If the caller is fastcc, we may have less stack space
  // than a non-fastcc caller with the same signature so disable tail-calls in
  // that case.
  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
4939
/// Determine whether a call is eligible for tail-call / sibling-call
/// optimization under the 64-bit SVR4 (ELF) ABI. Returns false whenever any
/// correctness requirement (compatible calling conventions, no byval or
/// vararg arguments, shared TOC base, compatible argument lists, ...)
/// cannot be proven.
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  // struct test {
  // long int a;
  // char ary[56];
  // } gTest;
  // __attribute__((noinline)) int callee(struct test v, struct test *b) {
  // b->a = v.a;
  // return 0;
  // }
  // void caller1(struct test a, struct test c, struct test *b) {
  // callee(gTest, b); }
  // void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
  // callee potentially have different TOC bases then we cannot tail call since
  // we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  // We cannot guarantee this for indirect calls or calls to external functions.
  // When PC-Relative addressing is used, the concept of the TOC is no longer
  // applicable so this check is not required.
  // Check first for indirect calls.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
    return false;

  // Check if we share the TOC base.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  // PC Relative tail calls may not have a CallBase.
  // If there is no CallBase we cannot verify if we have the same argument
  // list so assume that we don't have the same argument list.
  if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  return true;
}
5023
5024/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5025/// for tail call optimization. Targets which want to do tail call
5026/// optimization should implement this function.
5027bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5028 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5029 CallingConv::ID CallerCC, bool isVarArg,
5030 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5031 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5032 return false;
5033
5034 // Variable argument functions are not supported.
5035 if (isVarArg)
5036 return false;
5037
5038 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5039 // Functions containing by val parameters are not supported.
5040 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5041 return false;
5042
5043 // Non-PIC/GOT tail calls are supported.
5044 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5045 return true;
5046
5047 // At the moment we can only do local tail calls (in same module, hidden
5048 // or protected) if we are generating PIC.
5049 if (CalleeGV)
5050 return CalleeGV->hasHiddenVisibility() ||
5051 CalleeGV->hasProtectedVisibility();
5052 }
5053
5054 return false;
5055}
5056
/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
/// Returns null when the operand is not a constant, is not word aligned, or
/// does not fit in the sign-extended 26-bit absolute-branch field.
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr; // Top 6 bits have to be sext of immediate.

  // The BxA immediate encodes the target address shifted right by two.
  return DAG
      (int)C->getZExtValue() >> 2, SDLoc(Op),
      .getNode();
}
5074
namespace {

/// Records everything needed to store one tail-call argument into its final
/// stack slot later on: the argument value, the frame-index node used as the
/// store address, and the raw frame index for building MachinePointerInfo.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to store.
  SDValue FrameIdxOp; // Frame-index SDValue used as the store address.
  int FrameIdx = 0;   // Frame index of the destination stack slot.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
5086
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
/// Emits one store per recorded tail-call argument and appends each store's
/// chain to \p MemOpChains so the caller can token-factor them together.
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
  }
}
5102
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
/// A no-op (returns \p Chain unchanged) when \p SPDiff is zero.
                                        SDValue OldRetAddr, SDValue OldFP,
                                        int SPDiff, const SDLoc &dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    int SlotSize = Subtarget.isPPC64() ? 8 : 4;
    // The new slot sits at the ABI return-address save offset, displaced by
    // the tail-call stack-pointer delta.
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    // Store the old return address into the relocated slot.
    SDValue NewRetAddrFrIdx =
        DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));
  }
  return Chain;
}
5124
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
    SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
    int SPDiff, unsigned ArgOffset,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // The argument's final location is its current offset displaced by the
  // tail-call stack-pointer delta.
  int Offset = ArgOffset + SPDiff;
  // Round the argument size in bits up to whole bytes.
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  // Record everything needed to emit the store later
  // (see StoreTailCallArgumentsToStackSlot).
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}
5142
5143/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5144/// stack slot. Returns the chain as result and the loaded frame pointers in
5145/// LROpOut/FPOpout. Used when tail calling.
5146SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5147 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5148 SDValue &FPOpOut, const SDLoc &dl) const {
5149 if (SPDiff) {
5150 // Load the LR and FP stack slot for later adjusting.
5151 LROpOut = getReturnAddrFrameIndex(DAG);
5152 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5153 MachinePointerInfo());
5154 Chain = SDValue(LROpOut.getNode(), 1);
5155 }
5156 return Chain;
5157}
5158
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
                                  SDValue Chain, ISD::ArgFlagsTy Flags,
                                  SelectionDAG &DAG, const SDLoc &dl) {
  // Emit a memcpy of the byval aggregate using its declared size and
  // alignment from the argument flags.
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
}
5173
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls. For non-tail calls the store is emitted immediately; for tail
/// calls the argument's destination is only recorded for later processing.
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  if (!isTailCall) {
    // Vector arguments are addressed relative to the stack pointer rather
    // than through the incoming PtrOff.
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
    // Calculate and remember argument location.
  } else
    CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                             TailCallArguments);
}
5199
/// Finish materializing a tail call: store the recorded arguments into their
/// final stack slots, store the return address at its new location, and
/// close the call sequence immediately before the tail-call node.
static void
    const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
    SDValue FPOp,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InGlue = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
  InGlue = Chain.getValue(1);
}
5222
5223// Is this global address that of a function that can be called by name? (as
5224// opposed to something that must hold a descriptor for an indirect call).
5225static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5226 if (GV) {
5227 if (GV->isThreadLocal())
5228 return false;
5229
5230 return GV->getValueType()->isFunctionTy();
5231 }
5232
5233 return false;
5234}
5235
/// LowerCallResult - Copy the call's return values out of their assigned
/// physical registers, apply any extension/truncation the location info
/// requires, and append the resulting values to \p InVals. Returns the
/// updated chain.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  // The cold calling convention on SVR4 uses a dedicated return convention.
  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    // SPE f64 results come back as two i32 halves that must be recombined.
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Lo.getValue(1);
      InGlue = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Hi.getValue(1);
      InGlue = Hi.getValue(2);
      // The register order of the halves depends on endianness.
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InGlue);
      Chain = Val.getValue(1);
      InGlue = Val.getValue(2);
    }

    // Adjust the copied value to the type the caller expects.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5299
/// Return true if the call must be lowered as an indirect call (through the
/// CTR), rather than as a direct branch to a known symbol or a
/// BLA-compatible absolute address.
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
                           const PPCSubtarget &Subtarget, bool isPatchPoint) {
  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;

  // PatchPoint calls are not indirect.
  if (isPatchPoint)
    return false;

    return false;

  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
  // because the immediate function pointer points to a descriptor instead of
  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
  // pointer immediate points to the global entry point, while the BLA would
  // need to jump to the local entry point (see rL211174).
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
      isBLACompatibleAddress(Callee, DAG))
    return false;

  return true;
}
5323
5324// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5325static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5326 return Subtarget.isAIXABI() ||
5327 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5328}
5329
                          const Function &Caller, const SDValue &Callee,
                          const PPCSubtarget &Subtarget,
                          const TargetMachine &TM,
                          bool IsStrictFPCall = false) {
  // Tail calls always use their own pseudo regardless of ABI.
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  unsigned RetOpc = 0;
  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                                 : PPCISD::BCTRL;
  } else if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    RetOpc = PPCISD::CALL_NOTOC;
  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
    // The ABIs that maintain a TOC pointer across calls need to have a nop
    // immediately following the call instruction if the caller and callee may
    // have different TOC bases. At link time if the linker determines the calls
    // may not share a TOC base, the call is redirected to a trampoline inserted
    // by the linker. The trampoline will (among other things) save the callers
    // TOC pointer at an ABI designated offset in the linkage area and the
    // linker will rewrite the nop to be a load of the TOC pointer from the
    // linkage area into gpr2.
    auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    RetOpc =
        callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
  } else
    RetOpc = PPCISD::CALL;
  // Strict-FP calls must carry the rounding mode: switch to the _RM variant
  // of whichever opcode was selected above.
  if (IsStrictFPCall) {
    switch (RetOpc) {
    default:
      llvm_unreachable("Unknown call opcode");
    case PPCISD::BCTRL_LOAD_TOC:
      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
      break;
    case PPCISD::BCTRL:
      RetOpc = PPCISD::BCTRL_RM;
      break;
    case PPCISD::CALL_NOTOC:
      RetOpc = PPCISD::CALL_NOTOC_RM;
      break;
    case PPCISD::CALL:
      RetOpc = PPCISD::CALL_RM;
      break;
    case PPCISD::CALL_NOP:
      RetOpc = PPCISD::CALL_NOP_RM;
      break;
    }
  }
  return RetOpc;
}
5392
/// Rewrite the callee operand into the form the selected ABI requires: an
/// absolute BLA-compatible address, an AIX function entry-point MCSymbol, a
/// target global address (possibly via the PLT), or a target external
/// symbol. If none of these apply the callee is returned unchanged.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
  };

  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&

  // Build an MCSymbol node for the function's entry point csect on AIX.
  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    auto *S =
        static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    return DAG.getMCSymbol(S, PtrVT);
  };

  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;
  if (isFunctionGlobalAddress(GV)) {
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    // Non-AIX: a direct target global address, via the PLT when required.
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      UsePlt ? PPCII::MO_PLT : 0);
  }

    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
        auto &Context = DAG.getMachineFunction().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5471
  // We must really have been handed the CALLSEQ_START of the call sequence.
  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
         "Expected a CALLSEQ_STARTSDNode.");

  // The last operand is the chain, except when the node has glue. If the node
  // has glue, then the last operand is the glue, and the chain is the second
  // last operand.
  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
  if (LastValue.getValueType() != MVT::Glue)
    return LastValue;

  // Glue present: the chain is the value just before it.
  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
}
5485
5486// Creates the node that moves a functions address into the count register
5487// to prepare for an indirect call instruction.
5488static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5489 SDValue &Glue, SDValue &Chain,
5490 const SDLoc &dl) {
5491 SDValue MTCTROps[] = {Chain, Callee, Glue};
5492 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5493 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5494 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5495 // The glue is the second value produced.
5496 Glue = Chain.getValue(1);
5497}
5498
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  // 1. Save the TOC of the caller in the TOC save area of its stack
  // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  // 2. Load the address of the function entry point from the function
  // descriptor.
  // 3. Load the TOC of the callee from the function descriptor into r2.
  // 4. Load the environment pointer from the function descriptor into
  // r11.
  // 5. Branch to the function entry point address.
  // 6. On return of the callee, the TOC of the caller needs to be
  // restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()

  // Attribute the descriptor loads to the called operand when it is known.
  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.getScalarIntVT();
  const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
  Chain = TOCVal.getValue(0);
  Glue = TOCVal.getValue(1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(0);
    Glue = EnvVal.getValue(1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
5590
// Assemble the operand list for a PPC call/tail-call SDNode: the chain,
// the callee (for direct calls), the TOC-restore address and environment/CTR
// registers (for indirect calls), the tail-call SP delta, the argument
// registers, implicit TOC/CR6 uses, the call-preserved register mask, and
// finally the glue operand if present.
// NOTE(review): the line carrying the function name and its first parameter
// (the operand vector `Ops` that is pushed into below) is elided in this
// rendering (inner line 5592) — verify against the upstream source.
5591 static void
5593 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5594 SelectionDAG &DAG,
5595 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5596 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5597 const PPCSubtarget &Subtarget) {
5598 const bool IsPPC64 = Subtarget.isPPC64();
5599 // MVT for a general purpose register.
5600 const MVT RegVT = Subtarget.getScalarIntVT();
5601
5602 // First operand is always the chain.
5603 Ops.push_back(Chain);
5604
5605 // If it's a direct call pass the callee as the second operand.
5606 if (!CFlags.IsIndirect)
5607 Ops.push_back(Callee);
5608 else {
5609 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5610
5611 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5612 // on the stack (this would have been done in `LowerCall_64SVR4` or
5613 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5614 // represents both the indirect branch and a load that restores the TOC
5615 // pointer from the linkage area. The operand for the TOC restore is an add
5616 // of the TOC save offset to the stack pointer. This must be the second
5617 // operand: after the chain input but before any other variadic arguments.
5618 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5619 // saved or used.
5620 if (isTOCSaveRestoreRequired(Subtarget)) {
5621 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5622
5623 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5624 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5625 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5626 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5627 Ops.push_back(AddTOC);
5628 }
5629
5630 // Add the register used for the environment pointer.
5631 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5632 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5633 RegVT));
5634
5635
5636 // Add CTR register as callee so a bctr can be emitted later.
5637 if (CFlags.IsTailCall)
5638 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5639 }
5640
5641 // If this is a tail call add stack pointer delta.
5642 if (CFlags.IsTailCall)
5643 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5644
5645 // Add argument registers to the end of the list so that they are known live
5646 // into the call.
5647 for (const auto &[Reg, N] : RegsToPass)
5648 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5649
5650 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5651 // no way to mark dependencies as implicit here.
5652 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5653 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5654 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5655 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5656
5657 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5658 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5659 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5660
5661 // Add a register mask operand representing the call-preserved registers.
5662 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5663 const uint32_t *Mask =
5664 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5665 assert(Mask && "Missing call preserved mask for calling convention");
5666 Ops.push_back(DAG.getRegisterMask(Mask));
5667
5668 // If the glue is valid, it is the last operand.
5669 if (Glue.getNode())
5670 Ops.push_back(Glue);
5671 }
5672
// Finish lowering a call: record TOC-base-pointer usage where the ABI
// requires it, select the call opcode, materialize the callee (direct
// transform, or descriptor/plain indirect-call preparation), build the
// operand list, then emit either a TC_RETURN tail call or a call node
// followed by CALLSEQ_END, returning via LowerCallResult.
5673 SDValue PPCTargetLowering::FinishCall(
5674 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5675 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5676 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5677 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5678 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5679
5680 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5681 Subtarget.isAIXABI())
5682 setUsesTOCBasePtr(DAG);
5683
5684 unsigned CallOpc =
5685 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5686 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5687
5688 if (!CFlags.IsIndirect)
5689 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5690 else if (Subtarget.usesFunctionDescriptors())
5691 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5692 dl, CFlags.HasNest, Subtarget);
5693 else
5694 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5695
5696 // Build the operand list for the call instruction.
// NOTE(review): the declaration of the operand vector `Ops` (inner line
// 5697) is elided in this rendering.
5698 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5699 SPDiff, Subtarget);
5700
5701 // Emit tail call.
5702 if (CFlags.IsTailCall) {
5703 // Indirect tail call when using PC Relative calls do not have the same
5704 // constraints.
5705 assert(((Callee.getOpcode() == ISD::Register &&
5706 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5707 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5708 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5709 isa<ConstantSDNode>(Callee) ||
5710 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5711 "Expecting a global address, external symbol, absolute value, "
5712 "register or an indirect tail call when PC Relative calls are "
5713 "used.");
5714 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5715 assert(CallOpc == PPCISD::TC_RETURN &&
5716 "Unexpected call opcode for a tail call.");
// NOTE(review): a statement (inner line 5717) is elided here in this
// rendering — verify against the upstream source.
5718 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5719 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5720 return Ret;
5721 }
5722
5723 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5724 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5725 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5726 Glue = Chain.getValue(1);
5727
5728 // When performing tail call optimization the callee pops its arguments off
5729 // the stack. Account for this here so these bytes can be pushed back on in
5730 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5731 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
// NOTE(review): the second operand of this && (inner line 5732) is elided in
// this rendering.
5733 ? NumBytes
5734 : 0;
5735
5736 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5737 Glue = Chain.getValue(1);
5738
5739 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5740 DAG, InVals);
5741 }
5742
// Decide whether this call site can be tail-call optimized: a statically
// known callee is required (bail out for indirect call sites), then the
// caller/callee calling conventions and the callee's return info are
// gathered and the decision is delegated to isEligibleForTCO().
// NOTE(review): the function signature line (inner line 5743) is elided in
// this rendering; presumably
// PPCTargetLowering::supportsTailCallFor(const CallBase *CB) — verify
// against the upstream source.
5744 CallingConv::ID CalleeCC = CB->getCallingConv();
5745 const Function *CallerFunc = CB->getCaller();
5746 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5747 const Function *CalleeFunc = CB->getCalledFunction();
5748 if (!CalleeFunc)
5749 return false;
5750 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5751
5754
// NOTE(review): inner lines 5752-5753 are elided in this rendering;
// presumably the declarations of the `Outs`/`Ins` vectors used below —
// verify against the upstream source.
5755 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5756 CalleeFunc->getAttributes(), Outs, *this,
5757 CalleeFunc->getDataLayout());
5758
5759 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5760 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5761 false /*isCalleeExternalSymbol*/);
5762 }
5763
// Dispatch the tail-call-eligibility check: long-call mode disables TCO
// unless the site is musttail; 64-bit SVR4 uses the more detailed
// 64SVR4-specific check, every other configuration uses the generic check.
// NOTE(review): the parameter line for `Outs` (inner line 5767) is elided in
// this rendering.
5764 bool PPCTargetLowering::isEligibleForTCO(
5765 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5766 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5768 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5769 bool isCalleeExternalSymbol) const {
5770 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5771 return false;
5772
5773 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5774 return IsEligibleForTailCallOptimization_64SVR4(
5775 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5776 isCalleeExternalSymbol);
5777 else
5778 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5779 isVarArg, Ins);
5780 }
5781
// Top-level call-lowering entry point. Unpacks CallLoweringInfo, decides
// tail-call eligibility (counting statistics and honoring musttail /
// GuaranteedTailCallOpt), translates the callee to a pointer under long-call
// mode, builds the CallFlags bundle, and dispatches to the AIX, 64-bit SVR4,
// or 32-bit SVR4 lowering routine.
5782 SDValue
5783 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5784 SmallVectorImpl<SDValue> &InVals) const {
5785 SelectionDAG &DAG = CLI.DAG;
5786 SDLoc &dl = CLI.DL;
// NOTE(review): inner line 5787 is elided in this rendering; presumably the
// declaration of `Outs` from CLI (used below) — verify upstream.
5788 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
// NOTE(review): inner line 5789 is elided in this rendering; presumably the
// declaration of `Ins` from CLI (used below) — verify upstream.
5790 SDValue Chain = CLI.Chain;
5791 SDValue Callee = CLI.Callee;
5792 bool &isTailCall = CLI.IsTailCall;
5793 CallingConv::ID CallConv = CLI.CallConv;
5794 bool isVarArg = CLI.IsVarArg;
5795 bool isPatchPoint = CLI.IsPatchPoint;
5796 const CallBase *CB = CLI.CB;
5797
5798 if (isTailCall) {
// NOTE(review): inner line 5799 is elided in this rendering; presumably the
// MachineFunction reference `MF` used on the next line — verify upstream.
5800 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5801 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5802 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5803 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5804
5805 isTailCall =
5806 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5807 &(MF.getFunction()), IsCalleeExternalSymbol);
5808 if (isTailCall) {
5809 ++NumTailCalls;
5810 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5811 ++NumSiblingCalls;
5812
5813 // PC Relative calls no longer guarantee that the callee is a Global
5814 // Address Node. The callee could be an indirect tail call in which
5815 // case the SDValue for the callee could be a load (to load the address
5816 // of a function pointer) or it may be a register copy (to move the
5817 // address of the callee from a function parameter into a virtual
5818 // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5819 assert((Subtarget.isUsingPCRelativeCalls() ||
5820 isa<GlobalAddressSDNode>(Callee)) &&
5821 "Callee should be an llvm::Function object.");
5822
5823 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5824 << "\nTCO callee: ");
5825 LLVM_DEBUG(Callee.dump());
5826 }
5827 }
5828
5829 if (!isTailCall && CB && CB->isMustTailCall())
5830 report_fatal_error("failed to perform tail call elimination on a call "
5831 "site marked musttail");
5832
5833 // When long calls (i.e. indirect calls) are always used, calls are always
5834 // made via function pointer. If we have a function name, first translate it
5835 // into a pointer.
5836 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5837 !isTailCall)
5838 Callee = LowerGlobalAddress(Callee, DAG);
5839
5840 CallFlags CFlags(
5841 CallConv, isTailCall, isVarArg, isPatchPoint,
5842 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5843 // hasNest
5844 Subtarget.is64BitELFABI() &&
5845 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5846 CLI.NoMerge);
5847
5848 if (Subtarget.isAIXABI())
5849 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5850 InVals, CB);
5851
5852 assert(Subtarget.isSVR4ABI());
5853 if (Subtarget.isPPC64())
5854 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5855 InVals, CB);
5856 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5857 InVals, CB);
5858 }
5859
// Lower a call under the 32-bit SVR4 ABI: assign argument locations (fixed
// and vararg arguments analyzed separately), lay out by-value aggregate
// copies outside the CALLSEQ region, adjust the stack, copy arguments into
// registers or the parameter list area, set CR bit 6 for vararg calls with
// floating-point args, and complete the call via FinishCall().
// NOTE(review): inner lines 5862 and 5865 (two of the parameter lines,
// presumably `Outs` and the SelectionDAG reference) are elided in this
// rendering — verify against the upstream source.
5860 SDValue PPCTargetLowering::LowerCall_32SVR4(
5861 SDValue Chain, SDValue Callee, CallFlags CFlags,
5863 const SmallVectorImpl<SDValue> &OutVals,
5864 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5866 const CallBase *CB) const {
5867 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5868 // of the 32-bit SVR4 ABI stack frame layout.
5869
5870 const CallingConv::ID CallConv = CFlags.CallConv;
5871 const bool IsVarArg = CFlags.IsVarArg;
5872 const bool IsTailCall = CFlags.IsTailCall;
5873
5874 assert((CallConv == CallingConv::C ||
5875 CallConv == CallingConv::Cold ||
5876 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5877
5878 const Align PtrAlign(4);
5879
5880 MachineFunction &MF = DAG.getMachineFunction();
5881
5882 // Mark this function as potentially containing a function that contains a
5883 // tail call. As a consequence the frame pointer will be used for dynamicalloc
5884 // and restoring the callers stack pointer in this functions epilog. This is
5885 // done because by tail calling the called function might overwrite the value
5886 // in this function's (MF) stack pointer stack slot 0(SP).
5887 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5888 CallConv == CallingConv::Fast)
5889 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5890
5891 // Count how many bytes are to be pushed on the stack, including the linkage
5892 // area, parameter list area and the part of the local variable space which
5893 // contains copies of aggregates which are passed by value.
5894
5895 // Assign locations to all of the outgoing arguments.
// NOTE(review): the `ArgLocs` vector declaration (inner line 5896) is elided
// in this rendering.
5897 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5898
5899 // Reserve space for the linkage area on the stack.
5900 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5901 PtrAlign);
5902
5903 if (IsVarArg) {
5904 // Handle fixed and variable vector arguments differently.
5905 // Fixed vector arguments go into registers as long as registers are
5906 // available. Variable vector arguments always go into memory.
5907 unsigned NumArgs = Outs.size();
5908
5909 for (unsigned i = 0; i != NumArgs; ++i) {
5910 MVT ArgVT = Outs[i].VT;
5911 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5912 bool Result;
5913
5914 if (!ArgFlags.isVarArg()) {
5915 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5916 Outs[i].OrigTy, CCInfo);
5917 } else {
// NOTE(review): inner line 5918 is elided in this rendering; presumably the
// `Result =` assignment calling the vararg calling-convention routine whose
// trailing arguments appear on the next line — verify upstream.
5919 ArgFlags, Outs[i].OrigTy, CCInfo);
5920 }
5921
5922 if (Result) {
5923#ifndef NDEBUG
5924 errs() << "Call operand #" << i << " has unhandled type "
5925 << ArgVT << "\n";
5926#endif
5927 llvm_unreachable(nullptr);
5928 }
5929 }
5930 } else {
5931 // All arguments are treated the same.
5932 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5933 }
5934
5935 // Assign locations to all of the outgoing aggregate by value arguments.
5936 SmallVector<CCValAssign, 16> ByValArgLocs;
5937 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5938
5939 // Reserve stack space for the allocations in CCInfo.
5940 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
5941
5942 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5943
5944 // Size of the linkage area, parameter list area and the part of the local
5945 // space variable where copies of aggregates which are passed by value are
5946 // stored.
5947 unsigned NumBytes = CCByValInfo.getStackSize();
5948
5949 // Calculate by how many bytes the stack has to be adjusted in case of tail
5950 // call optimization.
5951 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5952
5953 // Adjust the stack pointer for the new arguments...
5954 // These operations are automatically eliminated by the prolog/epilog pass
5955 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5956 SDValue CallSeqStart = Chain;
5957
5958 // Load the return address and frame pointer so it can be moved somewhere else
5959 // later.
5960 SDValue LROp, FPOp;
5961 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5962
5963 // Set up a copy of the stack pointer for use loading and storing any
5964 // arguments that may not fit in the registers available for argument
5965 // passing.
5966 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5967
// NOTE(review): inner line 5968 is elided in this rendering; presumably the
// declaration of the `RegsToPass` vector pushed into below — verify
// upstream.
5969 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5970 SmallVector<SDValue, 8> MemOpChains;
5971
5972 bool seenFloatArg = false;
5973 // Walk the register/memloc assignments, inserting copies/loads.
5974 // i - Tracks the index into the list of registers allocated for the call
5975 // RealArgIdx - Tracks the index into the list of actual function arguments
5976 // j - Tracks the index into the list of byval arguments
5977 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5978 i != e;
5979 ++i, ++RealArgIdx) {
5980 CCValAssign &VA = ArgLocs[i];
5981 SDValue Arg = OutVals[RealArgIdx];
5982 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5983
5984 if (Flags.isByVal()) {
5985 // Argument is an aggregate which is passed by value, thus we need to
5986 // create a copy of it in the local variable space of the current stack
5987 // frame (which is the stack frame of the caller) and pass the address of
5988 // this copy to the callee.
5989 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5990 CCValAssign &ByValVA = ByValArgLocs[j++];
5991 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5992
5993 // Memory reserved in the local variable space of the callers stack frame.
5994 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5995
5996 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5997 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5998 StackPtr, PtrOff);
5999
6000 // Create a copy of the argument in the local area of the current
6001 // stack frame.
6002 SDValue MemcpyCall =
6003 CreateCopyOfByValArgument(Arg, PtrOff,
6004 CallSeqStart.getNode()->getOperand(0),
6005 Flags, DAG, dl);
6006
6007 // This must go outside the CALLSEQ_START..END.
6008 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6009 SDLoc(MemcpyCall));
6010 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6011 NewCallSeqStart.getNode());
6012 Chain = CallSeqStart = NewCallSeqStart;
6013
6014 // Pass the address of the aggregate copy on the stack either in a
6015 // physical register or in the parameter list area of the current stack
6016 // frame to the callee.
6017 Arg = PtrOff;
6018 }
6019
6020 // When useCRBits() is true, there can be i1 arguments.
6021 // It is because getRegisterType(MVT::i1) => MVT::i1,
6022 // and for other integer types getRegisterType() => MVT::i32.
6023 // Extend i1 and ensure callee will get i32.
6024 if (Arg.getValueType() == MVT::i1)
6025 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6026 dl, MVT::i32, Arg);
6027
6028 if (VA.isRegLoc()) {
6029 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6030 // Put argument in a physical register.
6031 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6032 bool IsLE = Subtarget.isLittleEndian();
6033 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6034 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6035 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6036 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6037 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6038 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6039 SVal.getValue(0)));
6040 } else
6041 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6042 } else {
6043 // Put argument in the parameter list area of the current stack frame.
6044 assert(VA.isMemLoc());
6045 unsigned LocMemOffset = VA.getLocMemOffset();
6046
6047 if (!IsTailCall) {
6048 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6049 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6050 StackPtr, PtrOff);
6051
6052 MemOpChains.push_back(
6053 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6054 } else {
6055 // Calculate and remember argument location.
6056 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6057 TailCallArguments);
6058 }
6059 }
6060 }
6061
6062 if (!MemOpChains.empty())
6063 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6064
6065 // Build a sequence of copy-to-reg nodes chained together with token chain
6066 // and flag operands which copy the outgoing args into the appropriate regs.
6067 SDValue InGlue;
6068 for (const auto &[Reg, N] : RegsToPass) {
6069 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6070 InGlue = Chain.getValue(1);
6071 }
6072
6073 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6074 // registers.
6075 if (IsVarArg) {
6076 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6077 SDValue Ops[] = { Chain, InGlue };
6078
6079 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6080 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6081
6082 InGlue = Chain.getValue(1);
6083 }
6084
6085 if (IsTailCall)
6086 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6087 TailCallArguments);
6088
6089 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6090 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6091 }
6092
6093// Copy an argument into memory, being careful to do this outside the
6094// call sequence for the call to which the argument belongs.
6095SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6096 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6097 SelectionDAG &DAG, const SDLoc &dl) const {
6098 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6099 CallSeqStart.getNode()->getOperand(0),
6100 Flags, DAG, dl);
6101 // The MEMCPY must go outside the CALLSEQ_START..END.
6102 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6103 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6104 SDLoc(MemcpyCall));
6105 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6106 NewCallSeqStart.getNode());
6107 return NewCallSeqStart;
6108}
6109
6110SDValue PPCTargetLowering::LowerCall_64SVR4(
6111 SDValue Chain, SDValue Callee, CallFlags CFlags,
6113 const SmallVectorImpl<SDValue> &OutVals,
6114 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6116 const CallBase *CB) const {
6117 bool isELFv2ABI = Subtarget.isELFv2ABI();
6118 bool isLittleEndian = Subtarget.isLittleEndian();
6119 unsigned NumOps = Outs.size();
6120 bool IsSibCall = false;
6121 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6122
6123 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6124 unsigned PtrByteSize = 8;
6125
6126 MachineFunction &MF = DAG.getMachineFunction();
6127
6128 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6129 IsSibCall = true;
6130
6131 // Mark this function as potentially containing a function that contains a
6132 // tail call. As a consequence the frame pointer will be used for dynamicalloc
6133 // and restoring the callers stack pointer in this functions epilog. This is
6134 // done because by tail calling the called function might overwrite the value
6135 // in this function's (MF) stack pointer stack slot 0(SP).
6136 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6137 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6138
6139 assert(!(IsFastCall && CFlags.IsVarArg) &&
6140 "fastcc not supported on varargs functions");
6141
6142 // Count how many bytes are to be pushed on the stack, including the linkage
6143 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6144 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6145 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6146 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6147 unsigned NumBytes = LinkageSize;
6148 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6149
6150 static const MCPhysReg GPR[] = {
6151 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6152 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6153 };
6154 static const MCPhysReg VR[] = {
6155 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6156 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6157 };
6158
6159 const unsigned NumGPRs = std::size(GPR);
6160 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6161 const unsigned NumVRs = std::size(VR);
6162
6163 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6164 // can be passed to the callee in registers.
6165 // For the fast calling convention, there is another check below.
6166 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6167 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6168 if (!HasParameterArea) {
6169 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6170 unsigned AvailableFPRs = NumFPRs;
6171 unsigned AvailableVRs = NumVRs;
6172 unsigned NumBytesTmp = NumBytes;
6173 for (unsigned i = 0; i != NumOps; ++i) {
6174 if (Outs[i].Flags.isNest()) continue;
6175 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6176 PtrByteSize, LinkageSize, ParamAreaSize,
6177 NumBytesTmp, AvailableFPRs, AvailableVRs))
6178 HasParameterArea = true;
6179 }
6180 }
6181
6182 // When using the fast calling convention, we don't provide backing for
6183 // arguments that will be in registers.
6184 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6185
6186 // Avoid allocating parameter area for fastcc functions if all the arguments
6187 // can be passed in the registers.
6188 if (IsFastCall)
6189 HasParameterArea = false;
6190
6191 // Add up all the space actually used.
6192 for (unsigned i = 0; i != NumOps; ++i) {
6193 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6194 EVT ArgVT = Outs[i].VT;
6195 EVT OrigVT = Outs[i].ArgVT;
6196
6197 if (Flags.isNest())
6198 continue;
6199
6200 if (IsFastCall) {
6201 if (Flags.isByVal()) {
6202 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6203 if (NumGPRsUsed > NumGPRs)
6204 HasParameterArea = true;
6205 } else {
6206 switch (ArgVT.getSimpleVT().SimpleTy) {
6207 default: llvm_unreachable("Unexpected ValueType for argument!");
6208 case MVT::i1:
6209 case MVT::i32:
6210 case MVT::i64:
6211 if (++NumGPRsUsed <= NumGPRs)
6212 continue;
6213 break;
6214 case MVT::v4i32:
6215 case MVT::v8i16:
6216 case MVT::v16i8:
6217 case MVT::v2f64:
6218 case MVT::v2i64:
6219 case MVT::v1i128:
6220 case MVT::f128:
6221 if (++NumVRsUsed <= NumVRs)
6222 continue;
6223 break;
6224 case MVT::v4f32:
6225 if (++NumVRsUsed <= NumVRs)
6226 continue;
6227 break;
6228 case MVT::f32:
6229 case MVT::f64:
6230 if (++NumFPRsUsed <= NumFPRs)
6231 continue;
6232 break;
6233 }
6234 HasParameterArea = true;
6235 }
6236 }
6237
6238 /* Respect alignment of argument on the stack. */
6239 auto Alignement =
6240 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6241 NumBytes = alignTo(NumBytes, Alignement);
6242
6243 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6244 if (Flags.isInConsecutiveRegsLast())
6245 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6246 }
6247
6248 unsigned NumBytesActuallyUsed = NumBytes;
6249
6250 // In the old ELFv1 ABI,
6251 // the prolog code of the callee may store up to 8 GPR argument registers to
6252 // the stack, allowing va_start to index over them in memory if its varargs.
6253 // Because we cannot tell if this is needed on the caller side, we have to
6254 // conservatively assume that it is needed. As such, make sure we have at
6255 // least enough stack space for the caller to store the 8 GPRs.
6256 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6257 // really requires memory operands, e.g. a vararg function.
6258 if (HasParameterArea)
6259 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6260 else
6261 NumBytes = LinkageSize;
6262
6263 // Tail call needs the stack to be aligned.
6264 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6265 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6266
6267 int SPDiff = 0;
6268
6269 // Calculate by how many bytes the stack has to be adjusted in case of tail
6270 // call optimization.
6271 if (!IsSibCall)
6272 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6273
6274 // To protect arguments on the stack from being clobbered in a tail call,
6275 // force all the loads to happen before doing any other lowering.
6276 if (CFlags.IsTailCall)
6277 Chain = DAG.getStackArgumentTokenFactor(Chain);
6278
6279 // Adjust the stack pointer for the new arguments...
6280 // These operations are automatically eliminated by the prolog/epilog pass
6281 if (!IsSibCall)
6282 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6283 SDValue CallSeqStart = Chain;
6284
6285 // Load the return address and frame pointer so it can be move somewhere else
6286 // later.
6287 SDValue LROp, FPOp;
6288 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6289
6290 // Set up a copy of the stack pointer for use loading and storing any
6291 // arguments that may not fit in the registers available for argument
6292 // passing.
6293 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6294
6295 // Figure out which arguments are going to go in registers, and which in
6296 // memory. Also, if this is a vararg function, floating point operations
6297 // must be stored to our stack, and loaded into integer regs as well, if
6298 // any integer regs are available for argument passing.
6299 unsigned ArgOffset = LinkageSize;
6300
6302 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6303
6304 SmallVector<SDValue, 8> MemOpChains;
6305 for (unsigned i = 0; i != NumOps; ++i) {
6306 SDValue Arg = OutVals[i];
6307 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6308 EVT ArgVT = Outs[i].VT;
6309 EVT OrigVT = Outs[i].ArgVT;
6310
6311 // PtrOff will be used to store the current argument to the stack if a
6312 // register cannot be found for it.
6313 SDValue PtrOff;
6314
6315 // We re-align the argument offset for each argument, except when using the
6316 // fast calling convention, when we need to make sure we do that only when
6317 // we'll actually use a stack slot.
6318 auto ComputePtrOff = [&]() {
6319 /* Respect alignment of argument on the stack. */
6320 auto Alignment =
6321 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6322 ArgOffset = alignTo(ArgOffset, Alignment);
6323
6324 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6325
6326 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6327 };
6328
6329 if (!IsFastCall) {
6330 ComputePtrOff();
6331
6332 /* Compute GPR index associated with argument offset. */
6333 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6334 GPR_idx = std::min(GPR_idx, NumGPRs);
6335 }
6336
6337 // Promote integers to 64-bit values.
6338 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6339 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6340 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6341 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6342 }
6343
6344 // FIXME memcpy is used way more than necessary. Correctness first.
6345 // Note: "by value" is code for passing a structure by value, not
6346 // basic types.
6347 if (Flags.isByVal()) {
6348 // Note: Size includes alignment padding, so
6349 // struct x { short a; char b; }
6350 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6351 // These are the proper values we need for right-justifying the
6352 // aggregate in a parameter register.
6353 unsigned Size = Flags.getByValSize();
6354
6355 // An empty aggregate parameter takes up no storage and no
6356 // registers.
6357 if (Size == 0)
6358 continue;
6359
6360 if (IsFastCall)
6361 ComputePtrOff();
6362
6363 // All aggregates smaller than 8 bytes must be passed right-justified.
6364 if (Size==1 || Size==2 || Size==4) {
6365 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6366 if (GPR_idx != NumGPRs) {
6367 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6368 MachinePointerInfo(), VT);
6369 MemOpChains.push_back(Load.getValue(1));
6370 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6371
6372 ArgOffset += PtrByteSize;
6373 continue;
6374 }
6375 }
6376
6377 if (GPR_idx == NumGPRs && Size < 8) {
6378 SDValue AddPtr = PtrOff;
6379 if (!isLittleEndian) {
6380 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6381 PtrOff.getValueType());
6382 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6383 }
6384 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6385 CallSeqStart,
6386 Flags, DAG, dl);
6387 ArgOffset += PtrByteSize;
6388 continue;
6389 }
6390 // Copy the object to parameter save area if it can not be entirely passed
6391 // by registers.
6392 // FIXME: we only need to copy the parts which need to be passed in
6393 // parameter save area. For the parts passed by registers, we don't need
6394 // to copy them to the stack although we need to allocate space for them
6395 // in parameter save area.
6396 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6397 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6398 CallSeqStart,
6399 Flags, DAG, dl);
6400
6401 // When a register is available, pass a small aggregate right-justified.
6402 if (Size < 8 && GPR_idx != NumGPRs) {
6403 // The easiest way to get this right-justified in a register
6404 // is to copy the structure into the rightmost portion of a
6405 // local variable slot, then load the whole slot into the
6406 // register.
6407 // FIXME: The memcpy seems to produce pretty awful code for
6408 // small aggregates, particularly for packed ones.
6409 // FIXME: It would be preferable to use the slot in the
6410 // parameter save area instead of a new local variable.
6411 SDValue AddPtr = PtrOff;
6412 if (!isLittleEndian) {
6413 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6414 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6415 }
6416 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6417 CallSeqStart,
6418 Flags, DAG, dl);
6419
6420 // Load the slot into the register.
6421 SDValue Load =
6422 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6423 MemOpChains.push_back(Load.getValue(1));
6424 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6425
6426 // Done with this argument.
6427 ArgOffset += PtrByteSize;
6428 continue;
6429 }
6430
6431 // For aggregates larger than PtrByteSize, copy the pieces of the
6432 // object that fit into registers from the parameter save area.
6433 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6434 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6435 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6436 if (GPR_idx != NumGPRs) {
6437 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6438 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6439 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6440 MachinePointerInfo(), ObjType);
6441
6442 MemOpChains.push_back(Load.getValue(1));
6443 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6444 ArgOffset += PtrByteSize;
6445 } else {
6446 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6447 break;
6448 }
6449 }
6450 continue;
6451 }
6452
6453 switch (Arg.getSimpleValueType().SimpleTy) {
6454 default: llvm_unreachable("Unexpected ValueType for argument!");
6455 case MVT::i1:
6456 case MVT::i32:
6457 case MVT::i64:
6458 if (Flags.isNest()) {
6459 // The 'nest' parameter, if any, is passed in R11.
6460 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6461 break;
6462 }
6463
6464 // These can be scalar arguments or elements of an integer array type
6465 // passed directly. Clang may use those instead of "byval" aggregate
6466 // types to avoid forcing arguments to memory unnecessarily.
6467 if (GPR_idx != NumGPRs) {
6468 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6469 } else {
6470 if (IsFastCall)
6471 ComputePtrOff();
6472
6473 assert(HasParameterArea &&
6474 "Parameter area must exist to pass an argument in memory.");
6475 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6476 true, CFlags.IsTailCall, false, MemOpChains,
6477 TailCallArguments, dl);
6478 if (IsFastCall)
6479 ArgOffset += PtrByteSize;
6480 }
6481 if (!IsFastCall)
6482 ArgOffset += PtrByteSize;
6483 break;
6484 case MVT::f32:
6485 case MVT::f64: {
6486 // These can be scalar arguments or elements of a float array type
6487 // passed directly. The latter are used to implement ELFv2 homogenous
6488 // float aggregates.
6489
6490 // Named arguments go into FPRs first, and once they overflow, the
6491 // remaining arguments go into GPRs and then the parameter save area.
6492 // Unnamed arguments for vararg functions always go to GPRs and
6493 // then the parameter save area. For now, put all arguments to vararg
6494 // routines always in both locations (FPR *and* GPR or stack slot).
6495 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6496 bool NeededLoad = false;
6497
6498 // First load the argument into the next available FPR.
6499 if (FPR_idx != NumFPRs)
6500 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6501
6502 // Next, load the argument into GPR or stack slot if needed.
6503 if (!NeedGPROrStack)
6504 ;
6505 else if (GPR_idx != NumGPRs && !IsFastCall) {
6506 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6507 // once we support fp <-> gpr moves.
6508
6509 // In the non-vararg case, this can only ever happen in the
6510 // presence of f32 array types, since otherwise we never run
6511 // out of FPRs before running out of GPRs.
6512 SDValue ArgVal;
6513
6514 // Double values are always passed in a single GPR.
6515 if (Arg.getValueType() != MVT::f32) {
6516 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6517
6518 // Non-array float values are extended and passed in a GPR.
6519 } else if (!Flags.isInConsecutiveRegs()) {
6520 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6521 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6522
6523 // If we have an array of floats, we collect every odd element
6524 // together with its predecessor into one GPR.
6525 } else if (ArgOffset % PtrByteSize != 0) {
6526 SDValue Lo, Hi;
6527 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6528 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6529 if (!isLittleEndian)
6530 std::swap(Lo, Hi);
6531 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6532
6533 // The final element, if even, goes into the first half of a GPR.
6534 } else if (Flags.isInConsecutiveRegsLast()) {
6535 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6536 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6537 if (!isLittleEndian)
6538 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6539 DAG.getConstant(32, dl, MVT::i32));
6540
          // Non-final even elements are skipped; they will be handled
          // together with the subsequent argument on the next go-around.
6543 } else
6544 ArgVal = SDValue();
6545
6546 if (ArgVal.getNode())
6547 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6548 } else {
6549 if (IsFastCall)
6550 ComputePtrOff();
6551
6552 // Single-precision floating-point values are mapped to the
6553 // second (rightmost) word of the stack doubleword.
6554 if (Arg.getValueType() == MVT::f32 &&
6555 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6556 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6557 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6558 }
6559
6560 assert(HasParameterArea &&
6561 "Parameter area must exist to pass an argument in memory.");
6562 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6563 true, CFlags.IsTailCall, false, MemOpChains,
6564 TailCallArguments, dl);
6565
6566 NeededLoad = true;
6567 }
6568 // When passing an array of floats, the array occupies consecutive
6569 // space in the argument area; only round up to the next doubleword
6570 // at the end of the array. Otherwise, each float takes 8 bytes.
6571 if (!IsFastCall || NeededLoad) {
6572 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6573 Flags.isInConsecutiveRegs()) ? 4 : 8;
6574 if (Flags.isInConsecutiveRegsLast())
6575 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6576 }
6577 break;
6578 }
6579 case MVT::v4f32:
6580 case MVT::v4i32:
6581 case MVT::v8i16:
6582 case MVT::v16i8:
6583 case MVT::v2f64:
6584 case MVT::v2i64:
6585 case MVT::v1i128:
6586 case MVT::f128:
6587 // These can be scalar arguments or elements of a vector array type
6588 // passed directly. The latter are used to implement ELFv2 homogenous
6589 // vector aggregates.
6590
6591 // For a varargs call, named arguments go into VRs or on the stack as
6592 // usual; unnamed arguments always go to the stack or the corresponding
6593 // GPRs when within range. For now, we always put the value in both
6594 // locations (or even all three).
6595 if (CFlags.IsVarArg) {
6596 assert(HasParameterArea &&
6597 "Parameter area must exist if we have a varargs call.");
6598 // We could elide this store in the case where the object fits
6599 // entirely in R registers. Maybe later.
6600 SDValue Store =
6601 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6602 MemOpChains.push_back(Store);
6603 if (VR_idx != NumVRs) {
6604 SDValue Load =
6605 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6606 MemOpChains.push_back(Load.getValue(1));
6607 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6608 }
6609 ArgOffset += 16;
6610 for (unsigned i=0; i<16; i+=PtrByteSize) {
6611 if (GPR_idx == NumGPRs)
6612 break;
6613 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6614 DAG.getConstant(i, dl, PtrVT));
6615 SDValue Load =
6616 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6617 MemOpChains.push_back(Load.getValue(1));
6618 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6619 }
6620 break;
6621 }
6622
6623 // Non-varargs Altivec params go into VRs or on the stack.
6624 if (VR_idx != NumVRs) {
6625 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6626 } else {
6627 if (IsFastCall)
6628 ComputePtrOff();
6629
6630 assert(HasParameterArea &&
6631 "Parameter area must exist to pass an argument in memory.");
6632 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6633 true, CFlags.IsTailCall, true, MemOpChains,
6634 TailCallArguments, dl);
6635 if (IsFastCall)
6636 ArgOffset += 16;
6637 }
6638
6639 if (!IsFastCall)
6640 ArgOffset += 16;
6641 break;
6642 }
6643 }
6644
6645 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6646 "mismatch in size of parameter area");
6647 (void)NumBytesActuallyUsed;
6648
6649 if (!MemOpChains.empty())
6650 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6651
6652 // Check if this is an indirect call (MTCTR/BCTRL).
6653 // See prepareDescriptorIndirectCall and buildCallOperands for more
6654 // information about calls through function pointers in the 64-bit SVR4 ABI.
6655 if (CFlags.IsIndirect) {
6656 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6657 // caller in the TOC save area.
6658 if (isTOCSaveRestoreRequired(Subtarget)) {
6659 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6660 // Load r2 into a virtual register and store it to the TOC save area.
6661 setUsesTOCBasePtr(DAG);
6662 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6663 // TOC save area offset.
6664 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6665 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6666 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6667 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6669 DAG.getMachineFunction(), TOCSaveOffset));
6670 }
6671 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6672 // This does not mean the MTCTR instruction must use R12; it's easier
6673 // to model this as an extra parameter, so do that.
6674 if (isELFv2ABI && !CFlags.IsPatchPoint)
6675 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6676 }
6677
6678 // Build a sequence of copy-to-reg nodes chained together with token chain
6679 // and flag operands which copy the outgoing args into the appropriate regs.
6680 SDValue InGlue;
6681 for (const auto &[Reg, N] : RegsToPass) {
6682 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6683 InGlue = Chain.getValue(1);
6684 }
6685
6686 if (CFlags.IsTailCall && !IsSibCall)
6687 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6688 TailCallArguments);
6689
6690 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6691 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6692}
6693
6694// Returns true when the shadow of a general purpose argument register
6695// in the parameter save area is aligned to at least 'RequiredAlign'.
6696static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6697 assert(RequiredAlign.value() <= 16 &&
6698 "Required alignment greater than stack alignment.");
6699 switch (Reg) {
6700 default:
6701 report_fatal_error("called on invalid register.");
6702 case PPC::R5:
6703 case PPC::R9:
6704 case PPC::X3:
6705 case PPC::X5:
6706 case PPC::X7:
6707 case PPC::X9:
6708 // These registers are 16 byte aligned which is the most strict aligment
6709 // we can support.
6710 return true;
6711 case PPC::R3:
6712 case PPC::R7:
6713 case PPC::X4:
6714 case PPC::X6:
6715 case PPC::X8:
6716 case PPC::X10:
6717 // The shadow of these registers in the PSA is 8 byte aligned.
6718 return RequiredAlign <= 8;
6719 case PPC::R4:
6720 case PPC::R6:
6721 case PPC::R8:
6722 case PPC::R10:
6723 return RequiredAlign <= 4;
6724 }
6725}
6726
6727static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6728 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6729 Type *OrigTy, CCState &State) {
6730 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6731 State.getMachineFunction().getSubtarget());
6732 const bool IsPPC64 = Subtarget.isPPC64();
6733 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6734 const Align PtrAlign(PtrSize);
6735 const Align StackAlign(16);
6736 const MVT RegVT = Subtarget.getScalarIntVT();
6737
6738 if (ValVT == MVT::f128)
6739 report_fatal_error("f128 is unimplemented on AIX.");
6740
6741 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6742 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6743 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6744 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6745 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6746 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6747
6748 static const MCPhysReg VR[] = {// Vector registers.
6749 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6750 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6751 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6752
6753 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6754
6755 if (ArgFlags.isNest()) {
6756 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6757 if (!EnvReg)
6758 report_fatal_error("More then one nest argument.");
6759 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6760 return false;
6761 }
6762
6763 if (ArgFlags.isByVal()) {
6764 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6765 if (ByValAlign > StackAlign)
6766 report_fatal_error("Pass-by-value arguments with alignment greater than "
6767 "16 are not supported.");
6768
6769 const unsigned ByValSize = ArgFlags.getByValSize();
6770 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6771
6772 // An empty aggregate parameter takes up no storage and no registers,
6773 // but needs a MemLoc for a stack slot for the formal arguments side.
6774 if (ByValSize == 0) {
6776 State.getStackSize(), RegVT, LocInfo));
6777 return false;
6778 }
6779
6780 // Shadow allocate any registers that are not properly aligned.
6781 unsigned NextReg = State.getFirstUnallocated(GPRs);
6782 while (NextReg != GPRs.size() &&
6783 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6784 // Shadow allocate next registers since its aligment is not strict enough.
6785 MCRegister Reg = State.AllocateReg(GPRs);
6786 // Allocate the stack space shadowed by said register.
6787 State.AllocateStack(PtrSize, PtrAlign);
6788 assert(Reg && "Alocating register unexpectedly failed.");
6789 (void)Reg;
6790 NextReg = State.getFirstUnallocated(GPRs);
6791 }
6792
6793 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6794 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6795 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6796 if (MCRegister Reg = State.AllocateReg(GPRs))
6797 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6798 else {
6801 LocInfo));
6802 break;
6803 }
6804 }
6805 return false;
6806 }
6807
6808 // Arguments always reserve parameter save area.
6809 switch (ValVT.SimpleTy) {
6810 default:
6811 report_fatal_error("Unhandled value type for argument.");
6812 case MVT::i64:
6813 // i64 arguments should have been split to i32 for PPC32.
6814 assert(IsPPC64 && "PPC32 should have split i64 values.");
6815 [[fallthrough]];
6816 case MVT::i1:
6817 case MVT::i32: {
6818 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6819 // AIX integer arguments are always passed in register width.
6820 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6821 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6823 if (MCRegister Reg = State.AllocateReg(GPRs))
6824 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6825 else
6826 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6827
6828 return false;
6829 }
6830 case MVT::f32:
6831 case MVT::f64: {
6832 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6833 const unsigned StoreSize = LocVT.getStoreSize();
6834 // Floats are always 4-byte aligned in the PSA on AIX.
6835 // This includes f64 in 64-bit mode for ABI compatibility.
6836 const unsigned Offset =
6837 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6838 MCRegister FReg = State.AllocateReg(FPR);
6839 if (FReg)
6840 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6841
6842 // Reserve and initialize GPRs or initialize the PSA as required.
6843 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6844 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6845 assert(FReg && "An FPR should be available when a GPR is reserved.");
6846 if (State.isVarArg()) {
6847 // Successfully reserved GPRs are only initialized for vararg calls.
6848 // Custom handling is required for:
6849 // f64 in PPC32 needs to be split into 2 GPRs.
6850 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6851 State.addLoc(
6852 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6853 }
6854 } else {
6855 // If there are insufficient GPRs, the PSA needs to be initialized.
6856 // Initialization occurs even if an FPR was initialized for
6857 // compatibility with the AIX XL compiler. The full memory for the
6858 // argument will be initialized even if a prior word is saved in GPR.
6859 // A custom memLoc is used when the argument also passes in FPR so
6860 // that the callee handling can skip over it easily.
6861 State.addLoc(
6862 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6863 LocInfo)
6864 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6865 break;
6866 }
6867 }
6868
6869 return false;
6870 }
6871 case MVT::v4f32:
6872 case MVT::v4i32:
6873 case MVT::v8i16:
6874 case MVT::v16i8:
6875 case MVT::v2i64:
6876 case MVT::v2f64:
6877 case MVT::v1i128: {
6878 const unsigned VecSize = 16;
6879 const Align VecAlign(VecSize);
6880
6881 if (!State.isVarArg()) {
6882 // If there are vector registers remaining we don't consume any stack
6883 // space.
6884 if (MCRegister VReg = State.AllocateReg(VR)) {
6885 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6886 return false;
6887 }
6888 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6889 // might be allocated in the portion of the PSA that is shadowed by the
6890 // GPRs.
6891 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6892 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6893 return false;
6894 }
6895
6896 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6897 // Burn any underaligned registers and their shadowed stack space until
6898 // we reach the required alignment.
6899 while (NextRegIndex != GPRs.size() &&
6900 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6901 // Shadow allocate register and its stack shadow.
6902 MCRegister Reg = State.AllocateReg(GPRs);
6903 State.AllocateStack(PtrSize, PtrAlign);
6904 assert(Reg && "Allocating register unexpectedly failed.");
6905 (void)Reg;
6906 NextRegIndex = State.getFirstUnallocated(GPRs);
6907 }
6908
6909 // Vectors that are passed as fixed arguments are handled differently.
6910 // They are passed in VRs if any are available (unlike arguments passed
6911 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6912 // functions)
6913 if (!ArgFlags.isVarArg()) {
6914 if (MCRegister VReg = State.AllocateReg(VR)) {
6915 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6916 // Shadow allocate GPRs and stack space even though we pass in a VR.
6917 for (unsigned I = 0; I != VecSize; I += PtrSize)
6918 State.AllocateReg(GPRs);
6919 State.AllocateStack(VecSize, VecAlign);
6920 return false;
6921 }
6922 // No vector registers remain so pass on the stack.
6923 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6924 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6925 return false;
6926 }
6927
6928 // If all GPRS are consumed then we pass the argument fully on the stack.
6929 if (NextRegIndex == GPRs.size()) {
6930 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6931 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6932 return false;
6933 }
6934
6935 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6936 // half of the argument, and then need to pass the remaining half on the
6937 // stack.
6938 if (GPRs[NextRegIndex] == PPC::R9) {
6939 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6940 State.addLoc(
6941 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6942
6943 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
6944 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
6945 assert(FirstReg && SecondReg &&
6946 "Allocating R9 or R10 unexpectedly failed.");
6947 State.addLoc(
6948 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6949 State.addLoc(
6950 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6951 return false;
6952 }
6953
6954 // We have enough GPRs to fully pass the vector argument, and we have
6955 // already consumed any underaligned registers. Start with the custom
6956 // MemLoc and then the custom RegLocs.
6957 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6958 State.addLoc(
6959 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6960 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6961 const MCRegister Reg = State.AllocateReg(GPRs);
6962 assert(Reg && "Failed to allocated register for vararg vector argument");
6963 State.addLoc(
6964 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6965 }
6966 return false;
6967 }
6968 }
6969 return true;
6970}
6971
6972// So far, this function is only used by LowerFormalArguments_AIX()
6974 bool IsPPC64,
6975 bool HasP8Vector,
6976 bool HasVSX) {
6977 assert((IsPPC64 || SVT != MVT::i64) &&
6978 "i64 should have been split for 32-bit codegen.");
6979
6980 switch (SVT) {
6981 default:
6982 report_fatal_error("Unexpected value type for formal argument");
6983 case MVT::i1:
6984 case MVT::i32:
6985 case MVT::i64:
6986 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6987 case MVT::f32:
6988 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6989 case MVT::f64:
6990 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6991 case MVT::v4f32:
6992 case MVT::v4i32:
6993 case MVT::v8i16:
6994 case MVT::v16i8:
6995 case MVT::v2i64:
6996 case MVT::v2f64:
6997 case MVT::v1i128:
6998 return &PPC::VRRCRegClass;
6999 }
7000}
7001
7003 SelectionDAG &DAG, SDValue ArgValue,
7004 MVT LocVT, const SDLoc &dl) {
7005 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7006 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7007
7008 if (Flags.isSExt())
7009 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7010 DAG.getValueType(ValVT));
7011 else if (Flags.isZExt())
7012 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7013 DAG.getValueType(ValVT));
7014
7015 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7016}
7017
7018static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7019 const unsigned LASize = FL->getLinkageSize();
7020
7021 if (PPC::GPRCRegClass.contains(Reg)) {
7022 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7023 "Reg must be a valid argument register!");
7024 return LASize + 4 * (Reg - PPC::R3);
7025 }
7026
7027 if (PPC::G8RCRegClass.contains(Reg)) {
7028 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7029 "Reg must be a valid argument register!");
7030 return LASize + 8 * (Reg - PPC::X3);
7031 }
7032
7033 llvm_unreachable("Only general purpose registers expected.");
7034}
7035
7036// AIX ABI Stack Frame Layout:
7037//
7038// Low Memory +--------------------------------------------+
7039// SP +---> | Back chain | ---+
7040// | +--------------------------------------------+ |
7041// | | Saved Condition Register | |
7042// | +--------------------------------------------+ |
7043// | | Saved Linkage Register | |
7044// | +--------------------------------------------+ | Linkage Area
7045// | | Reserved for compilers | |
7046// | +--------------------------------------------+ |
7047// | | Reserved for binders | |
7048// | +--------------------------------------------+ |
7049// | | Saved TOC pointer | ---+
7050// | +--------------------------------------------+
7051// | | Parameter save area |
7052// | +--------------------------------------------+
7053// | | Alloca space |
7054// | +--------------------------------------------+
7055// | | Local variable space |
7056// | +--------------------------------------------+
7057// | | Float/int conversion temporary |
7058// | +--------------------------------------------+
7059// | | Save area for AltiVec registers |
7060// | +--------------------------------------------+
7061// | | AltiVec alignment padding |
7062// | +--------------------------------------------+
7063// | | Save area for VRSAVE register |
7064// | +--------------------------------------------+
7065// | | Save area for General Purpose registers |
7066// | +--------------------------------------------+
7067// | | Save area for Floating Point registers |
7068// | +--------------------------------------------+
7069// +---- | Back chain |
7070// High Memory +--------------------------------------------+
7071//
7072// Specifications:
7073// AIX 7.2 Assembler Language Reference
7074// Subroutine linkage convention
7075
7076SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7077 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7078 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7079 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7080
7081 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7082 CallConv == CallingConv::Fast) &&
7083 "Unexpected calling convention!");
7084
7085 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7086 report_fatal_error("Tail call support is unimplemented on AIX.");
7087
7088 if (useSoftFloat())
7089 report_fatal_error("Soft float support is unimplemented on AIX.");
7090
7091 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7092
7093 const bool IsPPC64 = Subtarget.isPPC64();
7094 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7095
7096 // Assign locations to all of the incoming arguments.
7098 MachineFunction &MF = DAG.getMachineFunction();
7099 MachineFrameInfo &MFI = MF.getFrameInfo();
7100 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7101 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7102
7103 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7104 // Reserve space for the linkage area on the stack.
7105 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7106 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7107 uint64_t SaveStackPos = CCInfo.getStackSize();
7108 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7109 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7110
7112
7113 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7114 CCValAssign &VA = ArgLocs[I++];
7115 MVT LocVT = VA.getLocVT();
7116 MVT ValVT = VA.getValVT();
7117 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7118
7119 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7120 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7121 // For compatibility with the AIX XL compiler, the float args in the
7122 // parameter save area are initialized even if the argument is available
7123 // in register. The caller is required to initialize both the register
7124 // and memory, however, the callee can choose to expect it in either.
7125 // The memloc is dismissed here because the argument is retrieved from
7126 // the register.
7127 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7128 continue;
7129
7130 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7131 const TargetRegisterClass *RegClass = getRegClassForSVT(
7132 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7133 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7134 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7135 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7136 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7137 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7138 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7139 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7140 MachinePointerInfo(), Align(PtrByteSize));
7141 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7142 MemOps.push_back(StoreReg);
7143 }
7144
7145 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7146 unsigned StoreSize =
7147 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7148 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7149 }
7150
7151 auto HandleMemLoc = [&]() {
7152 const unsigned LocSize = LocVT.getStoreSize();
7153 const unsigned ValSize = ValVT.getStoreSize();
7154 assert((ValSize <= LocSize) &&
7155 "Object size is larger than size of MemLoc");
7156 int CurArgOffset = VA.getLocMemOffset();
7157 // Objects are right-justified because AIX is big-endian.
7158 if (LocSize > ValSize)
7159 CurArgOffset += LocSize - ValSize;
7160 // Potential tail calls could cause overwriting of argument stack slots.
7161 const bool IsImmutable =
7163 (CallConv == CallingConv::Fast));
7164 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7165 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7166 SDValue ArgValue =
7167 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7168
7169 // While the ABI specifies the argument type is (sign or zero) extended
7170 // out to register width, not all code is compliant. We truncate and
7171 // re-extend to be more forgiving of these callers when the argument type
7172 // is smaller than register width.
7173 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7174 ValVT.isInteger() &&
7175 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7176 // It is possible to have either real integer values
7177 // or integers that were not originally integers.
7178 // In the latter case, these could have came from structs,
7179 // and these integers would not have an extend on the parameter.
7180 // Since these types of integers do not have an extend specified
7181 // in the first place, the type of extend that we do should not matter.
7182 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7183 ? MVT::i8
7184 : ArgVT;
7185 SDValue ArgValueTrunc =
7186 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7187 SDValue ArgValueExt =
7188 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7189 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7190 InVals.push_back(ArgValueExt);
7191 } else {
7192 InVals.push_back(ArgValue);
7193 }
7194 };
7195
7196 // Vector arguments to VaArg functions are passed both on the stack, and
7197 // in any available GPRs. Load the value from the stack and add the GPRs
7198 // as live ins.
7199 if (VA.isMemLoc() && VA.needsCustom()) {
7200 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7201 assert(isVarArg && "Only use custom memloc for vararg.");
7202 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7203 // matching custom RegLocs.
7204 const unsigned OriginalValNo = VA.getValNo();
7205 (void)OriginalValNo;
7206
7207 auto HandleCustomVecRegLoc = [&]() {
7208 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7209 "Missing custom RegLoc.");
7210 VA = ArgLocs[I++];
7211 assert(VA.getValVT().isVector() &&
7212 "Unexpected Val type for custom RegLoc.");
7213 assert(VA.getValNo() == OriginalValNo &&
7214 "ValNo mismatch between custom MemLoc and RegLoc.");
7216 MF.addLiveIn(VA.getLocReg(),
7217 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7218 Subtarget.hasVSX()));
7219 };
7220
7221 HandleMemLoc();
7222 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7223 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7224 // R10.
7225 HandleCustomVecRegLoc();
7226 HandleCustomVecRegLoc();
7227
7228 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7229 // we passed the vector in R5, R6, R7 and R8.
7230 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7231 assert(!IsPPC64 &&
7232 "Only 2 custom RegLocs expected for 64-bit codegen.");
7233 HandleCustomVecRegLoc();
7234 HandleCustomVecRegLoc();
7235 }
7236
7237 continue;
7238 }
7239
7240 if (VA.isRegLoc()) {
7241 if (VA.getValVT().isScalarInteger())
7243 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7244 switch (VA.getValVT().SimpleTy) {
7245 default:
7246 report_fatal_error("Unhandled value type for argument.");
7247 case MVT::f32:
7249 break;
7250 case MVT::f64:
7252 break;
7253 }
7254 } else if (VA.getValVT().isVector()) {
7255 switch (VA.getValVT().SimpleTy) {
7256 default:
7257 report_fatal_error("Unhandled value type for argument.");
7258 case MVT::v16i8:
7260 break;
7261 case MVT::v8i16:
7263 break;
7264 case MVT::v4i32:
7265 case MVT::v2i64:
7266 case MVT::v1i128:
7268 break;
7269 case MVT::v4f32:
7270 case MVT::v2f64:
7272 break;
7273 }
7274 }
7275 }
7276
7277 if (Flags.isByVal() && VA.isMemLoc()) {
7278 const unsigned Size =
7279 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7280 PtrByteSize);
7281 const int FI = MF.getFrameInfo().CreateFixedObject(
7282 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7283 /* IsAliased */ true);
7284 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7285 InVals.push_back(FIN);
7286
7287 continue;
7288 }
7289
7290 if (Flags.isByVal()) {
7291 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7292
7293 const MCPhysReg ArgReg = VA.getLocReg();
7294 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7295
7296 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7297 const int FI = MF.getFrameInfo().CreateFixedObject(
7298 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7299 /* IsAliased */ true);
7300 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7301 InVals.push_back(FIN);
7302
7303 // Add live ins for all the RegLocs for the same ByVal.
7304 const TargetRegisterClass *RegClass =
7305 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7306
7307 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7308 unsigned Offset) {
7309 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7310 // Since the callers side has left justified the aggregate in the
7311 // register, we can simply store the entire register into the stack
7312 // slot.
7313 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7314 // The store to the fixedstack object is needed becuase accessing a
7315 // field of the ByVal will use a gep and load. Ideally we will optimize
7316 // to extracting the value from the register directly, and elide the
7317 // stores when the arguments address is not taken, but that will need to
7318 // be future work.
7319 SDValue Store = DAG.getStore(
7320 CopyFrom.getValue(1), dl, CopyFrom,
7323
7324 MemOps.push_back(Store);
7325 };
7326
7327 unsigned Offset = 0;
7328 HandleRegLoc(VA.getLocReg(), Offset);
7329 Offset += PtrByteSize;
7330 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7331 Offset += PtrByteSize) {
7332 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7333 "RegLocs should be for ByVal argument.");
7334
7335 const CCValAssign RL = ArgLocs[I++];
7336 HandleRegLoc(RL.getLocReg(), Offset);
7338 }
7339
7340 if (Offset != StackSize) {
7341 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7342 "Expected MemLoc for remaining bytes.");
7343 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7344 // Consume the MemLoc.The InVal has already been emitted, so nothing
7345 // more needs to be done.
7346 ++I;
7347 }
7348
7349 continue;
7350 }
7351
7352 if (VA.isRegLoc() && !VA.needsCustom()) {
7353 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7354 Register VReg =
7355 MF.addLiveIn(VA.getLocReg(),
7356 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7357 Subtarget.hasVSX()));
7358 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7359 if (ValVT.isScalarInteger() &&
7360 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7361 ArgValue =
7362 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7363 }
7364 InVals.push_back(ArgValue);
7365 continue;
7366 }
7367 if (VA.isMemLoc()) {
7368 HandleMemLoc();
7369 continue;
7370 }
7371 }
7372
7373 // On AIX a minimum of 8 words is saved to the parameter save area.
7374 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7375 // Area that is at least reserved in the caller of this function.
7376 unsigned CallerReservedArea = std::max<unsigned>(
7377 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7378
7379 // Set the size that is at least reserved in caller of this function. Tail
7380 // call optimized function's reserved stack space needs to be aligned so
7381 // that taking the difference between two stack areas will result in an
7382 // aligned stack.
7383 CallerReservedArea =
7384 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7385 FuncInfo->setMinReservedArea(CallerReservedArea);
7386
7387 if (isVarArg) {
7388 FuncInfo->setVarArgsFrameIndex(
7389 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7390 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7391
7392 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7393 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7394
7395 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7396 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7397 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7398
7399 // The fixed integer arguments of a variadic function are stored to the
7400 // VarArgsFrameIndex on the stack so that they may be loaded by
7401 // dereferencing the result of va_next.
7402 for (unsigned GPRIndex =
7403 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7404 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7405
7406 const Register VReg =
7407 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7408 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7409
7410 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7411 SDValue Store =
7412 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7413 MemOps.push_back(Store);
7414 // Increment the address for the next argument to store.
7415 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7416 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7417 }
7418 }
7419
7420 if (!MemOps.empty())
7421 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7422
7423 return Chain;
7424}
7425
// Lower an outgoing call for the AIX ABI: assign every outgoing argument to
// registers and/or parameter-save-area slots via CC_AIX, materialize the
// by-value copies, handle the vararg vector/float special cases, save the TOC
// for indirect calls, and finish by emitting the call sequence.
// NOTE: several declaration lines in this listing were elided by the doc
// extraction (e.g. the Outs parameter, ArgLocs/RegsToPass vectors).
SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
  // AIX ABI stack frame layout.

  assert((CFlags.CallConv == CallingConv::C ||
          CFlags.CallConv == CallingConv::Cold ||
          CFlags.CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (CFlags.IsPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();

  MachineFunction &MF = DAG.getMachineFunction();
  CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage save area (LSA) on the stack.
  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
  //   [SP][CR][LR][2 x reserved][TOC].
  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  const bool IsPPC64 = Subtarget.isPPC64();
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is variadic.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max<unsigned>(
      LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  SmallVector<SDValue, 8> MemOpChains;

  // Set up a copy of the stack pointer for loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
                                   : DAG.getRegister(PPC::R1, MVT::i32);

  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();
    SDValue Arg = OutVals[ValNo];
    ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;

    if (Flags.isByVal()) {
      const unsigned ByValSize = Flags.getByValSize();

      // Nothing to do for zero-sized ByVals on the caller side.
      if (!ByValSize) {
        ++I;
        continue;
      }

      // Helper that zero-ext loads VT-sized data from Arg + LoadOffset.
      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
        return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
                              (LoadOffset != 0)
                                  ? DAG.getObjectPtrOffset(
                                        dl, Arg, TypeSize::getFixed(LoadOffset))
                                  : Arg,
                              MachinePointerInfo(), VT);
      };

      unsigned LoadOffset = 0;

      // Initialize registers, which are fully occupied by the by-val argument.
      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        SDValue Load = GetLoad(PtrVT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
        const CCValAssign &ByValVA = ArgLocs[I++];
        assert(ByValVA.getValNo() == ValNo &&
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
      }

      if (LoadOffset == ByValSize)
        continue;

      // There must be one more loc to handle the remainder.
      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        const CCValAssign &ByValVA = ArgLocs[I++];
        ISD::ArgFlagsTy MemcpyFlags = Flags;
        // Only memcpy the bytes that don't pass in register.
        MemcpyFlags.setByValSize(ByValSize - LoadOffset);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            (LoadOffset != 0) ? DAG.getObjectPtrOffset(
                                    dl, Arg, TypeSize::getFixed(LoadOffset))
                              : Arg,
                dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
            CallSeqStart, MemcpyFlags, DAG, dl);
        continue;
      }

      // Initialize the final register residue.
      // Any residue that occupies the final by-val arg register must be
      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
      // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
      // 2 and 1 byte loads.
      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");
      SDValue ResidueVal;
      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        // Load the largest power-of-2 chunk that still fits in the residue.
        const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
        const MVT VT =
            N == 1 ? MVT::i8
                   : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
        SDValue Load = GetLoad(VT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += N;
        Bytes += N;

        // By-val arguments are passed left-justified in register.
        // Every load here needs to be shifted, otherwise a full register load
        // should have been used.
        assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
               "Unexpected load emitted during handling of pass-by-value "
               "argument.");
        unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
        EVT ShiftAmountTy =
            getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
        SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
        SDValue ShiftedLoad =
            DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
        // OR each shifted chunk into the accumulated register value.
        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
                                              ShiftedLoad)
                                : ShiftedLoad;
      }

      const CCValAssign &ByValVA = ArgLocs[I++];
      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
      continue;
    }

    CCValAssign &VA = ArgLocs[I++];
    const MVT LocVT = VA.getLocVT();
    const MVT ValVT = VA.getValVT();

    // Apply the extension the calling convention recorded for this argument.
    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("Unexpected argument extension type.");
    case CCValAssign::Full:
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc() && !VA.needsCustom()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    // Vector arguments passed to VarArg functions need custom handling when
    // they are passed (at least partially) in GPRs.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
      assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
      // Store value to its stack slot.
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      SDValue Store =
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
      MemOpChains.push_back(Store);
      const unsigned OriginalValNo = VA.getValNo();
      // Then load the GPRs from the stack
      unsigned LoadOffset = 0;
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != E && "Unexpected end of CCvalAssigns.");
        assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Expected custom RegLoc.");
        CCValAssign RegVA = ArgLocs[I++];
        assert(RegVA.getValNo() == OriginalValNo &&
               "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
        SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(LoadOffset, dl, PtrVT));
        SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
        LoadOffset += PtrByteSize;
      };

      // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
      // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
      // R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
          ArgLocs[I].getValNo() == OriginalValNo) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    if (VA.isMemLoc()) {
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      MemOpChains.push_back(
          DAG.getStore(Chain, dl, Arg, PtrOff,
                       Subtarget.getFrameLowering()->getStackAlign()));

      continue;
    }

    if (!ValVT.isFloatingPoint())
          "Unexpected register handling for calling convention.");

    // Custom handling is used for GPR initializations for vararg float
    // arguments.
    assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
           LocVT.isInteger() &&
           "Custom register handling only expected for VarArg.");

    SDValue ArgAsInt =
        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);

    if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
      // f32 in 32-bit GPR
      // f64 in 64-bit GPR
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
    else if (Arg.getValueType().getFixedSizeInBits() <
             LocVT.getFixedSizeInBits())
      // f32 in 64-bit GPR.
      RegsToPass.push_back(std::make_pair(
          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
    else {
      // f64 in two 32-bit GPRs
      // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
      assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
             "Unexpected custom register for argument!");
      CCValAssign &GPR1 = VA;
      // High 32 bits of the f64 bit pattern go in the first GPR.
      SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
                                     DAG.getConstant(32, dl, MVT::i8));
      RegsToPass.push_back(std::make_pair(
          GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));

      if (I != E) {
        // If only 1 GPR was available, there will only be one custom GPR and
        // the argument will also pass in memory.
        CCValAssign &PeekArg = ArgLocs[I];
        // NOTE(review): `PeekArg.getValNo() == PeekArg.getValNo()` compares a
        // value to itself and is always true; presumably it was meant to be
        // compared against the current argument's ValNo — confirm upstream.
        if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
          assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
          CCValAssign &GPR2 = ArgLocs[I++];
          // Low 32 bits (truncation of the full pattern) go in the second GPR.
          RegsToPass.push_back(std::make_pair(
              GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // For indirect calls, we need to save the TOC base to the stack for
  // restoration after the call.
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
    const MVT PtrVT = Subtarget.getScalarIntVT();
    const unsigned TOCSaveOffset =
        Subtarget.getFrameLowering()->getTOCSaveOffset();

    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
7748
// Check whether the return values in Outs can be lowered under the given
// calling convention, by running them through the return calling-convention
// checker. Cold calls on SVR4 use a distinct return CC (the selected
// alternative line is elided in this listing); otherwise RetCC_PPC is used.
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const Type *RetTy) const {
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                : RetCC_PPC);
}
7762
// Lower a function return: assign return values to registers via the return
// calling convention, copy each value into its register (chained with glue),
// and emit the PPCISD::RET_GLUE node.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  // Cold calls on SVR4 use a distinct return CC; otherwise RetCC_PPC.
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           : RetCC_PPC);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  // RealResIdx indexes OutVals; it lags i when one value spans two RVLocs
  // (the SPE f64 case below consumes two locations for one value).
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    // Apply the extension recorded by the calling convention, if any.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Glue = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
}
7827
7828SDValue
7829PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7830 SelectionDAG &DAG) const {
7831 SDLoc dl(Op);
7832
7833 // Get the correct type for integers.
7834 EVT IntVT = Op.getValueType();
7835
7836 // Get the inputs.
7837 SDValue Chain = Op.getOperand(0);
7838 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7839 // Build a DYNAREAOFFSET node.
7840 SDValue Ops[2] = {Chain, FPSIdx};
7841 SDVTList VTs = DAG.getVTList(IntVT);
7842 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7843}
7844
7845SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7846 SelectionDAG &DAG) const {
7847 // When we pop the dynamic allocation we need to restore the SP link.
7848 SDLoc dl(Op);
7849
7850 // Get the correct type for pointers.
7851 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7852
7853 // Construct the stack pointer operand.
7854 bool isPPC64 = Subtarget.isPPC64();
7855 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7856 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7857
7858 // Get the operands for the STACKRESTORE.
7859 SDValue Chain = Op.getOperand(0);
7860 SDValue SaveSP = Op.getOperand(1);
7861
7862 // Load the old link SP.
7863 SDValue LoadLinkSP =
7864 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7865
7866 // Restore the stack pointer.
7867 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7868
7869 // Store the old link SP.
7870 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7871}
7872
7873SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7874 MachineFunction &MF = DAG.getMachineFunction();
7875 bool isPPC64 = Subtarget.isPPC64();
7876 EVT PtrVT = getPointerTy(MF.getDataLayout());
7877
7878 // Get current frame pointer save index. The users of this index will be
7879 // primarily DYNALLOC instructions.
7880 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7881 int RASI = FI->getReturnAddrSaveIndex();
7882
7883 // If the frame pointer save index hasn't been defined yet.
7884 if (!RASI) {
7885 // Find out what the fix offset of the frame pointer save area.
7886 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7887 // Allocate the frame index for frame pointer save area.
7888 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7889 // Save the result.
7890 FI->setReturnAddrSaveIndex(RASI);
7891 }
7892 return DAG.getFrameIndex(RASI, PtrVT);
7893}
7894
7895SDValue
7896PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7897 MachineFunction &MF = DAG.getMachineFunction();
7898 bool isPPC64 = Subtarget.isPPC64();
7899 EVT PtrVT = getPointerTy(MF.getDataLayout());
7900
7901 // Get current frame pointer save index. The users of this index will be
7902 // primarily DYNALLOC instructions.
7903 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7904 int FPSI = FI->getFramePointerSaveIndex();
7905
7906 // If the frame pointer save index hasn't been defined yet.
7907 if (!FPSI) {
7908 // Find out what the fix offset of the frame pointer save area.
7909 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7910 // Allocate the frame index for frame pointer save area.
7911 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7912 // Save the result.
7913 FI->setFramePointerSaveIndex(FPSI);
7914 }
7915 return DAG.getFrameIndex(FPSI, PtrVT);
7916}
7917
7918SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7919 SelectionDAG &DAG) const {
7920 MachineFunction &MF = DAG.getMachineFunction();
7921 // Get the inputs.
7922 SDValue Chain = Op.getOperand(0);
7923 SDValue Size = Op.getOperand(1);
7924 SDLoc dl(Op);
7925
7926 // Get the correct type for pointers.
7927 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7928 // Negate the size.
7929 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7930 DAG.getConstant(0, dl, PtrVT), Size);
7931 // Construct a node for the frame pointer save index.
7932 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7933 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7934 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7935 if (hasInlineStackProbe(MF))
7936 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7937 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7938}
7939
7940SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7941 SelectionDAG &DAG) const {
7942 MachineFunction &MF = DAG.getMachineFunction();
7943
7944 bool isPPC64 = Subtarget.isPPC64();
7945 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7946
7947 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7948 return DAG.getFrameIndex(FI, PtrVT);
7949}
7950
7951SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7952 SelectionDAG &DAG) const {
7953 SDLoc DL(Op);
7954 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7955 DAG.getVTList(MVT::i32, MVT::Other),
7956 Op.getOperand(0), Op.getOperand(1));
7957}
7958
7959SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7960 SelectionDAG &DAG) const {
7961 SDLoc DL(Op);
7962 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7963 Op.getOperand(0), Op.getOperand(1));
7964}
7965
7966SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7967 if (Op.getValueType().isVector())
7968 return LowerVectorLoad(Op, DAG);
7969
7970 assert(Op.getValueType() == MVT::i1 &&
7971 "Custom lowering only for i1 loads");
7972
7973 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7974
7975 SDLoc dl(Op);
7976 LoadSDNode *LD = cast<LoadSDNode>(Op);
7977
7978 SDValue Chain = LD->getChain();
7979 SDValue BasePtr = LD->getBasePtr();
7980 MachineMemOperand *MMO = LD->getMemOperand();
7981
7982 SDValue NewLD =
7983 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7984 BasePtr, MVT::i8, MMO);
7985 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7986
7987 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7988 return DAG.getMergeValues(Ops, dl);
7989}
7990
// Custom-lower stores: vector stores go to LowerVectorStore; i1 stores are
// widened and emitted as a truncating i8 store.
// NOTE(review): the line that zero-extends Value before the truncating store
// appears to have been elided by the doc extraction (only its closing
// "Value);" remains) — confirm against upstream.
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

                     Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}
8012
8013// FIXME: Remove this once the ANDI glue bug is fixed:
8014SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8015 assert(Op.getValueType() == MVT::i1 &&
8016 "Custom lowering only for i1 results");
8017
8018 SDLoc DL(Op);
8019 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8020}
8021
8022SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8023                                               SelectionDAG &DAG) const {
8024
8025  // Implements a vector truncate that fits in a vector register as a shuffle.
8026  // We want to legalize vector truncates down to where the source fits in
8027  // a vector register (and target is therefore smaller than vector register
8028  // size). At that point legalization will try to custom lower the sub-legal
8029  // result and get here - where we can contain the truncate as a single target
8030  // operation.
8031
8032  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8033  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8034  //
8035  // We will implement it for big-endian ordering as this (where x denotes
8036  // undefined):
8037  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8038  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8039  //
8040  // The same operation in little-endian ordering will be:
8041  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8042  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8043
8044  EVT TrgVT = Op.getValueType();
8045  assert(TrgVT.isVector() && "Vector type expected.");
8046  unsigned TrgNumElts = TrgVT.getVectorNumElements();
8047  EVT EltVT = TrgVT.getVectorElementType();
  // Bail out on shapes this single-shuffle scheme cannot represent.
8048  if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8049      TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8051    return SDValue();
8052
8053  SDValue N1 = Op.getOperand(0);
8054  EVT SrcVT = N1.getValueType();
8055  unsigned SrcSize = SrcVT.getSizeInBits();
  // Sources wider than two vector registers (256 bits) are not supported.
8056  if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8059    return SDValue();
8060  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8061    return SDValue();
8062
  // WideVT is the full 128-bit register vector of the target element type.
8063  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8064  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8065
8066  SDLoc DL(Op);
8067  SDValue Op1, Op2;
8068  if (SrcSize == 256) {
    // A 256-bit source is split into two register-sized halves that become
    // the two shuffle inputs.
8069    EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8070    EVT SplitVT =
8072    unsigned SplitNumElts = SplitVT.getVectorNumElements();
8073    Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8074                      DAG.getConstant(0, DL, VecIdxTy));
8075    Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8076                      DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8077  }
8078  else {
8079    Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8080    Op2 = DAG.getUNDEF(WideVT);
8081  }
8082
8083  // First list the elements we want to keep.
  // Each kept element is the low (LE) / high (BE) sub-element of a source
  // lane, hence the stride of SizeMult through the source indices.
8084  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8085  SmallVector<int, 16> ShuffV;
8086  if (Subtarget.isLittleEndian())
8087    for (unsigned i = 0; i < TrgNumElts; ++i)
8088      ShuffV.push_back(i * SizeMult);
8089  else
8090    for (unsigned i = 1; i <= TrgNumElts; ++i)
8091      ShuffV.push_back(i * SizeMult - 1);
8092
8093  // Populate the remaining elements with undefs.
  // NOTE(review): a fixed in-range index is pushed for every trailing lane
  // (not a per-lane one); these lanes appear to be "don't care" padding whose
  // values are never consumed — TODO confirm against the callers.
8094  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8095    // ShuffV.push_back(i + WideNumElts);
8096    ShuffV.push_back(WideNumElts + 1);
8097
8098  Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8099  Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8100  return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8101}
8102
8103/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8104/// possible.
8105SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8106  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8107  EVT ResVT = Op.getValueType();
8108  EVT CmpVT = Op.getOperand(0).getValueType();
8109  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8110  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8111  SDLoc dl(Op);
8112
8113  // Without power9-vector, we don't have native instruction for f128 comparison.
8114  // Following transformation to libcall is needed for setcc:
8115  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8116  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8117    SDValue Z = DAG.getSetCC(
8118        dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8119        LHS, RHS, CC);
8120    SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8121    return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8122  }
8123
8124  // Not FP, or using SPE? Not a fsel.
8125  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8126      Subtarget.hasSPE())
8127    return Op;
8128
8129  SDNodeFlags Flags = Op.getNode()->getFlags();
8130
8131  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8132  // presence of infinities.
8133  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    // select_cc a, b, a, b, gt/lt is exactly a max/min pattern.
8134    switch (CC) {
8135    default:
8136      break;
8137    case ISD::SETOGT:
8138    case ISD::SETGT:
8139      return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8140    case ISD::SETOLT:
8141    case ISD::SETLT:
8142      return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8143    }
8144  }
8145
8146  // We might be able to do better than this under some circumstances, but in
8147  // general, fsel-based lowering of select is a finite-math-only optimization.
8148  // For more information, see section F.3 of the 2.06 ISA specification.
8149  // With ISA 3.0
8150  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8151      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8152      ResVT == MVT::f128)
8153    return Op;
8154
8155  // If the RHS of the comparison is a 0.0, we don't need to do the
8156  // subtraction at all.
  // fsel is natively "select if operand >= 0", so LHS (and its negation)
  // can feed fsel directly when comparing against zero.
8157  SDValue Sel1;
8159    switch (CC) {
8160    default: break;       // SETUO etc aren't handled by fsel.
8161    case ISD::SETNE:
8162      std::swap(TV, FV);
8163      [[fallthrough]];
8164    case ISD::SETEQ:
8165      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8166        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // Equality is built from two fsels: LHS >= 0 and -LHS >= 0.
8167      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8168      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8169        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8170      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8171                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8172    case ISD::SETULT:
8173    case ISD::SETLT:
8174      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8175      [[fallthrough]];
8176    case ISD::SETOGE:
8177    case ISD::SETGE:
8178      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8179        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8180      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8181    case ISD::SETUGT:
8182    case ISD::SETGT:
8183      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8184      [[fallthrough]];
8185    case ISD::SETOLE:
8186    case ISD::SETLE:
8187      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8188        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8189      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8190                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8191    }
8192
  // General case: materialize LHS-RHS (or RHS-LHS) and feed its sign to fsel.
8193  SDValue Cmp;
8194  switch (CC) {
8195  default: break;       // SETUO etc aren't handled by fsel.
8196  case ISD::SETNE:
8197    std::swap(TV, FV);
8198    [[fallthrough]];
8199  case ISD::SETEQ:
8200    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8201    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8202      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8203    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8204    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8205      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8206    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8207                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8208  case ISD::SETULT:
8209  case ISD::SETLT:
8210    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8211    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8212      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8213    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8214  case ISD::SETOGE:
8215  case ISD::SETGE:
8216    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8217    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8218      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8219    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8220  case ISD::SETUGT:
8221  case ISD::SETGT:
8222    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8223    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8224      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8225    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8226  case ISD::SETOLE:
8227  case ISD::SETLE:
8228    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8229    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8230      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8231    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8232  }
  // Unhandled condition codes fall back to the generic lowering.
8233  return Op;
8234}
8235
8236static unsigned getPPCStrictOpcode(unsigned Opc) {
8237 switch (Opc) {
8238 default:
8239 llvm_unreachable("No strict version of this opcode!");
8240 case PPCISD::FCTIDZ:
8241 return PPCISD::STRICT_FCTIDZ;
8242 case PPCISD::FCTIWZ:
8243 return PPCISD::STRICT_FCTIWZ;
8244 case PPCISD::FCTIDUZ:
8245 return PPCISD::STRICT_FCTIDUZ;
8246 case PPCISD::FCTIWUZ:
8247 return PPCISD::STRICT_FCTIWUZ;
8248 case PPCISD::FCFID:
8249 return PPCISD::STRICT_FCFID;
8250 case PPCISD::FCFIDU:
8251 return PPCISD::STRICT_FCFIDU;
8252 case PPCISD::FCFIDS:
8253 return PPCISD::STRICT_FCFIDS;
8254 case PPCISD::FCFIDUS:
8255 return PPCISD::STRICT_FCFIDUS;
8256 }
8257}
8258
8260                              const PPCSubtarget &Subtarget) {
  // Emits the PPC fcti[d|w][u]z node performing the FP->int conversion for
  // Op; for strict ops the chain is threaded through the result's value 1.
8261  SDLoc dl(Op);
8262  bool IsStrict = Op->isStrictFPOpcode();
8263  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8264                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8265
8266  // TODO: Any other flags to propagate?
8267  SDNodeFlags Flags;
8268  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8269
8270  // For strict nodes, source is the second operand.
8271  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8272  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8273  MVT DestTy = Op.getSimpleValueType();
8274  assert(Src.getValueType().isFloatingPoint() &&
8275         (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8276          DestTy == MVT::i64) &&
8277         "Invalid FP_TO_INT types");
  // The conversion instructions operate on f64; widen f32 sources first.
8278  if (Src.getValueType() == MVT::f32) {
8279    if (IsStrict) {
8280      Src =
8282                   DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8283      Chain = Src.getValue(1);
8284    } else
8285      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8286  }
  // Small integer results are produced in a full GPR-width value on P9.
8287  if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8288    DestTy = Subtarget.getScalarIntVT();
8289  unsigned Opc = ISD::DELETED_NODE;
8290  switch (DestTy.SimpleTy) {
8291  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8292  case MVT::i32:
8293    Opc = IsSigned ? PPCISD::FCTIWZ
8294                   : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8295    break;
8296  case MVT::i64:
8297    assert((IsSigned || Subtarget.hasFPCVT()) &&
8298           "i64 FP_TO_UINT is supported only with FPCVT");
8299    Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8300  }
8301  EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8302  SDValue Conv;
8303  if (IsStrict) {
8305    Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8306                       Flags);
8307  } else {
8308    Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8309  }
8310  return Conv;
8311}
8312
8313void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8314                                               SelectionDAG &DAG,
8315                                               const SDLoc &dl) const {
  // Convert to int, store the result into a stack slot, and describe that
  // slot in RLI so the caller can load the integer value back from it.
8316  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8317  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8318                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8319  bool IsStrict = Op->isStrictFPOpcode();
8320
8321  // Convert the FP value to an int value through memory.
8322  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8323                  (IsSigned || Subtarget.hasFPCVT());
8324  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8325  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8326  MachinePointerInfo MPI =
8328
8329  // Emit a store to the stack slot.
8330  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8331  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8332  if (i32Stack) {
    // Use stfiwx to store just the integer word of the FP register.
8333    MachineFunction &MF = DAG.getMachineFunction();
8334    Alignment = Align(4);
8335    MachineMemOperand *MMO =
8336        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8337    SDValue Ops[] = { Chain, Tmp, FIPtr };
8338    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8339              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8340  } else
8341    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8342
8343  // Result is a load from the stack slot. If loading 4 bytes, make sure to
8344  // add in a bias on big endian.
8345  if (Op.getValueType() == MVT::i32 && !i32Stack &&
8346      !Subtarget.isLittleEndian()) {
8347    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8348                        DAG.getConstant(4, dl, FIPtr.getValueType()));
8349    MPI = MPI.getWithOffset(4);
8350  }
8351
8352  RLI.Chain = Chain;
8353  RLI.Ptr = FIPtr;
8354  RLI.MPI = MPI;
8355  RLI.Alignment = Alignment;
8356}
8357
8358/// Custom lowers floating point to integer conversions to use
8359/// the direct move instructions available in ISA 2.07 to avoid the
8360/// need for load/store combinations.
8361SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8362 SelectionDAG &DAG,
8363 const SDLoc &dl) const {
8364 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8365 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8366 if (Op->isStrictFPOpcode())
8367 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8368 else
8369 return Mov;
8370}
8371
8372SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8373                                          const SDLoc &dl) const {
8374  bool IsStrict = Op->isStrictFPOpcode();
8375  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8376                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8377  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8378  EVT SrcVT = Src.getValueType();
8379  EVT DstVT = Op.getValueType();
8380
8381  // FP to INT conversions are legal for f128.
8382  if (SrcVT == MVT::f128)
8383    return Subtarget.hasP9Vector() ? Op : SDValue();
8384
8385  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8386  // PPC (the libcall is not available).
8387  if (SrcVT == MVT::ppcf128) {
8388    if (DstVT == MVT::i32) {
8389      // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8390      // set other fast-math flags to FP operations in both strict and
8391      // non-strict cases. (FP_TO_SINT, FSUB)
8392      SDNodeFlags Flags;
8393      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8394
8395      if (IsSigned) {
8396        SDValue Lo, Hi;
8397        std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8398
8399        // Add the two halves of the long double in round-to-zero mode, and use
8400        // a smaller FP_TO_SINT.
8401        if (IsStrict) {
8402          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8403                                    DAG.getVTList(MVT::f64, MVT::Other),
8404                                    {Op.getOperand(0), Lo, Hi}, Flags);
8405          return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8406                             DAG.getVTList(MVT::i32, MVT::Other),
8407                             {Res.getValue(1), Res}, Flags);
8408        } else {
8409          SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8410          return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8411        }
8412      } else {
        // Unsigned case: 2^31 as a ppc_fp128 constant and the i32 sign bit.
8413        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8414        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8415        SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8416        SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8417        if (IsStrict) {
8418          // Sel = Src < 0x80000000
8419          // FltOfs = select Sel, 0.0, 0x80000000
8420          // IntOfs = select Sel, 0, 0x80000000
8421          // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8422          SDValue Chain = Op.getOperand(0);
8423          EVT SetCCVT =
8424              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8425          EVT DstSetCCVT =
8426              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8427          SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8428                                     Chain, true);
8429          Chain = Sel.getValue(1);
8430
8431          SDValue FltOfs = DAG.getSelect(
8432              dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8433          Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8434
8435          SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8436                                    DAG.getVTList(SrcVT, MVT::Other),
8437                                    {Chain, Src, FltOfs}, Flags);
8438          Chain = Val.getValue(1);
8439          SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8440                                     DAG.getVTList(DstVT, MVT::Other),
8441                                     {Chain, Val}, Flags);
8442          Chain = SInt.getValue(1);
8443          SDValue IntOfs = DAG.getSelect(
8444              dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8445          SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8446          return DAG.getMergeValues({Result, Chain}, dl);
8447        } else {
8448          // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8449          // FIXME: generated code sucks.
8450          SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8451          True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8452          True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8453          SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8454          return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8455        }
8456      }
8457    }
8458
    // Other ppcf128 destinations are handled by the generic libcall path.
8459    return SDValue();
8460  }
8461
  // With direct moves the conversion can stay entirely in registers.
8462  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8463    return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8464
  // Fallback: convert through a stack slot and load the integer result.
8465  ReuseLoadInfo RLI;
8466  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8467
8468  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8469                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8470}
8471
8472// We're trying to insert a regular store, S, and then a load, L. If the
8473// incoming value, O, is a load, we might just be able to have our load use the
8474// address used by O. However, we don't know if anything else will store to
8475// that address before we can load from it. To prevent this situation, we need
8476// to insert our load, L, into the chain as a peer of O. To do this, we give L
8477// the same chain operand as O, we create a token factor from the chain results
8478// of O and L, and we replace all uses of O's chain result with that token
8479// factor (this last part is handled by makeEquivalentMemoryOrdering).
8480bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8481 ReuseLoadInfo &RLI,
8482 SelectionDAG &DAG,
8483 ISD::LoadExtType ET) const {
8484 // Conservatively skip reusing for constrained FP nodes.
8485 if (Op->isStrictFPOpcode())
8486 return false;
8487
8488 SDLoc dl(Op);
8489 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8490 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8491 if (ET == ISD::NON_EXTLOAD &&
8492 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8493 isOperationLegalOrCustom(Op.getOpcode(),
8494 Op.getOperand(0).getValueType())) {
8495
8496 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8497 return true;
8498 }
8499
8500 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8501 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8502 LD->isNonTemporal())
8503 return false;
8504 if (LD->getMemoryVT() != MemVT)
8505 return false;
8506
8507 // If the result of the load is an illegal type, then we can't build a
8508 // valid chain for reuse since the legalised loads and token factor node that
8509 // ties the legalised loads together uses a different output chain then the
8510 // illegal load.
8511 if (!isTypeLegal(LD->getValueType(0)))
8512 return false;
8513
8514 RLI.Ptr = LD->getBasePtr();
8515 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8516 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8517 "Non-pre-inc AM on PPC?");
8518 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8519 LD->getOffset());
8520 }
8521
8522 RLI.Chain = LD->getChain();
8523 RLI.MPI = LD->getPointerInfo();
8524 RLI.IsDereferenceable = LD->isDereferenceable();
8525 RLI.IsInvariant = LD->isInvariant();
8526 RLI.Alignment = LD->getAlign();
8527 RLI.AAInfo = LD->getAAInfo();
8528 RLI.Ranges = LD->getRanges();
8529
8530 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8531 return true;
8532}
8533
8534/// Analyze profitability of direct move
8535/// prefer float load to int load plus direct move
8536/// when there is no integer use of int load
8537bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8538 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8539 if (Origin->getOpcode() != ISD::LOAD)
8540 return true;
8541
8542 // If there is no LXSIBZX/LXSIHZX, like Power8,
8543 // prefer direct move if the memory size is 1 or 2 bytes.
8544 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8545 if (!Subtarget.hasP9Vector() &&
8546 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8547 return true;
8548
8549 for (SDUse &Use : Origin->uses()) {
8550
8551 // Only look at the users of the loaded value.
8552 if (Use.getResNo() != 0)
8553 continue;
8554
8555 SDNode *User = Use.getUser();
8556 if (User->getOpcode() != ISD::SINT_TO_FP &&
8557 User->getOpcode() != ISD::UINT_TO_FP &&
8558 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8559 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8560 return true;
8561 }
8562
8563 return false;
8564}
8565
8567                            const PPCSubtarget &Subtarget,
8568                            SDValue Chain = SDValue()) {
  // Emits the PPC fcfid-family node converting Src to the FP type of Op.
  // For strict ops the supplied Chain (or Op's chain) is threaded through.
8569  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8570                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8571  SDLoc dl(Op);
8572
8573  // TODO: Any other flags to propagate?
8574  SDNodeFlags Flags;
8575  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8576
8577  // If we have FCFIDS, then use it when converting to single-precision.
8578  // Otherwise, convert to double-precision and then round.
8579  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8580  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8581                              : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8582  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8583  if (Op->isStrictFPOpcode()) {
8584    if (!Chain)
8585      Chain = Op.getOperand(0);
8586    return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8587                       DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8588  } else
8589    return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8590}
8591
8592/// Custom lowers integer to floating point conversions to use
8593/// the direct move instructions available in ISA 2.07 to avoid the
8594/// need for load/store combinations.
8595SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8596 SelectionDAG &DAG,
8597 const SDLoc &dl) const {
8598 assert((Op.getValueType() == MVT::f32 ||
8599 Op.getValueType() == MVT::f64) &&
8600 "Invalid floating point type as target of conversion");
8601 assert(Subtarget.hasFPCVT() &&
8602 "Int to FP conversions with direct moves require FPCVT");
8603 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8604 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8605 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8606 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8607 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8608 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8609 return convertIntToFP(Op, Mov, DAG, Subtarget);
8610}
8611
8612static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8613
8614 EVT VecVT = Vec.getValueType();
8615 assert(VecVT.isVector() && "Expected a vector type.");
8616 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8617
8618 EVT EltVT = VecVT.getVectorElementType();
8619 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8620 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8621
8622 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8623 SmallVector<SDValue, 16> Ops(NumConcat);
8624 Ops[0] = Vec;
8625 SDValue UndefVec = DAG.getUNDEF(VecVT);
8626 for (unsigned i = 1; i < NumConcat; ++i)
8627 Ops[i] = UndefVec;
8628
8629 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8630}
8631
8632SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8633                                                const SDLoc &dl) const {
8634  bool IsStrict = Op->isStrictFPOpcode();
8635  unsigned Opc = Op.getOpcode();
8636  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8639         "Unexpected conversion type");
8640  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8641         "Supports conversions to v2f64/v4f32 only.");
8642
8643  // TODO: Any other flags to propagate?
8644  SDNodeFlags Flags;
8645  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8646
8647  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8648  bool FourEltRes = Op.getValueType() == MVT::v4f32;
8649
  // Widen the sub-register source to a full 128-bit vector, then build a
  // shuffle that positions its payload elements for the conversion below.
8650  SDValue Wide = widenVec(DAG, Src, dl);
8651  EVT WideVT = Wide.getValueType();
8652  unsigned WideNumElts = WideVT.getVectorNumElements();
8653  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8654
  // Start with every lane taken from the second shuffle input (see below).
8655  SmallVector<int, 16> ShuffV;
8656  for (unsigned i = 0; i < WideNumElts; ++i)
8657    ShuffV.push_back(i + WideNumElts);
8658
8659  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8660  int SaveElts = FourEltRes ? 4 : 2;
  // Drop each source element into the endian-appropriate slot of its lane.
8661  if (Subtarget.isLittleEndian())
8662    for (int i = 0; i < SaveElts; i++)
8663      ShuffV[i * Stride] = i;
8664  else
8665    for (int i = 1; i <= SaveElts; i++)
8666      ShuffV[i * Stride - 1] = i - 1;
8667
  // Unsigned conversions zero-fill the remaining lanes; signed ones leave
  // them undef and rely on the sign_extend_inreg emitted below.
8668  SDValue ShuffleSrc2 =
8669      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8670  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8671
8672  SDValue Extend;
8673  if (SignedConv) {
8674    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8675    EVT ExtVT = Src.getValueType();
8676    if (Subtarget.hasP9Altivec())
8677      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8678                               IntermediateVT.getVectorNumElements());
8679
8680    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8681                         DAG.getValueType(ExtVT));
8682  } else
8683    Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8684
8685  if (IsStrict)
8686    return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8687                       {Op.getOperand(0), Extend}, Flags);
8688
8689  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8690}
8691
8692SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8693 SelectionDAG &DAG) const {
8694 SDLoc dl(Op);
8695 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8696 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8697 bool IsStrict = Op->isStrictFPOpcode();
8698 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8699 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8700
8701 // TODO: Any other flags to propagate?
8702 SDNodeFlags Flags;
8703 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8704
8705 EVT InVT = Src.getValueType();
8706 EVT OutVT = Op.getValueType();
8707 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8708 isOperationCustom(Op.getOpcode(), InVT))
8709 return LowerINT_TO_FPVector(Op, DAG, dl);
8710
8711 // Conversions to f128 are legal.
8712 if (Op.getValueType() == MVT::f128)
8713 return Subtarget.hasP9Vector() ? Op : SDValue();
8714
8715 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8716 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8717 return SDValue();
8718
8719 if (Src.getValueType() == MVT::i1) {
8720 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8721 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8722 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8723 if (IsStrict)
8724 return DAG.getMergeValues({Sel, Chain}, dl);
8725 else
8726 return Sel;
8727 }
8728
8729 // If we have direct moves, we can do all the conversion, skip the store/load
8730 // however, without FPCVT we can't do most conversions.
8731 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8732 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8733 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8734
8735 assert((IsSigned || Subtarget.hasFPCVT()) &&
8736 "UINT_TO_FP is supported only with FPCVT");
8737
8738 if (Src.getValueType() == MVT::i64) {
8739 SDValue SINT = Src;
8740 // When converting to single-precision, we actually need to convert
8741 // to double-precision first and then round to single-precision.
8742 // To avoid double-rounding effects during that operation, we have
8743 // to prepare the input operand. Bits that might be truncated when
8744 // converting to double-precision are replaced by a bit that won't
8745 // be lost at this stage, but is below the single-precision rounding
8746 // position.
8747 //
8748 // However, if afn is in effect, accept double
8749 // rounding to avoid the extra overhead.
8750 // FIXME: Currently INT_TO_FP can't support fast math flags because
8751 // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8752 // false.
8753 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8754 !Op->getFlags().hasApproximateFuncs()) {
8755
8756 // Twiddle input to make sure the low 11 bits are zero. (If this
8757 // is the case, we are guaranteed the value will fit into the 53 bit
8758 // mantissa of an IEEE double-precision value without rounding.)
8759 // If any of those low 11 bits were not zero originally, make sure
8760 // bit 12 (value 2048) is set instead, so that the final rounding
8761 // to single-precision gets the correct result.
8762 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8763 SINT, DAG.getConstant(2047, dl, MVT::i64));
8764 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8765 Round, DAG.getConstant(2047, dl, MVT::i64));
8766 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8767 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8768 DAG.getSignedConstant(-2048, dl, MVT::i64));
8769
8770 // However, we cannot use that value unconditionally: if the magnitude
8771 // of the input value is small, the bit-twiddling we did above might
8772 // end up visibly changing the output. Fortunately, in that case, we
8773 // don't need to twiddle bits since the original input will convert
8774 // exactly to double-precision floating-point already. Therefore,
8775 // construct a conditional to use the original value if the top 11
8776 // bits are all sign-bit copies, and use the rounded value computed
8777 // above otherwise.
8778 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8779 SINT, DAG.getConstant(53, dl, MVT::i32));
8780 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8781 Cond, DAG.getConstant(1, dl, MVT::i64));
8782 Cond = DAG.getSetCC(
8783 dl,
8784 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8785 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8786
8787 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8788 }
8789
8790 ReuseLoadInfo RLI;
8791 SDValue Bits;
8792
8793 MachineFunction &MF = DAG.getMachineFunction();
8794 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8795 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8796 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8797 if (RLI.ResChain)
8798 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8799 } else if (Subtarget.hasLFIWAX() &&
8800 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8801 MachineMemOperand *MMO =
8803 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8804 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8805 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8806 DAG.getVTList(MVT::f64, MVT::Other),
8807 Ops, MVT::i32, MMO);
8808 if (RLI.ResChain)
8809 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8810 } else if (Subtarget.hasFPCVT() &&
8811 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8812 MachineMemOperand *MMO =
8814 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8815 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8816 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8817 DAG.getVTList(MVT::f64, MVT::Other),
8818 Ops, MVT::i32, MMO);
8819 if (RLI.ResChain)
8820 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8821 } else if (((Subtarget.hasLFIWAX() &&
8822 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8823 (Subtarget.hasFPCVT() &&
8824 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8825 SINT.getOperand(0).getValueType() == MVT::i32) {
8826 MachineFrameInfo &MFI = MF.getFrameInfo();
8827 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8828
8829 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8830 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8831
8832 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8834 DAG.getMachineFunction(), FrameIdx));
8835 Chain = Store;
8836
8837 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8838 "Expected an i32 store");
8839
8840 RLI.Ptr = FIdx;
8841 RLI.Chain = Chain;
8842 RLI.MPI =
8844 RLI.Alignment = Align(4);
8845
8846 MachineMemOperand *MMO =
8848 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8849 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8851 PPCISD::LFIWZX : PPCISD::LFIWAX,
8852 dl, DAG.getVTList(MVT::f64, MVT::Other),
8853 Ops, MVT::i32, MMO);
8854 Chain = Bits.getValue(1);
8855 } else
8856 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8857
8858 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8859 if (IsStrict)
8860 Chain = FP.getValue(1);
8861
8862 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8863 if (IsStrict)
8864 FP = DAG.getNode(
8865 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8866 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
8867 Flags);
8868 else
8869 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8870 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8871 }
8872 return FP;
8873 }
8874
8875 assert(Src.getValueType() == MVT::i32 &&
8876 "Unhandled INT_TO_FP type in custom expander!");
8877 // Since we only generate this in 64-bit mode, we can take advantage of
8878 // 64-bit registers. In particular, sign extend the input value into the
8879 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8880 // then lfd it and fcfid it.
8881 MachineFunction &MF = DAG.getMachineFunction();
8882 MachineFrameInfo &MFI = MF.getFrameInfo();
8883 EVT PtrVT = getPointerTy(MF.getDataLayout());
8884
8885 SDValue Ld;
8886 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8887 ReuseLoadInfo RLI;
8888 bool ReusingLoad;
8889 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8890 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8891 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8892
8893 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8895 DAG.getMachineFunction(), FrameIdx));
8896 Chain = Store;
8897
8898 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8899 "Expected an i32 store");
8900
8901 RLI.Ptr = FIdx;
8902 RLI.Chain = Chain;
8903 RLI.MPI =
8905 RLI.Alignment = Align(4);
8906 }
8907
8908 MachineMemOperand *MMO =
8910 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8911 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8912 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8913 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8914 MVT::i32, MMO);
8915 Chain = Ld.getValue(1);
8916 if (ReusingLoad && RLI.ResChain) {
8917 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
8918 }
8919 } else {
8920 assert(Subtarget.isPPC64() &&
8921 "i32->FP without LFIWAX supported only on PPC64");
8922
8923 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8924 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8925
8926 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8927
8928 // STD the extended value into the stack slot.
8929 SDValue Store = DAG.getStore(
8930 Chain, dl, Ext64, FIdx,
8932 Chain = Store;
8933
8934 // Load the value as a double.
8935 Ld = DAG.getLoad(
8936 MVT::f64, dl, Chain, FIdx,
8938 Chain = Ld.getValue(1);
8939 }
8940
8941 // FCFID it and return it.
8942 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8943 if (IsStrict)
8944 Chain = FP.getValue(1);
8945 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8946 if (IsStrict)
8947 FP = DAG.getNode(
8948 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8949 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
8950 else
8951 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8952 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8953 }
8954 return FP;
8955}
8956
// Lower SET_ROUNDING by rewriting the two rounding-control bits of the FPSCR
// (the low two bits of its 64-bit image). The LLVM rounding-mode operand is
// first remapped to the PPC encoding via x ^ (~(x >> 1) & 1).
SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc Dl(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  SDValue Chain = Op.getOperand(0);

  // If requested mode is constant, just use simpler mtfsb/mffscrni
  if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    uint64_t Mode = CVal->getZExtValue();
    assert(Mode < 4 && "Unsupported rounding mode!");
    // Remap the LLVM encoding to the PPC FPSCR encoding at compile time.
    unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
    if (Subtarget.isISA3_0())
      // ISA 3.0+: one mffscrni sets the rounding bits from the immediate.
      return SDValue(
          DAG.getMachineNode(
              PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
              {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
          1);
    // Pre-ISA3.0: set/clear FPSCR bits 30 and 31 individually with
    // mtfsb1/mtfsb0, chaining the second after the first.
    SDNode *SetHi = DAG.getMachineNode(
        (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
        {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
    SDNode *SetLo = DAG.getMachineNode(
        (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
        {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
    return SDValue(SetLo, 0);
  }

  // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
  SDValue One = DAG.getConstant(1, Dl, MVT::i32);
  SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
                                DAG.getConstant(3, Dl, MVT::i32));
  SDValue DstFlag = DAG.getNode(
      ISD::XOR, Dl, MVT::i32, SrcFlag,
      DAG.getNode(ISD::AND, Dl, MVT::i32,
                  DAG.getNOT(Dl,
                             DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
                             MVT::i32),
                  One));
  // For Power9, there's faster mffscrn, and we don't need to read FPSCR
  SDValue MFFS;
  if (!Subtarget.isISA3_0()) {
    MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
    Chain = MFFS.getValue(1);
  }
  // Build the new FPSCR image (as an f64) holding the requested mode.
  SDValue NewFPSCR;
  if (Subtarget.isPPC64()) {
    if (Subtarget.isISA3_0()) {
      // mffscrn only consumes the rounding bits, so the rest of the
      // register image can be anything.
      NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
    } else {
      // Set the last two bits (rounding mode) of bitcasted FPSCR.
      SDNode *InsertRN = DAG.getMachineNode(
          PPC::RLDIMI, Dl, MVT::i64,
          {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
           DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
           DAG.getTargetConstant(0, Dl, MVT::i32),
           DAG.getTargetConstant(62, Dl, MVT::i32)});
      NewFPSCR = SDValue(InsertRN, 0);
    }
    NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
  } else {
    // In 32-bit mode, store f64, load and update the lower half.
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    // Addr points at the 32-bit word that holds the rounding bits (the
    // high-address half on big-endian).
    SDValue Addr = Subtarget.isLittleEndian()
                       ? StackSlot
                       : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
                                     DAG.getConstant(4, Dl, PtrVT));
    if (Subtarget.isISA3_0()) {
      Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
    } else {
      // Read-modify-write: load the low word, splice in the two rounding
      // bits with rlwimi, and store it back.
      Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
      SDValue Tmp =
          DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
      Chain = Tmp.getValue(1);
      Tmp = SDValue(DAG.getMachineNode(
                        PPC::RLWIMI, Dl, MVT::i32,
                        {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
                         DAG.getTargetConstant(30, Dl, MVT::i32),
                         DAG.getTargetConstant(31, Dl, MVT::i32)}),
                    0);
      Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
    }
    NewFPSCR =
        DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
    Chain = NewFPSCR.getValue(1);
  }
  if (Subtarget.isISA3_0())
    return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
                                      {NewFPSCR, Chain}),
                   1);
  // Pre-ISA3.0: write the full image back with mtfsf (field mask 0xFF).
  SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
  SDNode *MTFSF = DAG.getMachineNode(
      PPC::MTFSF, Dl, MVT::Other,
      {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
  return SDValue(MTFSF, 0);
}
9053
9054SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9055 SelectionDAG &DAG) const {
9056 SDLoc dl(Op);
9057 /*
9058 The rounding mode is in bits 30:31 of FPSR, and has the following
9059 settings:
9060 00 Round to nearest
9061 01 Round to 0
9062 10 Round to +inf
9063 11 Round to -inf
9064
9065 GET_ROUNDING, on the other hand, expects the following:
9066 -1 Undefined
9067 0 Round to 0
9068 1 Round to nearest
9069 2 Round to +inf
9070 3 Round to -inf
9071
9072 To perform the conversion, we do:
9073 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9074 */
9075
9076 MachineFunction &MF = DAG.getMachineFunction();
9077 EVT VT = Op.getValueType();
9078 EVT PtrVT = getPointerTy(MF.getDataLayout());
9079
9080 // Save FP Control Word to register
9081 SDValue Chain = Op.getOperand(0);
9082 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9083 Chain = MFFS.getValue(1);
9084
9085 SDValue CWD;
9086 if (isTypeLegal(MVT::i64)) {
9087 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9088 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9089 } else {
9090 // Save FP register to stack slot
9091 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9092 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9093 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9094
9095 // Load FP Control Word from low 32 bits of stack slot.
9097 "Stack slot adjustment is valid only on big endian subtargets!");
9098 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9099 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9100 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9101 Chain = CWD.getValue(1);
9102 }
9103
9104 // Transform as necessary
9105 SDValue CWD1 =
9106 DAG.getNode(ISD::AND, dl, MVT::i32,
9107 CWD, DAG.getConstant(3, dl, MVT::i32));
9108 SDValue CWD2 =
9109 DAG.getNode(ISD::SRL, dl, MVT::i32,
9110 DAG.getNode(ISD::AND, dl, MVT::i32,
9111 DAG.getNode(ISD::XOR, dl, MVT::i32,
9112 CWD, DAG.getConstant(3, dl, MVT::i32)),
9113 DAG.getConstant(3, dl, MVT::i32)),
9114 DAG.getConstant(1, dl, MVT::i32));
9115
9116 SDValue RetVal =
9117 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9118
9119 RetVal =
9121 dl, VT, RetVal);
9122
9123 return DAG.getMergeValues({RetVal, Chain}, dl);
9124}
9125
9126SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9127 EVT VT = Op.getValueType();
9128 uint64_t BitWidth = VT.getSizeInBits();
9129 SDLoc dl(Op);
9130 assert(Op.getNumOperands() == 3 &&
9131 VT == Op.getOperand(1).getValueType() &&
9132 "Unexpected SHL!");
9133
9134 // Expand into a bunch of logical ops. Note that these ops
9135 // depend on the PPC behavior for oversized shift amounts.
9136 SDValue Lo = Op.getOperand(0);
9137 SDValue Hi = Op.getOperand(1);
9138 SDValue Amt = Op.getOperand(2);
9139 EVT AmtVT = Amt.getValueType();
9140
9141 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9142 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9143 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9144 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9145 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9146 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9147 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9148 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9149 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9150 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9151 SDValue OutOps[] = { OutLo, OutHi };
9152 return DAG.getMergeValues(OutOps, dl);
9153}
9154
9155SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9156 EVT VT = Op.getValueType();
9157 SDLoc dl(Op);
9158 uint64_t BitWidth = VT.getSizeInBits();
9159 assert(Op.getNumOperands() == 3 &&
9160 VT == Op.getOperand(1).getValueType() &&
9161 "Unexpected SRL!");
9162
9163 // Expand into a bunch of logical ops. Note that these ops
9164 // depend on the PPC behavior for oversized shift amounts.
9165 SDValue Lo = Op.getOperand(0);
9166 SDValue Hi = Op.getOperand(1);
9167 SDValue Amt = Op.getOperand(2);
9168 EVT AmtVT = Amt.getValueType();
9169
9170 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9171 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9172 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9173 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9174 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9175 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9176 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9177 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9178 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9179 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9180 SDValue OutOps[] = { OutLo, OutHi };
9181 return DAG.getMergeValues(OutOps, dl);
9182}
9183
9184SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9185 SDLoc dl(Op);
9186 EVT VT = Op.getValueType();
9187 uint64_t BitWidth = VT.getSizeInBits();
9188 assert(Op.getNumOperands() == 3 &&
9189 VT == Op.getOperand(1).getValueType() &&
9190 "Unexpected SRA!");
9191
9192 // Expand into a bunch of logical ops, followed by a select_cc.
9193 SDValue Lo = Op.getOperand(0);
9194 SDValue Hi = Op.getOperand(1);
9195 SDValue Amt = Op.getOperand(2);
9196 EVT AmtVT = Amt.getValueType();
9197
9198 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9199 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9200 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9201 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9202 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9203 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9204 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9205 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9206 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9207 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9208 Tmp4, Tmp6, ISD::SETLE);
9209 SDValue OutOps[] = { OutLo, OutHi };
9210 return DAG.getMergeValues(OutOps, dl);
9211}
9212
9213SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9214 SelectionDAG &DAG) const {
9215 SDLoc dl(Op);
9216 EVT VT = Op.getValueType();
9217 unsigned BitWidth = VT.getSizeInBits();
9218
9219 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9220 SDValue X = Op.getOperand(0);
9221 SDValue Y = Op.getOperand(1);
9222 SDValue Z = Op.getOperand(2);
9223 EVT AmtVT = Z.getValueType();
9224
9225 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9226 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9227 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9228 // on PowerPC shift by BW being well defined.
9229 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9230 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9231 SDValue SubZ =
9232 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9233 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9234 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9235 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9236}
9237
9238//===----------------------------------------------------------------------===//
9239// Vector related lowering.
9240//
9241
9242/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9243/// element size of SplatSize. Cast the result to VT.
9244static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9245 SelectionDAG &DAG, const SDLoc &dl) {
9246 static const MVT VTys[] = { // canonical VT to use for each size.
9247 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9248 };
9249
9250 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9251
9252 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9253 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9254 SplatSize = 1;
9255 Val = 0xFF;
9256 }
9257
9258 EVT CanonicalVT = VTys[SplatSize-1];
9259
9260 // Build a canonical splat for this value.
9261 // Explicitly truncate APInt here, as this API is used with a mix of
9262 // signed and unsigned values.
9263 return DAG.getBitcast(
9264 ReqVT,
9265 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9266}
9267
9268/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9269/// specified intrinsic ID.
9271 const SDLoc &dl, EVT DestVT = MVT::Other) {
9272 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9273 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9274 DAG.getConstant(IID, dl, MVT::i32), Op);
9275}
9276
9277/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9278/// specified intrinsic ID.
9280 SelectionDAG &DAG, const SDLoc &dl,
9281 EVT DestVT = MVT::Other) {
9282 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9283 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9284 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9285}
9286
9287/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9288/// specified intrinsic ID.
9289static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9290 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9291 EVT DestVT = MVT::Other) {
9292 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9293 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9294 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9295}
9296
9297/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9298/// amount. The result has the specified value type.
9299static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9300 SelectionDAG &DAG, const SDLoc &dl) {
9301 // Force LHS/RHS to be the right type.
9302 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9303 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9304
9305 int Ops[16];
9306 for (unsigned i = 0; i != 16; ++i)
9307 Ops[i] = i + Amt;
9308 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9309 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9310}
9311
9312/// Do we have an efficient pattern in a .td file for this node?
9313///
9314/// \param V - pointer to the BuildVectorSDNode being matched
9315/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9316///
9317/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9318/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9319/// the opposite is true (expansion is beneficial) are:
9320/// - The node builds a vector out of integers that are not 32 or 64-bits
9321/// - The node builds a vector out of constants
9322/// - The node is a "load-and-splat"
9323/// In all other cases, we will choose to keep the BUILD_VECTOR.
9325 bool HasDirectMove,
9326 bool HasP8Vector) {
9327 EVT VecVT = V->getValueType(0);
9328 bool RightType = VecVT == MVT::v2f64 ||
9329 (HasP8Vector && VecVT == MVT::v4f32) ||
9330 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9331 if (!RightType)
9332 return false;
9333
9334 bool IsSplat = true;
9335 bool IsLoad = false;
9336 SDValue Op0 = V->getOperand(0);
9337
9338 // This function is called in a block that confirms the node is not a constant
9339 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9340 // different constants.
9341 if (V->isConstant())
9342 return false;
9343 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9344 if (V->getOperand(i).isUndef())
9345 return false;
9346 // We want to expand nodes that represent load-and-splat even if the
9347 // loaded value is a floating point truncation or conversion to int.
9348 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9349 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9350 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9351 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9352 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9353 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9354 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9355 IsLoad = true;
9356 // If the operands are different or the input is not a load and has more
9357 // uses than just this BV node, then it isn't a splat.
9358 if (V->getOperand(i) != Op0 ||
9359 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9360 IsSplat = false;
9361 }
9362 return !(IsSplat && IsLoad);
9363}
9364
9365// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9366SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9367
9368 SDLoc dl(Op);
9369 SDValue Op0 = Op->getOperand(0);
9370
9371 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9372 (Op.getValueType() != MVT::f128))
9373 return SDValue();
9374
9375 SDValue Lo = Op0.getOperand(0);
9376 SDValue Hi = Op0.getOperand(1);
9377 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9378 return SDValue();
9379
9380 if (!Subtarget.isLittleEndian())
9381 std::swap(Lo, Hi);
9382
9383 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9384}
9385
9386static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9387 const SDValue *InputLoad = &Op;
9388 while (InputLoad->getOpcode() == ISD::BITCAST)
9389 InputLoad = &InputLoad->getOperand(0);
9390 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9391 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9392 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9393 InputLoad = &InputLoad->getOperand(0);
9394 }
9395 if (InputLoad->getOpcode() != ISD::LOAD)
9396 return nullptr;
9397 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9398 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9399}
9400
9401// Convert the argument APFloat to a single precision APFloat if there is no
9402// loss in information during the conversion to single precision APFloat and the
9403// resulting number is not a denormal number. Return true if successful.
9405 APFloat APFloatToConvert = ArgAPFloat;
9406 bool LosesInfo = true;
9408 &LosesInfo);
9409 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9410 if (Success)
9411 ArgAPFloat = APFloatToConvert;
9412 return Success;
9413}
9414
9415// Bitcast the argument APInt to a double and convert it to a single precision
9416// APFloat, bitcast the APFloat to an APInt and assign it to the original
9417// argument if there is no loss in information during the conversion from
9418// double to single precision APFloat and the resulting number is not a denormal
9419// number. Return true if successful.
9421 double DpValue = ArgAPInt.bitsToDouble();
9422 APFloat APFloatDp(DpValue);
9423 bool Success = convertToNonDenormSingle(APFloatDp);
9424 if (Success)
9425 ArgAPInt = APFloatDp.bitcastToAPInt();
9426 return Success;
9427}
9428
9429// Nondestructive check for convertTonNonDenormSingle.
9431 // Only convert if it loses info, since XXSPLTIDP should
9432 // handle the other case.
9433 APFloat APFloatToConvert = ArgAPFloat;
9434 bool LosesInfo = true;
9436 &LosesInfo);
9437
9438 return (!LosesInfo && !APFloatToConvert.isDenormal());
9439}
9440
9441static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9442 unsigned &Opcode) {
9443 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9444 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9445 return false;
9446
9447 EVT Ty = Op->getValueType(0);
9448 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9449 // as we cannot handle extending loads for these types.
9450 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9451 ISD::isNON_EXTLoad(InputNode))
9452 return true;
9453
9454 EVT MemVT = InputNode->getMemoryVT();
9455 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9456 // memory VT is the same vector element VT type.
9457 // The loads feeding into the v8i16 and v16i8 types will be extending because
9458 // scalar i8/i16 are not legal types.
9459 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9460 (MemVT == Ty.getVectorElementType()))
9461 return true;
9462
9463 if (Ty == MVT::v2i64) {
9464 // Check the extend type, when the input type is i32, and the output vector
9465 // type is v2i64.
9466 if (MemVT == MVT::i32) {
9467 if (ISD::isZEXTLoad(InputNode))
9468 Opcode = PPCISD::ZEXT_LD_SPLAT;
9469 if (ISD::isSEXTLoad(InputNode))
9470 Opcode = PPCISD::SEXT_LD_SPLAT;
9471 }
9472 return true;
9473 }
9474 return false;
9475}
9476
9478 bool IsLittleEndian) {
9479 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9480
9481 BitMask.clearAllBits();
9482 EVT VT = BVN.getValueType(0);
9483 unsigned VTSize = VT.getSizeInBits();
9484 APInt ConstValue(VTSize, 0);
9485
9486 unsigned EltWidth = VT.getScalarSizeInBits();
9487
9488 unsigned BitPos = 0;
9489 for (auto OpVal : BVN.op_values()) {
9490 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9491
9492 if (!CN)
9493 return false;
9494 // The elements in a vector register are ordered in reverse byte order
9495 // between little-endian and big-endian modes.
9496 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9497 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9498 BitPos += EltWidth;
9499 }
9500
9501 for (unsigned J = 0; J < 16; ++J) {
9502 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9503 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9504 return false;
9505 if (ExtractValue == 0xFF)
9506 BitMask.setBit(J);
9507 }
9508 return true;
9509}
9510
9511// If this is a case we can't handle, return null and let the default
9512// expansion code take care of it. If we CAN select this case, and if it
9513// selects to a single instruction, return Op. Otherwise, if we can codegen
9514// this case more efficiently than a constant pool load, lower it to the
9515// sequence of ops that should be used.
9516SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9517 SelectionDAG &DAG) const {
9518 SDLoc dl(Op);
9519 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9520 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9521
9522 if (Subtarget.hasP10Vector()) {
9523 APInt BitMask(32, 0);
9524 // If the value of the vector is all zeros or all ones,
9525 // we do not convert it to MTVSRBMI.
9526 // The xxleqv instruction sets a vector with all ones.
9527 // The xxlxor instruction sets a vector with all zeros.
9528 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9529 BitMask != 0 && BitMask != 0xffff) {
9530 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9531 MachineSDNode *MSDNode =
9532 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9533 SDValue SDV = SDValue(MSDNode, 0);
9534 EVT DVT = BVN->getValueType(0);
9535 EVT SVT = SDV.getValueType();
9536 if (SVT != DVT) {
9537 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9538 }
9539 return SDV;
9540 }
9541 // Recognize build vector patterns to emit VSX vector instructions
9542 // instead of loading value from memory.
9543 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9544 return VecPat;
9545 }
9546 // Check if this is a splat of a constant value.
9547 APInt APSplatBits, APSplatUndef;
9548 unsigned SplatBitSize;
9549 bool HasAnyUndefs;
9550 bool BVNIsConstantSplat =
9551 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9552 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9553
9554 // If it is a splat of a double, check if we can shrink it to a 32 bit
9555 // non-denormal float which when converted back to double gives us the same
9556 // double. This is to exploit the XXSPLTIDP instruction.
9557 // If we lose precision, we use XXSPLTI32DX.
9558 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9559 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9560 // Check the type first to short-circuit so we don't modify APSplatBits if
9561 // this block isn't executed.
9562 if ((Op->getValueType(0) == MVT::v2f64) &&
9563 convertToNonDenormSingle(APSplatBits)) {
9564 SDValue SplatNode = DAG.getNode(
9565 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9566 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9567 return DAG.getBitcast(Op.getValueType(), SplatNode);
9568 } else {
9569 // We may lose precision, so we have to use XXSPLTI32DX.
9570
9571 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9572 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9573 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9574
9575 if (!Hi || !Lo)
9576 // If either load is 0, then we should generate XXLXOR to set to 0.
9577 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9578
9579 if (Hi)
9580 SplatNode = DAG.getNode(
9581 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9582 DAG.getTargetConstant(0, dl, MVT::i32),
9583 DAG.getTargetConstant(Hi, dl, MVT::i32));
9584
9585 if (Lo)
9586 SplatNode =
9587 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9588 DAG.getTargetConstant(1, dl, MVT::i32),
9589 DAG.getTargetConstant(Lo, dl, MVT::i32));
9590
9591 return DAG.getBitcast(Op.getValueType(), SplatNode);
9592 }
9593 }
9594
9595 bool IsSplat64 = false;
9596 uint64_t SplatBits = 0;
9597 int32_t SextVal = 0;
9598 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9599 SplatBits = APSplatBits.getZExtValue();
9600 if (SplatBitSize <= 32) {
9601 SextVal = SignExtend32(SplatBits, SplatBitSize);
9602 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9603 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9604 bool P9Vector = Subtarget.hasP9Vector();
9605 int32_t Hi = P9Vector ? 127 : 15;
9606 int32_t Lo = P9Vector ? -128 : -16;
9607 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9608 SextVal = static_cast<int32_t>(SplatBits);
9609 }
9610 }
9611
9612 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9613 unsigned NewOpcode = PPCISD::LD_SPLAT;
9614
9615 // Handle load-and-splat patterns as we have instructions that will do this
9616 // in one go.
9617 if (DAG.isSplatValue(Op, true) &&
9618 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9619 const SDValue *InputLoad = &Op.getOperand(0);
9620 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9621
9622 // If the input load is an extending load, it will be an i32 -> i64
9623 // extending load and isValidSplatLoad() will update NewOpcode.
9624 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9625 unsigned ElementSize =
9626 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9627
9628 assert(((ElementSize == 2 * MemorySize)
9629 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9630 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9631 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9632 "Unmatched element size and opcode!\n");
9633
9634 // Checking for a single use of this load, we have to check for vector
9635 // width (128 bits) / ElementSize uses (since each operand of the
9636 // BUILD_VECTOR is a separate use of the value.
9637 unsigned NumUsesOfInputLD = 128 / ElementSize;
9638 for (SDValue BVInOp : Op->ops())
9639 if (BVInOp.isUndef())
9640 NumUsesOfInputLD--;
9641
9642 // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9643 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9644 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9645 // 15", but function IsValidSplatLoad() now will only return true when
9646 // the data at index 0 is not nullptr. So we will not get into trouble for
9647 // these cases.
9648 //
9649 // case 1 - lfiwzx/lfiwax
9650 // 1.1: load result is i32 and is sign/zero extend to i64;
9651 // 1.2: build a v2i64 vector type with above loaded value;
9652 // 1.3: the vector has only one value at index 0, others are all undef;
9653 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9654 if (NumUsesOfInputLD == 1 &&
9655 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9656 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9657 Subtarget.hasLFIWAX()))
9658 return SDValue();
9659
9660 // case 2 - lxvr[hb]x
9661 // 2.1: load result is at most i16;
9662 // 2.2: build a vector with above loaded value;
9663 // 2.3: the vector has only one value at index 0, others are all undef;
9664 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9665 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9666 Subtarget.isISA3_1() && ElementSize <= 16)
9667 return SDValue();
9668
9669 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9670 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9671 Subtarget.hasVSX()) {
9672 SDValue Ops[] = {
9673 LD->getChain(), // Chain
9674 LD->getBasePtr(), // Ptr
9675 DAG.getValueType(Op.getValueType()) // VT
9676 };
9677 SDValue LdSplt = DAG.getMemIntrinsicNode(
9678 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9679 LD->getMemoryVT(), LD->getMemOperand());
9680 // Replace all uses of the output chain of the original load with the
9681 // output chain of the new load.
9682 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9683 LdSplt.getValue(1));
9684 return LdSplt;
9685 }
9686 }
9687
9688 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9689 // 32-bits can be lowered to VSX instructions under certain conditions.
9690 // Without VSX, there is no pattern more efficient than expanding the node.
9691 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9692 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9693 Subtarget.hasP8Vector()))
9694 return Op;
9695 return SDValue();
9696 }
9697
9698 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9699 unsigned SplatSize = SplatBitSize / 8;
9700
9701 // First, handle single instruction cases.
9702
9703 // All zeros?
9704 if (SplatBits == 0) {
9705 // Canonicalize all zero vectors to be v4i32.
9706 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9707 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9708 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9709 }
9710 return Op;
9711 }
9712
9713 // We have XXSPLTIW for constant splats four bytes wide.
9714 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9715 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9716 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9717 // turned into a 4-byte splat of 0xABABABAB.
9718 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9719 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9720 Op.getValueType(), DAG, dl);
9721
9722 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9723 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9724 dl);
9725
9726 // We have XXSPLTIB for constant splats one byte wide.
9727 if (Subtarget.hasP9Vector() && SplatSize == 1)
9728 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9729 dl);
9730
9731 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9732 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9733 if (SextVal >= -16 && SextVal <= 15) {
9734 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9735 // generate a splat word with extend for size 8.
9736 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9737 SDValue Res =
9738 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9739 if (SplatSize != 8)
9740 return Res;
9741 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9742 }
9743
9744 // Two instruction sequences.
9745
9746 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9747 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9749 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9750 unsigned IID;
9751 EVT VT;
9752 switch (SplatSize) {
9753 default:
9754 llvm_unreachable("Unexpected type for vector constant.");
9755 case 2:
9756 IID = Intrinsic::ppc_altivec_vupklsb;
9757 VT = MVT::v8i16;
9758 break;
9759 case 4:
9760 IID = Intrinsic::ppc_altivec_vextsb2w;
9761 VT = MVT::v4i32;
9762 break;
9763 case 8:
9764 IID = Intrinsic::ppc_altivec_vextsb2d;
9765 VT = MVT::v2i64;
9766 break;
9767 }
9768 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9769 return DAG.getBitcast(Op->getValueType(0), Extend);
9770 }
9771 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9772
9773 // If this value is in the range [-32,30] and is even, use:
9774 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9775 // If this value is in the range [17,31] and is odd, use:
9776 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9777 // If this value is in the range [-31,-17] and is odd, use:
9778 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9779 // Note the last two are three-instruction sequences.
9780 if (SextVal >= -32 && SextVal <= 31) {
9781 // To avoid having these optimizations undone by constant folding,
9782 // we convert to a pseudo that will be expanded later into one of
9783 // the above forms.
9784 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9785 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9786 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9787 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9788 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9789 if (VT == Op.getValueType())
9790 return RetVal;
9791 else
9792 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9793 }
9794
9795 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9796 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9797 // for fneg/fabs.
9798 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9799 // Make -1 and vspltisw -1:
9800 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9801
9802 // Make the VSLW intrinsic, computing 0x8000_0000.
9803 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9804 OnesV, DAG, dl);
9805
9806 // xor by OnesV to invert it.
9807 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9808 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9809 }
9810
9811 // Check to see if this is a wide variety of vsplti*, binop self cases.
9812 static const signed char SplatCsts[] = {
9813 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9814 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9815 };
9816
9817 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9818 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9819 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9820 int i = SplatCsts[idx];
9821
9822 // Figure out what shift amount will be used by altivec if shifted by i in
9823 // this splat size.
9824 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9825
9826 // vsplti + shl self.
9827 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9828 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9829 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9830 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9831 Intrinsic::ppc_altivec_vslw
9832 };
9833 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9834 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9835 }
9836
9837 // vsplti + srl self.
9838 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9839 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9840 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9841 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9842 Intrinsic::ppc_altivec_vsrw
9843 };
9844 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9845 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9846 }
9847
9848 // vsplti + rol self.
9849 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9850 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9851 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9852 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9853 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9854 Intrinsic::ppc_altivec_vrlw
9855 };
9856 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9857 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9858 }
9859
9860 // t = vsplti c, result = vsldoi t, t, 1
9861 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9862 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9863 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9864 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9865 }
9866 // t = vsplti c, result = vsldoi t, t, 2
9867 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9868 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9869 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9870 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9871 }
9872 // t = vsplti c, result = vsldoi t, t, 3
9873 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9874 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9875 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9876 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9877 }
9878 }
9879
9880 return SDValue();
9881}
9882
9883/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9884/// the specified operations to build the shuffle.
9886 SDValue RHS, SelectionDAG &DAG,
9887 const SDLoc &dl) {
9888 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9889 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9890 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9891
9892 enum {
9893 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9894 OP_VMRGHW,
9895 OP_VMRGLW,
9896 OP_VSPLTISW0,
9897 OP_VSPLTISW1,
9898 OP_VSPLTISW2,
9899 OP_VSPLTISW3,
9900 OP_VSLDOI4,
9901 OP_VSLDOI8,
9902 OP_VSLDOI12
9903 };
9904
9905 if (OpNum == OP_COPY) {
9906 if (LHSID == (1*9+2)*9+3) return LHS;
9907 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9908 return RHS;
9909 }
9910
9911 SDValue OpLHS, OpRHS;
9912 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9913 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9914
9915 int ShufIdxs[16];
9916 switch (OpNum) {
9917 default: llvm_unreachable("Unknown i32 permute!");
9918 case OP_VMRGHW:
9919 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9920 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9921 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9922 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9923 break;
9924 case OP_VMRGLW:
9925 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9926 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9927 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9928 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9929 break;
9930 case OP_VSPLTISW0:
9931 for (unsigned i = 0; i != 16; ++i)
9932 ShufIdxs[i] = (i&3)+0;
9933 break;
9934 case OP_VSPLTISW1:
9935 for (unsigned i = 0; i != 16; ++i)
9936 ShufIdxs[i] = (i&3)+4;
9937 break;
9938 case OP_VSPLTISW2:
9939 for (unsigned i = 0; i != 16; ++i)
9940 ShufIdxs[i] = (i&3)+8;
9941 break;
9942 case OP_VSPLTISW3:
9943 for (unsigned i = 0; i != 16; ++i)
9944 ShufIdxs[i] = (i&3)+12;
9945 break;
9946 case OP_VSLDOI4:
9947 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9948 case OP_VSLDOI8:
9949 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9950 case OP_VSLDOI12:
9951 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9952 }
9953 EVT VT = OpLHS.getValueType();
9954 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9955 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9956 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9957 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9958}
9959
9960/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9961/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9962/// SDValue.
9963SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9964 SelectionDAG &DAG) const {
9965 const unsigned BytesInVector = 16;
9966 bool IsLE = Subtarget.isLittleEndian();
9967 SDLoc dl(N);
9968 SDValue V1 = N->getOperand(0);
9969 SDValue V2 = N->getOperand(1);
9970 unsigned ShiftElts = 0, InsertAtByte = 0;
9971 bool Swap = false;
9972
9973 // Shifts required to get the byte we want at element 7.
9974 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9975 0, 15, 14, 13, 12, 11, 10, 9};
9976 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9977 1, 2, 3, 4, 5, 6, 7, 8};
9978
9979 ArrayRef<int> Mask = N->getMask();
9980 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9981
9982 // For each mask element, find out if we're just inserting something
9983 // from V2 into V1 or vice versa.
9984 // Possible permutations inserting an element from V2 into V1:
9985 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9986 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9987 // ...
9988 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9989 // Inserting from V1 into V2 will be similar, except mask range will be
9990 // [16,31].
9991
9992 bool FoundCandidate = false;
9993 // If both vector operands for the shuffle are the same vector, the mask
9994 // will contain only elements from the first one and the second one will be
9995 // undef.
9996 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9997 // Go through the mask of half-words to find an element that's being moved
9998 // from one vector to the other.
9999 for (unsigned i = 0; i < BytesInVector; ++i) {
10000 unsigned CurrentElement = Mask[i];
10001 // If 2nd operand is undefined, we should only look for element 7 in the
10002 // Mask.
10003 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10004 continue;
10005
10006 bool OtherElementsInOrder = true;
10007 // Examine the other elements in the Mask to see if they're in original
10008 // order.
10009 for (unsigned j = 0; j < BytesInVector; ++j) {
10010 if (j == i)
10011 continue;
10012 // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
10013 // from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
10014 // in which we always assume we're always picking from the 1st operand.
10015 int MaskOffset =
10016 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10017 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10018 OtherElementsInOrder = false;
10019 break;
10020 }
10021 }
10022 // If other elements are in original order, we record the number of shifts
10023 // we need to get the element we want into element 7. Also record which byte
10024 // in the vector we should insert into.
10025 if (OtherElementsInOrder) {
10026 // If 2nd operand is undefined, we assume no shifts and no swapping.
10027 if (V2.isUndef()) {
10028 ShiftElts = 0;
10029 Swap = false;
10030 } else {
10031 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10032 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10033 : BigEndianShifts[CurrentElement & 0xF];
10034 Swap = CurrentElement < BytesInVector;
10035 }
10036 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10037 FoundCandidate = true;
10038 break;
10039 }
10040 }
10041
10042 if (!FoundCandidate)
10043 return SDValue();
10044
10045 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10046 // optionally with VECSHL if shift is required.
10047 if (Swap)
10048 std::swap(V1, V2);
10049 if (V2.isUndef())
10050 V2 = V1;
10051 if (ShiftElts) {
10052 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10053 DAG.getConstant(ShiftElts, dl, MVT::i32));
10054 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10055 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10056 }
10057 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10058 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10059}
10060
10061/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10062/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10063/// SDValue.
10064SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10065 SelectionDAG &DAG) const {
10066 const unsigned NumHalfWords = 8;
10067 const unsigned BytesInVector = NumHalfWords * 2;
10068 // Check that the shuffle is on half-words.
10069 if (!isNByteElemShuffleMask(N, 2, 1))
10070 return SDValue();
10071
10072 bool IsLE = Subtarget.isLittleEndian();
10073 SDLoc dl(N);
10074 SDValue V1 = N->getOperand(0);
10075 SDValue V2 = N->getOperand(1);
10076 unsigned ShiftElts = 0, InsertAtByte = 0;
10077 bool Swap = false;
10078
10079 // Shifts required to get the half-word we want at element 3.
10080 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10081 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10082
10083 uint32_t Mask = 0;
10084 uint32_t OriginalOrderLow = 0x1234567;
10085 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10086 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10087 // 32-bit space, only need 4-bit nibbles per element.
10088 for (unsigned i = 0; i < NumHalfWords; ++i) {
10089 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10090 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10091 }
10092
10093 // For each mask element, find out if we're just inserting something
10094 // from V2 into V1 or vice versa. Possible permutations inserting an element
10095 // from V2 into V1:
10096 // X, 1, 2, 3, 4, 5, 6, 7
10097 // 0, X, 2, 3, 4, 5, 6, 7
10098 // 0, 1, X, 3, 4, 5, 6, 7
10099 // 0, 1, 2, X, 4, 5, 6, 7
10100 // 0, 1, 2, 3, X, 5, 6, 7
10101 // 0, 1, 2, 3, 4, X, 6, 7
10102 // 0, 1, 2, 3, 4, 5, X, 7
10103 // 0, 1, 2, 3, 4, 5, 6, X
10104 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10105
10106 bool FoundCandidate = false;
10107 // Go through the mask of half-words to find an element that's being moved
10108 // from one vector to the other.
10109 for (unsigned i = 0; i < NumHalfWords; ++i) {
10110 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10111 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10112 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10113 uint32_t TargetOrder = 0x0;
10114
10115 // If both vector operands for the shuffle are the same vector, the mask
10116 // will contain only elements from the first one and the second one will be
10117 // undef.
10118 if (V2.isUndef()) {
10119 ShiftElts = 0;
10120 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10121 TargetOrder = OriginalOrderLow;
10122 Swap = false;
10123 // Skip if not the correct element or mask of other elements don't equal
10124 // to our expected order.
10125 if (MaskOneElt == VINSERTHSrcElem &&
10126 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10127 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10128 FoundCandidate = true;
10129 break;
10130 }
10131 } else { // If both operands are defined.
10132 // Target order is [8,15] if the current mask is between [0,7].
10133 TargetOrder =
10134 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10135 // Skip if mask of other elements don't equal our expected order.
10136 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10137 // We only need the last 3 bits for the number of shifts.
10138 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10139 : BigEndianShifts[MaskOneElt & 0x7];
10140 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10141 Swap = MaskOneElt < NumHalfWords;
10142 FoundCandidate = true;
10143 break;
10144 }
10145 }
10146 }
10147
10148 if (!FoundCandidate)
10149 return SDValue();
10150
10151 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10152 // optionally with VECSHL if shift is required.
10153 if (Swap)
10154 std::swap(V1, V2);
10155 if (V2.isUndef())
10156 V2 = V1;
10157 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10158 if (ShiftElts) {
10159 // Double ShiftElts because we're left shifting on v16i8 type.
10160 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10161 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10162 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10163 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10164 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10165 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10166 }
10167 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10168 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10169 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10170 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10171}
10172
10173/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10174/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10175/// return the default SDValue.
10176SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10177 SelectionDAG &DAG) const {
10178 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10179 // to v16i8. Peek through the bitcasts to get the actual operands.
10182
10183 auto ShuffleMask = SVN->getMask();
10184 SDValue VecShuffle(SVN, 0);
10185 SDLoc DL(SVN);
10186
10187 // Check that we have a four byte shuffle.
10188 if (!isNByteElemShuffleMask(SVN, 4, 1))
10189 return SDValue();
10190
10191 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10192 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10193 std::swap(LHS, RHS);
10195 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10196 if (!CommutedSV)
10197 return SDValue();
10198 ShuffleMask = CommutedSV->getMask();
10199 }
10200
10201 // Ensure that the RHS is a vector of constants.
10202 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10203 if (!BVN)
10204 return SDValue();
10205
10206 // Check if RHS is a splat of 4-bytes (or smaller).
10207 APInt APSplatValue, APSplatUndef;
10208 unsigned SplatBitSize;
10209 bool HasAnyUndefs;
10210 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10211 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10212 SplatBitSize > 32)
10213 return SDValue();
10214
10215 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10216 // The instruction splats a constant C into two words of the source vector
10217 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10218 // Thus we check that the shuffle mask is the equivalent of
10219 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10220 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10221 // within each word are consecutive, so we only need to check the first byte.
10222 SDValue Index;
10223 bool IsLE = Subtarget.isLittleEndian();
10224 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10225 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10226 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10227 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10228 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10229 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10230 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10231 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10232 else
10233 return SDValue();
10234
10235 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10236 // for XXSPLTI32DX.
10237 unsigned SplatVal = APSplatValue.getZExtValue();
10238 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10239 SplatVal |= (SplatVal << SplatBitSize);
10240
10241 SDValue SplatNode = DAG.getNode(
10242 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10243 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10244 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10245}
10246
10247/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10248/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10249/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10250/// i.e (or (shl x, C1), (srl x, 128-C1)).
10251SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10252 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10253 assert(Op.getValueType() == MVT::v1i128 &&
10254 "Only set v1i128 as custom, other type shouldn't reach here!");
10255 SDLoc dl(Op);
10256 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10257 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10258 unsigned SHLAmt = N1.getConstantOperandVal(0);
10259 if (SHLAmt % 8 == 0) {
10260 std::array<int, 16> Mask;
10261 std::iota(Mask.begin(), Mask.end(), 0);
10262 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10263 if (SDValue Shuffle =
10264 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10265 DAG.getUNDEF(MVT::v16i8), Mask))
10266 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10267 }
10268 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10269 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10270 DAG.getConstant(SHLAmt, dl, MVT::i32));
10271 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10272 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10273 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10274 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10275}
10276
10277/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10278/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10279/// return the code it can be lowered into. Worst case, it can always be
10280/// lowered into a vperm.
10281SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10282 SelectionDAG &DAG) const {
10283 SDLoc dl(Op);
10284 SDValue V1 = Op.getOperand(0);
10285 SDValue V2 = Op.getOperand(1);
10286 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10287
10288 // Any nodes that were combined in the target-independent combiner prior
10289 // to vector legalization will not be sent to the target combine. Try to
10290 // combine it here.
10291 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10292 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10293 return NewShuffle;
10294 Op = NewShuffle;
10296 V1 = Op.getOperand(0);
10297 V2 = Op.getOperand(1);
10298 }
10299 EVT VT = Op.getValueType();
10300 bool isLittleEndian = Subtarget.isLittleEndian();
10301
10302 unsigned ShiftElts, InsertAtByte;
10303 bool Swap = false;
10304
10305 // If this is a load-and-splat, we can do that with a single instruction
10306 // in some cases. However if the load has multiple uses, we don't want to
10307 // combine it because that will just produce multiple loads.
10308 bool IsPermutedLoad = false;
10309 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10310 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10311 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10312 InputLoad->hasOneUse()) {
10313 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10314 int SplatIdx =
10315 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10316
10317 // The splat index for permuted loads will be in the left half of the vector
10318 // which is strictly wider than the loaded value by 8 bytes. So we need to
10319 // adjust the splat index to point to the correct address in memory.
10320 if (IsPermutedLoad) {
10321 assert((isLittleEndian || IsFourByte) &&
10322 "Unexpected size for permuted load on big endian target");
10323 SplatIdx += IsFourByte ? 2 : 1;
10324 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10325 "Splat of a value outside of the loaded memory");
10326 }
10327
10328 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10329 // For 4-byte load-and-splat, we need Power9.
10330 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10331 uint64_t Offset = 0;
10332 if (IsFourByte)
10333 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10334 else
10335 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10336
10337 // If the width of the load is the same as the width of the splat,
10338 // loading with an offset would load the wrong memory.
10339 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10340 Offset = 0;
10341
10342 SDValue BasePtr = LD->getBasePtr();
10343 if (Offset != 0)
10345 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10346 SDValue Ops[] = {
10347 LD->getChain(), // Chain
10348 BasePtr, // BasePtr
10349 DAG.getValueType(Op.getValueType()) // VT
10350 };
10351 SDVTList VTL =
10352 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10353 SDValue LdSplt =
10354 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10355 Ops, LD->getMemoryVT(), LD->getMemOperand());
10356 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10357 if (LdSplt.getValueType() != SVOp->getValueType(0))
10358 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10359 return LdSplt;
10360 }
10361 }
10362
10363 // All v2i64 and v2f64 shuffles are legal
10364 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10365 return Op;
10366
10367 if (Subtarget.hasP9Vector() &&
10368 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10369 isLittleEndian)) {
10370 if (V2.isUndef())
10371 V2 = V1;
10372 else if (Swap)
10373 std::swap(V1, V2);
10374 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10375 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10376 if (ShiftElts) {
10377 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10378 DAG.getConstant(ShiftElts, dl, MVT::i32));
10379 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10380 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10381 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10382 }
10383 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10384 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10385 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10386 }
10387
10388 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10389 SDValue SplatInsertNode;
10390 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10391 return SplatInsertNode;
10392 }
10393
10394 if (Subtarget.hasP9Altivec()) {
10395 SDValue NewISDNode;
10396 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10397 return NewISDNode;
10398
10399 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10400 return NewISDNode;
10401 }
10402
10403 if (Subtarget.hasVSX() &&
10404 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10405 if (Swap)
10406 std::swap(V1, V2);
10407 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10408 SDValue Conv2 =
10409 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10410
10411 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10412 DAG.getConstant(ShiftElts, dl, MVT::i32));
10413 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10414 }
10415
10416 if (Subtarget.hasVSX() &&
10417 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10418 if (Swap)
10419 std::swap(V1, V2);
10420 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10421 SDValue Conv2 =
10422 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10423
10424 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10425 DAG.getConstant(ShiftElts, dl, MVT::i32));
10426 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10427 }
10428
10429 if (Subtarget.hasP9Vector()) {
10430 if (PPC::isXXBRHShuffleMask(SVOp)) {
10431 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10432 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10433 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10434 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10435 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10436 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10437 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10438 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10439 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10440 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10441 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10442 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10443 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10444 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10445 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10446 }
10447 }
10448
10449 if (Subtarget.hasVSX()) {
10450 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10451 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10452
10453 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10454 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10455 DAG.getConstant(SplatIdx, dl, MVT::i32));
10456 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10457 }
10458
10459 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10460 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10461 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10462 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10463 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10464 }
10465 }
10466
10467 // Cases that are handled by instructions that take permute immediates
10468 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10469 // selected by the instruction selector.
10470 if (V2.isUndef()) {
10471 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10472 PPC::isSplatShuffleMask(SVOp, 2) ||
10473 PPC::isSplatShuffleMask(SVOp, 4) ||
10474 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10475 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10476 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10477 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10478 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10479 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10480 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10481 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10482 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10483 (Subtarget.hasP8Altivec() && (
10484 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10485 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10486 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10487 return Op;
10488 }
10489 }
10490
10491 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10492 // and produce a fixed permutation. If any of these match, do not lower to
10493 // VPERM.
10494 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10495 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10496 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10497 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10498 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10499 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10500 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10501 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10502 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10503 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10504 (Subtarget.hasP8Altivec() && (
10505 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10506 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10507 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10508 return Op;
10509
10510 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10511 // perfect shuffle table to emit an optimal matching sequence.
10512 ArrayRef<int> PermMask = SVOp->getMask();
10513
10514 if (!DisablePerfectShuffle && !isLittleEndian) {
10515 unsigned PFIndexes[4];
10516 bool isFourElementShuffle = true;
10517 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10518 ++i) { // Element number
10519 unsigned EltNo = 8; // Start out undef.
10520 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10521 if (PermMask[i * 4 + j] < 0)
10522 continue; // Undef, ignore it.
10523
10524 unsigned ByteSource = PermMask[i * 4 + j];
10525 if ((ByteSource & 3) != j) {
10526 isFourElementShuffle = false;
10527 break;
10528 }
10529
10530 if (EltNo == 8) {
10531 EltNo = ByteSource / 4;
10532 } else if (EltNo != ByteSource / 4) {
10533 isFourElementShuffle = false;
10534 break;
10535 }
10536 }
10537 PFIndexes[i] = EltNo;
10538 }
10539
10540 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10541 // perfect shuffle vector to determine if it is cost effective to do this as
10542 // discrete instructions, or whether we should use a vperm.
10543 // For now, we skip this for little endian until such time as we have a
10544 // little-endian perfect shuffle table.
10545 if (isFourElementShuffle) {
10546 // Compute the index in the perfect shuffle table.
10547 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10548 PFIndexes[2] * 9 + PFIndexes[3];
10549
10550 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10551 unsigned Cost = (PFEntry >> 30);
10552
10553 // Determining when to avoid vperm is tricky. Many things affect the cost
10554 // of vperm, particularly how many times the perm mask needs to be
10555 // computed. For example, if the perm mask can be hoisted out of a loop or
10556 // is already used (perhaps because there are multiple permutes with the
10557 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10558 // permute mask out of the loop requires an extra register.
10559 //
10560 // As a compromise, we only emit discrete instructions if the shuffle can
10561 // be generated in 3 or fewer operations. When we have loop information
10562 // available, if this block is within a loop, we should avoid using vperm
10563 // for 3-operation perms and use a constant pool load instead.
10564 if (Cost < 3)
10565 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10566 }
10567 }
10568
10569 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10570 // vector that will get spilled to the constant pool.
10571 if (V2.isUndef()) V2 = V1;
10572
10573 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10574}
10575
10576SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10577 ArrayRef<int> PermMask, EVT VT,
10578 SDValue V1, SDValue V2) const {
10579 unsigned Opcode = PPCISD::VPERM;
10580 EVT ValType = V1.getValueType();
10581 SDLoc dl(Op);
10582 bool NeedSwap = false;
10583 bool isLittleEndian = Subtarget.isLittleEndian();
10584 bool isPPC64 = Subtarget.isPPC64();
10585
10586 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10587 (V1->hasOneUse() || V2->hasOneUse())) {
10588 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10589 "XXPERM instead\n");
10590 Opcode = PPCISD::XXPERM;
10591
10592 // The second input to XXPERM is also an output so if the second input has
10593 // multiple uses then copying is necessary, as a result we want the
10594 // single-use operand to be used as the second input to prevent copying.
10595 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10596 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10597 std::swap(V1, V2);
10598 NeedSwap = !NeedSwap;
10599 }
10600 }
10601
10602 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10603 // that it is in input element units, not in bytes. Convert now.
10604
10605 // For little endian, the order of the input vectors is reversed, and
10606 // the permutation mask is complemented with respect to 31. This is
10607 // necessary to produce proper semantics with the big-endian-based vperm
10608 // instruction.
10609 EVT EltVT = V1.getValueType().getVectorElementType();
10610 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10611
10612 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10613 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10614
10615 /*
10616 Vectors will be appended like so: [ V1 | v2 ]
10617 XXSWAPD on V1:
10618 [ A | B | C | D ] -> [ C | D | A | B ]
10619 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10620 i.e. index of A, B += 8, and index of C, D -= 8.
10621 XXSWAPD on V2:
10622 [ E | F | G | H ] -> [ G | H | E | F ]
10623 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10624 i.e. index of E, F += 8, index of G, H -= 8
10625 Swap V1 and V2:
10626 [ V1 | V2 ] -> [ V2 | V1 ]
10627 0-15 16-31 0-15 16-31
10628 i.e. index of V1 += 16, index of V2 -= 16
10629 */
10630
10631 SmallVector<SDValue, 16> ResultMask;
10632 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10633 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10634
10635 if (V1HasXXSWAPD) {
10636 if (SrcElt < 8)
10637 SrcElt += 8;
10638 else if (SrcElt < 16)
10639 SrcElt -= 8;
10640 }
10641 if (V2HasXXSWAPD) {
10642 if (SrcElt > 23)
10643 SrcElt -= 8;
10644 else if (SrcElt > 15)
10645 SrcElt += 8;
10646 }
10647 if (NeedSwap) {
10648 if (SrcElt < 16)
10649 SrcElt += 16;
10650 else
10651 SrcElt -= 16;
10652 }
10653 for (unsigned j = 0; j != BytesPerElement; ++j)
10654 if (isLittleEndian)
10655 ResultMask.push_back(
10656 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10657 else
10658 ResultMask.push_back(
10659 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10660 }
10661
10662 if (V1HasXXSWAPD) {
10663 dl = SDLoc(V1->getOperand(0));
10664 V1 = V1->getOperand(0)->getOperand(1);
10665 }
10666 if (V2HasXXSWAPD) {
10667 dl = SDLoc(V2->getOperand(0));
10668 V2 = V2->getOperand(0)->getOperand(1);
10669 }
10670
10671 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10672 if (ValType != MVT::v2f64)
10673 V1 = DAG.getBitcast(MVT::v2f64, V1);
10674 if (V2.getValueType() != MVT::v2f64)
10675 V2 = DAG.getBitcast(MVT::v2f64, V2);
10676 }
10677
10678 ShufflesHandledWithVPERM++;
10679 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10680 LLVM_DEBUG({
10681 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10682 if (Opcode == PPCISD::XXPERM) {
10683 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10684 } else {
10685 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10686 }
10687 SVOp->dump();
10688 dbgs() << "With the following permute control vector:\n";
10689 VPermMask.dump();
10690 });
10691
10692 if (Opcode == PPCISD::XXPERM)
10693 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10694
10695 // Only need to place items backwards in LE,
10696 // the mask was properly calculated.
10697 if (isLittleEndian)
10698 std::swap(V1, V2);
10699
10700 SDValue VPERMNode =
10701 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10702
10703 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10704 return VPERMNode;
10705}
10706
/// getVectorCompareInfo - Given an intrinsic node, return false if it is not
/// a vector comparison. If it is, return true and fill in CompareOpc/isDot
/// with information about the intrinsic:
///   - CompareOpc is the numeric comparison opcode that the caller passes as
///     the constant operand of the PPCISD::VCMP / dot-form comparison node.
///   - isDot is true for the "_p" predicate intrinsics, which request the
///     record ("." ) form of the comparison.
/// Comparisons whose instruction only exists on newer subtargets return
/// false when the required feature is absent, so they lower as plain calls.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  // Operand 0 of an INTRINSIC_WO_CHAIN node is the intrinsic ID.
  unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  // Doubleword compares need VSX or Power8 Altivec.
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  // The not-equal family was introduced with Power9 Altivec.
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  // Signed doubleword compare needs VSX or Power8 Altivec.
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  // Unsigned doubleword compare needs VSX or Power8 Altivec.
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // Quadword compares (non-predicate form) require ISA 3.1 (Power10).
  case Intrinsic::ppc_altivec_vcmpequq:
  case Intrinsic::ppc_altivec_vcmpgtsq:
  case Intrinsic::ppc_altivec_vcmpgtuq:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq:
      CompareOpc = 647;
      break;
    }
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  // NOTE(review): the non-predicate doubleword forms below gate only on
  // P8 Altivec, while their "_p" counterparts above also accept hasVSX().
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  // Quadword predicate compares require ISA 3.1 (Power10).
  case Intrinsic::ppc_altivec_vcmpequq_p:
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq_p:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      CompareOpc = 647;
      break;
    }
    isDot = true;
    break;
  }
  return true;
}
10992
10993/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10994/// lower, do it, otherwise return null.
10995SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10996 SelectionDAG &DAG) const {
10997 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10998
10999 SDLoc dl(Op);
11000
11001 switch (IntrinsicID) {
11002 case Intrinsic::thread_pointer:
11003 // Reads the thread pointer register, used for __builtin_thread_pointer.
11004 if (Subtarget.isPPC64())
11005 return DAG.getRegister(PPC::X13, MVT::i64);
11006 return DAG.getRegister(PPC::R2, MVT::i32);
11007
11008 case Intrinsic::ppc_rldimi: {
11009 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11010 SDValue Src = Op.getOperand(1);
11011 APInt Mask = Op.getConstantOperandAPInt(4);
11012 if (Mask.isZero())
11013 return Op.getOperand(2);
11014 if (Mask.isAllOnes())
11015 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11016 uint64_t SH = Op.getConstantOperandVal(3);
11017 unsigned MB = 0, ME = 0;
11018 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11019 report_fatal_error("invalid rldimi mask!");
11020 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11021 if (ME < 63 - SH) {
11022 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11023 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11024 } else if (ME > 63 - SH) {
11025 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11026 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11027 }
11028 return SDValue(
11029 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11030 {Op.getOperand(2), Src,
11031 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11032 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11033 0);
11034 }
11035
11036 case Intrinsic::ppc_rlwimi: {
11037 APInt Mask = Op.getConstantOperandAPInt(4);
11038 if (Mask.isZero())
11039 return Op.getOperand(2);
11040 if (Mask.isAllOnes())
11041 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11042 Op.getOperand(3));
11043 unsigned MB = 0, ME = 0;
11044 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11045 report_fatal_error("invalid rlwimi mask!");
11046 return SDValue(DAG.getMachineNode(
11047 PPC::RLWIMI, dl, MVT::i32,
11048 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11049 DAG.getTargetConstant(MB, dl, MVT::i32),
11050 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11051 0);
11052 }
11053
11054 case Intrinsic::ppc_rlwnm: {
11055 if (Op.getConstantOperandVal(3) == 0)
11056 return DAG.getConstant(0, dl, MVT::i32);
11057 unsigned MB = 0, ME = 0;
11058 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11059 report_fatal_error("invalid rlwnm mask!");
11060 return SDValue(
11061 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11062 {Op.getOperand(1), Op.getOperand(2),
11063 DAG.getTargetConstant(MB, dl, MVT::i32),
11064 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11065 0);
11066 }
11067
11068 case Intrinsic::ppc_mma_disassemble_acc: {
11069 if (Subtarget.isISAFuture()) {
11070 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11071 SDValue WideVec =
11072 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11073 Op.getOperand(1)),
11074 0);
11076 SDValue Value = SDValue(WideVec.getNode(), 0);
11077 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11078
11079 SDValue Extract;
11080 Extract = DAG.getNode(
11081 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11082 Subtarget.isLittleEndian() ? Value2 : Value,
11083 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11084 dl, getPointerTy(DAG.getDataLayout())));
11085 RetOps.push_back(Extract);
11086 Extract = DAG.getNode(
11087 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11088 Subtarget.isLittleEndian() ? Value2 : Value,
11089 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11090 dl, getPointerTy(DAG.getDataLayout())));
11091 RetOps.push_back(Extract);
11092 Extract = DAG.getNode(
11093 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11094 Subtarget.isLittleEndian() ? Value : Value2,
11095 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11096 dl, getPointerTy(DAG.getDataLayout())));
11097 RetOps.push_back(Extract);
11098 Extract = DAG.getNode(
11099 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11100 Subtarget.isLittleEndian() ? Value : Value2,
11101 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11102 dl, getPointerTy(DAG.getDataLayout())));
11103 RetOps.push_back(Extract);
11104 return DAG.getMergeValues(RetOps, dl);
11105 }
11106 [[fallthrough]];
11107 }
11108 case Intrinsic::ppc_vsx_disassemble_pair: {
11109 int NumVecs = 2;
11110 SDValue WideVec = Op.getOperand(1);
11111 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11112 NumVecs = 4;
11113 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11114 }
11116 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11117 SDValue Extract = DAG.getNode(
11118 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11119 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11120 : VecNo,
11121 dl, getPointerTy(DAG.getDataLayout())));
11122 RetOps.push_back(Extract);
11123 }
11124 return DAG.getMergeValues(RetOps, dl);
11125 }
11126
11127 case Intrinsic::ppc_mma_build_dmr: {
11130 for (int i = 1; i < 9; i += 2) {
11131 SDValue Hi = Op.getOperand(i);
11132 SDValue Lo = Op.getOperand(i + 1);
11133 if (Hi->getOpcode() == ISD::LOAD)
11134 Chains.push_back(Hi.getValue(1));
11135 if (Lo->getOpcode() == ISD::LOAD)
11136 Chains.push_back(Lo.getValue(1));
11137 Pairs.push_back(
11138 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11139 }
11140 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11141 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11142 return DAG.getMergeValues({Value, TF}, dl);
11143 }
11144
11145 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11146 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11147 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11148 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11149 "Specify P of 0 or 1 for lower or upper 512 bytes");
11150 unsigned HiLo = Idx->getSExtValue();
11151 unsigned Opcode;
11152 unsigned Subx;
11153 if (HiLo == 0) {
11154 Opcode = PPC::DMXXEXTFDMR512;
11155 Subx = PPC::sub_wacc_lo;
11156 } else {
11157 Opcode = PPC::DMXXEXTFDMR512_HI;
11158 Subx = PPC::sub_wacc_hi;
11159 }
11160 SDValue Subreg(
11161 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11162 Op.getOperand(1),
11163 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11164 0);
11165 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11166 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11167 }
11168
11169 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11170 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11171 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11172 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11173 "Specify a dmr row pair 0-3");
11174 unsigned IdxVal = Idx->getSExtValue();
11175 unsigned Subx;
11176 switch (IdxVal) {
11177 case 0:
11178 Subx = PPC::sub_dmrrowp0;
11179 break;
11180 case 1:
11181 Subx = PPC::sub_dmrrowp1;
11182 break;
11183 case 2:
11184 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11185 break;
11186 case 3:
11187 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11188 break;
11189 }
11190 SDValue Subreg(
11191 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11192 Op.getOperand(1),
11193 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11194 0);
11195 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11196 return SDValue(
11197 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11198 0);
11199 }
11200
11201 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11202 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11203 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11204 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11205 "Specify P of 0 or 1 for lower or upper 512 bytes");
11206 unsigned HiLo = Idx->getSExtValue();
11207 unsigned Opcode;
11208 unsigned Subx;
11209 if (HiLo == 0) {
11210 Opcode = PPC::DMXXINSTDMR512;
11211 Subx = PPC::sub_wacc_lo;
11212 } else {
11213 Opcode = PPC::DMXXINSTDMR512_HI;
11214 Subx = PPC::sub_wacc_hi;
11215 }
11216 SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
11217 SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
11218 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11219 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11220 Op.getOperand(1), Wacc, SubReg),
11221 0);
11222 }
11223
11224 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11225 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11226 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11227 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11228 "Specify a dmr row pair 0-3");
11229 unsigned IdxVal = Idx->getSExtValue();
11230 unsigned Subx;
11231 switch (IdxVal) {
11232 case 0:
11233 Subx = PPC::sub_dmrrowp0;
11234 break;
11235 case 1:
11236 Subx = PPC::sub_dmrrowp1;
11237 break;
11238 case 2:
11239 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11240 break;
11241 case 3:
11242 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11243 break;
11244 }
11245 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11246 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11247 SDValue Ops[] = {Op.getOperand(2), P};
11248 SDValue DMRRowp = SDValue(
11249 DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
11250 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11251 Op.getOperand(1), DMRRowp, SubReg),
11252 0);
11253 }
11254
11255 case Intrinsic::ppc_mma_xxmfacc:
11256 case Intrinsic::ppc_mma_xxmtacc: {
11257 // Allow pre-isa-future subtargets to lower as normal.
11258 if (!Subtarget.isISAFuture())
11259 return SDValue();
11260 // The intrinsics for xxmtacc and xxmfacc take one argument of
11261 // type v512i1, for future cpu the corresponding wacc instruction
11262 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11263 // the need to produce the xxm[t|f]acc.
11264 SDValue WideVec = Op.getOperand(1);
11265 DAG.ReplaceAllUsesWith(Op, WideVec);
11266 return SDValue();
11267 }
11268
11269 case Intrinsic::ppc_unpack_longdouble: {
11270 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11271 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11272 "Argument of long double unpack must be 0 or 1!");
11273 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11274 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11275 Idx->getValueType(0)));
11276 }
11277
11278 case Intrinsic::ppc_compare_exp_lt:
11279 case Intrinsic::ppc_compare_exp_gt:
11280 case Intrinsic::ppc_compare_exp_eq:
11281 case Intrinsic::ppc_compare_exp_uo: {
11282 unsigned Pred;
11283 switch (IntrinsicID) {
11284 case Intrinsic::ppc_compare_exp_lt:
11285 Pred = PPC::PRED_LT;
11286 break;
11287 case Intrinsic::ppc_compare_exp_gt:
11288 Pred = PPC::PRED_GT;
11289 break;
11290 case Intrinsic::ppc_compare_exp_eq:
11291 Pred = PPC::PRED_EQ;
11292 break;
11293 case Intrinsic::ppc_compare_exp_uo:
11294 Pred = PPC::PRED_UN;
11295 break;
11296 }
11297 return SDValue(
11298 DAG.getMachineNode(
11299 PPC::SELECT_CC_I4, dl, MVT::i32,
11300 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11301 Op.getOperand(1), Op.getOperand(2)),
11302 0),
11303 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11304 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11305 0);
11306 }
11307 case Intrinsic::ppc_test_data_class: {
11308 EVT OpVT = Op.getOperand(1).getValueType();
11309 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11310 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11311 : PPC::XSTSTDCSP);
11312 return SDValue(
11313 DAG.getMachineNode(
11314 PPC::SELECT_CC_I4, dl, MVT::i32,
11315 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11316 Op.getOperand(1)),
11317 0),
11318 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11319 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11320 0);
11321 }
11322 case Intrinsic::ppc_fnmsub: {
11323 EVT VT = Op.getOperand(1).getValueType();
11324 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11325 return DAG.getNode(
11326 ISD::FNEG, dl, VT,
11327 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11328 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11329 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11330 Op.getOperand(2), Op.getOperand(3));
11331 }
11332 case Intrinsic::ppc_convert_f128_to_ppcf128:
11333 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11334 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11335 ? RTLIB::CONVERT_PPCF128_F128
11336 : RTLIB::CONVERT_F128_PPCF128;
11337 MakeLibCallOptions CallOptions;
11338 std::pair<SDValue, SDValue> Result =
11339 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11340 dl, SDValue());
11341 return Result.first;
11342 }
11343 case Intrinsic::ppc_maxfe:
11344 case Intrinsic::ppc_maxfl:
11345 case Intrinsic::ppc_maxfs:
11346 case Intrinsic::ppc_minfe:
11347 case Intrinsic::ppc_minfl:
11348 case Intrinsic::ppc_minfs: {
11349 EVT VT = Op.getValueType();
11350 assert(
11351 all_of(Op->ops().drop_front(4),
11352 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11353 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11354 (void)VT;
11356 if (IntrinsicID == Intrinsic::ppc_minfe ||
11357 IntrinsicID == Intrinsic::ppc_minfl ||
11358 IntrinsicID == Intrinsic::ppc_minfs)
11359 CC = ISD::SETLT;
11360 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11361 SDValue Res = Op.getOperand(I);
11362 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11363 Res =
11364 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11365 }
11366 return Res;
11367 }
11368 }
11369
11370 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11371 // opcode number of the comparison.
11372 int CompareOpc;
11373 bool isDot;
11374 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11375 return SDValue(); // Don't custom lower most intrinsics.
11376
11377 // If this is a non-dot comparison, make the VCMP node and we are done.
11378 if (!isDot) {
11379 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11380 Op.getOperand(1), Op.getOperand(2),
11381 DAG.getConstant(CompareOpc, dl, MVT::i32));
11382 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11383 }
11384
11385 // Create the PPCISD altivec 'dot' comparison node.
11386 SDValue Ops[] = {
11387 Op.getOperand(2), // LHS
11388 Op.getOperand(3), // RHS
11389 DAG.getConstant(CompareOpc, dl, MVT::i32)
11390 };
11391 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11392 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11393
11394 // Unpack the result based on how the target uses it.
11395 unsigned BitNo; // Bit # of CR6.
11396 bool InvertBit; // Invert result?
11397 unsigned Bitx;
11398 unsigned SetOp;
11399 switch (Op.getConstantOperandVal(1)) {
11400 default: // Can't happen, don't crash on invalid number though.
11401 case 0: // Return the value of the EQ bit of CR6.
11402 BitNo = 0;
11403 InvertBit = false;
11404 Bitx = PPC::sub_eq;
11405 SetOp = PPCISD::SETBC;
11406 break;
11407 case 1: // Return the inverted value of the EQ bit of CR6.
11408 BitNo = 0;
11409 InvertBit = true;
11410 Bitx = PPC::sub_eq;
11411 SetOp = PPCISD::SETBCR;
11412 break;
11413 case 2: // Return the value of the LT bit of CR6.
11414 BitNo = 2;
11415 InvertBit = false;
11416 Bitx = PPC::sub_lt;
11417 SetOp = PPCISD::SETBC;
11418 break;
11419 case 3: // Return the inverted value of the LT bit of CR6.
11420 BitNo = 2;
11421 InvertBit = true;
11422 Bitx = PPC::sub_lt;
11423 SetOp = PPCISD::SETBCR;
11424 break;
11425 }
11426
11427 SDValue GlueOp = CompNode.getValue(1);
11428 if (Subtarget.isISA3_1()) {
11429 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11430 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11431 SDValue CRBit =
11432 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11433 CR6Reg, SubRegIdx, GlueOp),
11434 0);
11435 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11436 }
11437
11438 // Now that we have the comparison, emit a copy from the CR to a GPR.
11439 // This is flagged to the above dot comparison.
11440 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11441 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11442
11443 // Shift the bit into the low position.
11444 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11445 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11446 // Isolate the bit.
11447 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11448 DAG.getConstant(1, dl, MVT::i32));
11449
11450 // If we are supposed to, toggle the bit.
11451 if (InvertBit)
11452 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11453 DAG.getConstant(1, dl, MVT::i32));
11454 return Flags;
11455}
11456
11457SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11458 SelectionDAG &DAG) const {
11459 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11460 // the beginning of the argument list.
11461 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11462 SDLoc DL(Op);
11463 switch (Op.getConstantOperandVal(ArgStart)) {
11464 case Intrinsic::ppc_cfence: {
11465 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11466 SDValue Val = Op.getOperand(ArgStart + 1);
11467 EVT Ty = Val.getValueType();
11468 if (Ty == MVT::i128) {
11469 // FIXME: Testing one of two paired registers is sufficient to guarantee
11470 // ordering?
11471 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11472 }
11473 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11474 return SDValue(
11475 DAG.getMachineNode(
11476 Opcode, DL, MVT::Other,
11477 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11478 Op.getOperand(0)),
11479 0);
11480 }
11481 case Intrinsic::ppc_mma_disassemble_dmr: {
11482 return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11483 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11484 }
11485 default:
11486 break;
11487 }
11488 return SDValue();
11489}
11490
11491// Lower scalar BSWAP64 to xxbrd.
11492SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11493 SDLoc dl(Op);
11494 if (!Subtarget.isPPC64())
11495 return Op;
11496 // MTVSRDD
11497 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11498 Op.getOperand(0));
11499 // XXBRD
11500 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11501 // MFVSRD
11502 int VectorIndex = 0;
11503 if (Subtarget.isLittleEndian())
11504 VectorIndex = 1;
11505 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11506 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11507 return Op;
11508}
11509
11510// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11511// compared to a value that is atomically loaded (atomic loads zero-extend).
11512SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11513 SelectionDAG &DAG) const {
11514 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11515 "Expecting an atomic compare-and-swap here.");
11516 SDLoc dl(Op);
11517 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11518 EVT MemVT = AtomicNode->getMemoryVT();
11519 if (MemVT.getSizeInBits() >= 32)
11520 return Op;
11521
11522 SDValue CmpOp = Op.getOperand(2);
11523 // If this is already correctly zero-extended, leave it alone.
11524 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11525 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11526 return Op;
11527
11528 // Clear the high bits of the compare operand.
11529 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11530 SDValue NewCmpOp =
11531 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11532 DAG.getConstant(MaskVal, dl, MVT::i32));
11533
11534 // Replace the existing compare operand with the properly zero-extended one.
11536 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11537 Ops.push_back(AtomicNode->getOperand(i));
11538 Ops[2] = NewCmpOp;
11539 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11540 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11541 auto NodeTy =
11542 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11543 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11544}
11545
11546SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11547 SelectionDAG &DAG) const {
11548 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11549 EVT MemVT = N->getMemoryVT();
11550 assert(MemVT.getSimpleVT() == MVT::i128 &&
11551 "Expect quadword atomic operations");
11552 SDLoc dl(N);
11553 unsigned Opc = N->getOpcode();
11554 switch (Opc) {
11555 case ISD::ATOMIC_LOAD: {
11556 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11557 // lowered to ppc instructions by pattern matching instruction selector.
11558 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11560 N->getOperand(0),
11561 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11562 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11563 Ops.push_back(N->getOperand(I));
11564 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11565 Ops, MemVT, N->getMemOperand());
11566 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11567 SDValue ValHi =
11568 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11569 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11570 DAG.getConstant(64, dl, MVT::i32));
11571 SDValue Val =
11572 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11573 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11574 {Val, LoadedVal.getValue(2)});
11575 }
11576 case ISD::ATOMIC_STORE: {
11577 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11578 // lowered to ppc instructions by pattern matching instruction selector.
11579 SDVTList Tys = DAG.getVTList(MVT::Other);
11581 N->getOperand(0),
11582 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11583 SDValue Val = N->getOperand(1);
11584 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11585 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11586 DAG.getConstant(64, dl, MVT::i32));
11587 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11588 Ops.push_back(ValLo);
11589 Ops.push_back(ValHi);
11590 Ops.push_back(N->getOperand(2));
11591 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11592 N->getMemOperand());
11593 }
11594 default:
11595 llvm_unreachable("Unexpected atomic opcode");
11596 }
11597}
11598
11600 SelectionDAG &DAG,
11601 const PPCSubtarget &Subtarget) {
11602 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11603
11604 enum DataClassMask {
11605 DC_NAN = 1 << 6,
11606 DC_NEG_INF = 1 << 4,
11607 DC_POS_INF = 1 << 5,
11608 DC_NEG_ZERO = 1 << 2,
11609 DC_POS_ZERO = 1 << 3,
11610 DC_NEG_SUBNORM = 1,
11611 DC_POS_SUBNORM = 1 << 1,
11612 };
11613
11614 EVT VT = Op.getValueType();
11615
11616 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11617 : VT == MVT::f64 ? PPC::XSTSTDCDP
11618 : PPC::XSTSTDCSP;
11619
11620 if (Mask == fcAllFlags)
11621 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11622 if (Mask == 0)
11623 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11624
11625 // When it's cheaper or necessary to test reverse flags.
11626 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11627 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11628 return DAG.getNOT(Dl, Rev, MVT::i1);
11629 }
11630
11631 // Power doesn't support testing whether a value is 'normal'. Test the rest
11632 // first, and test if it's 'not not-normal' with expected sign.
11633 if (Mask & fcNormal) {
11634 SDValue Rev(DAG.getMachineNode(
11635 TestOp, Dl, MVT::i32,
11636 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11637 DC_NEG_ZERO | DC_POS_ZERO |
11638 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11639 Dl, MVT::i32),
11640 Op),
11641 0);
11642 // Sign are stored in CR bit 0, result are in CR bit 2.
11643 SDValue Sign(
11644 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11645 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11646 0);
11647 SDValue Normal(DAG.getNOT(
11648 Dl,
11650 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11651 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11652 0),
11653 MVT::i1));
11654 if (Mask & fcPosNormal)
11655 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11656 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11657 if (Mask == fcPosNormal || Mask == fcNegNormal)
11658 return Result;
11659
11660 return DAG.getNode(
11661 ISD::OR, Dl, MVT::i1,
11662 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11663 }
11664
11665 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11666 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11667 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11668 bool IsQuiet = Mask & fcQNan;
11669 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11670
11671 // Quietness is determined by the first bit in fraction field.
11672 uint64_t QuietMask = 0;
11673 SDValue HighWord;
11674 if (VT == MVT::f128) {
11675 HighWord = DAG.getNode(
11676 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11677 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11678 QuietMask = 0x8000;
11679 } else if (VT == MVT::f64) {
11680 if (Subtarget.isPPC64()) {
11681 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11682 DAG.getBitcast(MVT::i64, Op),
11683 DAG.getConstant(1, Dl, MVT::i32));
11684 } else {
11685 SDValue Vec = DAG.getBitcast(
11686 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11687 HighWord = DAG.getNode(
11688 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11689 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11690 }
11691 QuietMask = 0x80000;
11692 } else if (VT == MVT::f32) {
11693 HighWord = DAG.getBitcast(MVT::i32, Op);
11694 QuietMask = 0x400000;
11695 }
11696 SDValue NanRes = DAG.getSetCC(
11697 Dl, MVT::i1,
11698 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11699 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11700 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11701 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11702 if (Mask == fcQNan || Mask == fcSNan)
11703 return NanRes;
11704
11705 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11706 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11707 NanRes);
11708 }
11709
11710 unsigned NativeMask = 0;
11711 if ((Mask & fcNan) == fcNan)
11712 NativeMask |= DC_NAN;
11713 if (Mask & fcNegInf)
11714 NativeMask |= DC_NEG_INF;
11715 if (Mask & fcPosInf)
11716 NativeMask |= DC_POS_INF;
11717 if (Mask & fcNegZero)
11718 NativeMask |= DC_NEG_ZERO;
11719 if (Mask & fcPosZero)
11720 NativeMask |= DC_POS_ZERO;
11721 if (Mask & fcNegSubnormal)
11722 NativeMask |= DC_NEG_SUBNORM;
11723 if (Mask & fcPosSubnormal)
11724 NativeMask |= DC_POS_SUBNORM;
11725 return SDValue(
11726 DAG.getMachineNode(
11727 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11729 TestOp, Dl, MVT::i32,
11730 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11731 0),
11732 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11733 0);
11734}
11735
11736SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11737 SelectionDAG &DAG) const {
11738 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11739 SDValue LHS = Op.getOperand(0);
11740 uint64_t RHSC = Op.getConstantOperandVal(1);
11741 SDLoc Dl(Op);
11742 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11743 if (LHS.getValueType() == MVT::ppcf128) {
11744 // The higher part determines the value class.
11745 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11746 DAG.getConstant(1, Dl, MVT::i32));
11747 }
11748
11749 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11750}
11751
11752// Adjust the length value for a load/store with length to account for the
11753// instructions requiring a left justified length, and for non-byte element
11754// types requiring scaling by element size.
11755static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11756 SelectionDAG &DAG) {
11757 SDLoc dl(Val);
11758 EVT VT = Val->getValueType(0);
11759 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11760 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11761 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11762 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11763}
11764
11765SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11766 auto VPLD = cast<VPLoadSDNode>(Op);
11767 bool Future = Subtarget.isISAFuture();
11768 SDLoc dl(Op);
11769 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11770 "Mask predication not supported");
11771 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11772 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11773 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11774 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11775 Len = AdjustLength(Len, EltBits, !Future, DAG);
11776 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11777 VPLD->getOperand(1), Len};
11778 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11779 SDValue VPL =
11781 VPLD->getMemoryVT(), VPLD->getMemOperand());
11782 return VPL;
11783}
11784
11785SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11786 auto VPST = cast<VPStoreSDNode>(Op);
11787 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11788 "Mask predication not supported");
11789 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11790 SDLoc dl(Op);
11791 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11792 unsigned EltBits =
11793 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11794 bool Future = Subtarget.isISAFuture();
11795 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11796 Len = AdjustLength(Len, EltBits, !Future, DAG);
11797 SDValue Ops[] = {
11798 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11799 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11800 VPST->getOperand(2), Len};
11801 SDVTList Tys = DAG.getVTList(MVT::Other);
11802 SDValue VPS =
11804 VPST->getMemoryVT(), VPST->getMemOperand());
11805 return VPS;
11806}
11807
11808SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11809 SelectionDAG &DAG) const {
11810 SDLoc dl(Op);
11811
11812 MachineFunction &MF = DAG.getMachineFunction();
11813 SDValue Op0 = Op.getOperand(0);
11814 EVT ValVT = Op0.getValueType();
11815 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11816 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11817 int64_t IntVal = Op.getConstantOperandVal(0);
11818 if (IntVal >= -16 && IntVal <= 15)
11819 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11820 dl);
11821 }
11822
11823 ReuseLoadInfo RLI;
11824 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11825 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11826 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11827 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11828
11829 MachineMemOperand *MMO =
11831 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11832 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11834 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11835 MVT::i32, MMO);
11836 if (RLI.ResChain)
11837 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11838 return Bits.getValue(0);
11839 }
11840
11841 // Create a stack slot that is 16-byte aligned.
11842 MachineFrameInfo &MFI = MF.getFrameInfo();
11843 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11844 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11845 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11846
11847 SDValue Val = Op0;
11848 // P10 hardware store forwarding requires that a single store contains all
11849 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11850 // to avoid load hit store on P10 when running binaries compiled for older
11851 // processors by generating two mergeable scalar stores to forward with the
11852 // vector load.
11853 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11854 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11855 ValVT.getSizeInBits() <= 64) {
11856 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11857 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11858 SDValue ShiftBy = DAG.getConstant(
11859 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11860 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11861 SDValue Plus8 =
11862 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11863 SDValue Store2 =
11864 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11865 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11866 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11867 MachinePointerInfo());
11868 }
11869
11870 // Store the input value into Value#0 of the stack slot.
11871 SDValue Store =
11872 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11873 // Load it out.
11874 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11875}
11876
11877SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11878 SelectionDAG &DAG) const {
11879 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11880 "Should only be called for ISD::INSERT_VECTOR_ELT");
11881
11882 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11883
11884 EVT VT = Op.getValueType();
11885 SDLoc dl(Op);
11886 SDValue V1 = Op.getOperand(0);
11887 SDValue V2 = Op.getOperand(1);
11888
11889 if (VT == MVT::v2f64 && C)
11890 return Op;
11891
11892 if (Subtarget.hasP9Vector()) {
11893 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11894 // because on P10, it allows this specific insert_vector_elt load pattern to
11895 // utilize the refactored load and store infrastructure in order to exploit
11896 // prefixed loads.
11897 // On targets with inexpensive direct moves (Power9 and up), a
11898 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11899 // load since a single precision load will involve conversion to double
11900 // precision on the load followed by another conversion to single precision.
11901 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11902 (isa<LoadSDNode>(V2))) {
11903 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11904 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11905 SDValue InsVecElt =
11906 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11907 BitcastLoad, Op.getOperand(2));
11908 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11909 }
11910 }
11911
11912 if (Subtarget.isISA3_1()) {
11913 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11914 return SDValue();
11915 // On P10, we have legal lowering for constant and variable indices for
11916 // all vectors.
11917 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11918 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11919 return Op;
11920 }
11921
11922 // Before P10, we have legal lowering for constant indices but not for
11923 // variable ones.
11924 if (!C)
11925 return SDValue();
11926
11927 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11928 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11929 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11930 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11931 unsigned InsertAtElement = C->getZExtValue();
11932 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11933 if (Subtarget.isLittleEndian()) {
11934 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11935 }
11936 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11937 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11938 }
11939 return Op;
11940}
11941
11942SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
11943 SelectionDAG &DAG) const {
11944 SDLoc dl(Op);
11945 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11946 SDValue LoadChain = LN->getChain();
11947 SDValue BasePtr = LN->getBasePtr();
11948 EVT VT = Op.getValueType();
11949 bool IsV1024i1 = VT == MVT::v1024i1;
11950 bool IsV2048i1 = VT == MVT::v2048i1;
11951
11952 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
11953 // Dense Math dmr pair registers, respectively.
11954 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
11955 (void)IsV2048i1;
11956 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11957 "Dense Math support required.");
11958 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11959
11961 SmallVector<SDValue, 8> LoadChains;
11962
11963 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
11964 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
11965 MachineMemOperand *MMO = LN->getMemOperand();
11966 unsigned NumVecs = VT.getSizeInBits() / 256;
11967 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11968 MachineMemOperand *NewMMO =
11969 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
11970 if (Idx > 0) {
11971 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11972 DAG.getConstant(32, dl, BasePtr.getValueType()));
11973 LoadOps[2] = BasePtr;
11974 }
11976 DAG.getVTList(MVT::v256i1, MVT::Other),
11977 LoadOps, MVT::v256i1, NewMMO);
11978 LoadChains.push_back(Ld.getValue(1));
11979 Loads.push_back(Ld);
11980 }
11981
11982 if (Subtarget.isLittleEndian()) {
11983 std::reverse(Loads.begin(), Loads.end());
11984 std::reverse(LoadChains.begin(), LoadChains.end());
11985 }
11986
11987 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11988 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[0],
11989 Loads[1]),
11990 0);
11991 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
11992 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
11993 Loads[2], Loads[3]),
11994 0);
11995 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
11996 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
11997 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
11998
11999 SDValue Value =
12000 SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
12001
12002 if (IsV1024i1) {
12003 return DAG.getMergeValues({Value, TF}, dl);
12004 }
12005
12006 // Handle Loads for V2048i1 which represents a dmr pair.
12007 SDValue DmrPValue;
12008 SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
12009 Loads[4], Loads[5]),
12010 0);
12011 SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12012 Loads[6], Loads[7]),
12013 0);
12014 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
12015 SDValue Dmr1Value = SDValue(
12016 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
12017
12018 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12019 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12020
12021 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12022 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12023
12024 DmrPValue = SDValue(
12025 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12026
12027 return DAG.getMergeValues({DmrPValue, TF}, dl);
12028}
12029
12030SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12031 const SDLoc &dl,
12032 SelectionDAG &DAG) const {
12033 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
12034 Pairs[1]),
12035 0);
12036 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12037 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12038 Pairs[2], Pairs[3]),
12039 0);
12040 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12041 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12042
12043 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12044 {RC, Lo, LoSub, Hi, HiSub}),
12045 0);
12046}
12047
12048SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12049 SelectionDAG &DAG) const {
12050 SDLoc dl(Op);
12051 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12052 SDValue LoadChain = LN->getChain();
12053 SDValue BasePtr = LN->getBasePtr();
12054 EVT VT = Op.getValueType();
12055
12056 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12057 return LowerDMFVectorLoad(Op, DAG);
12058
12059 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12060 return Op;
12061
12062 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12063 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
12064 // 2 or 4 vsx registers.
12065 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12066 "Type unsupported without MMA");
12067 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12068 "Type unsupported without paired vector support");
12069 Align Alignment = LN->getAlign();
12071 SmallVector<SDValue, 4> LoadChains;
12072 unsigned NumVecs = VT.getSizeInBits() / 128;
12073 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12074 SDValue Load =
12075 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12076 LN->getPointerInfo().getWithOffset(Idx * 16),
12077 commonAlignment(Alignment, Idx * 16),
12078 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12079 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12080 DAG.getConstant(16, dl, BasePtr.getValueType()));
12081 Loads.push_back(Load);
12082 LoadChains.push_back(Load.getValue(1));
12083 }
12084 if (Subtarget.isLittleEndian()) {
12085 std::reverse(Loads.begin(), Loads.end());
12086 std::reverse(LoadChains.begin(), LoadChains.end());
12087 }
12088 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12089 SDValue Value =
12090 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12091 dl, VT, Loads);
12092 SDValue RetOps[] = {Value, TF};
12093 return DAG.getMergeValues(RetOps, dl);
12094}
12095
// Lower a store of a Dense Math value (v1024i1 = one dmr register,
// v2048i1 = a dmr pair) by extracting v256i1 vector pairs via
// DMXXEXTFDMR512/_HI and emitting one 32-byte stxvp per pair.
SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
                                               SelectionDAG &DAG) const {

  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  EVT VT = SN->getValue().getValueType();
  bool IsV1024i1 = VT == MVT::v1024i1;
  bool IsV2048i1 = VT == MVT::v2048i1;

  // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
  // Dense Math dmr pair registers, respectively.
  assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
  (void)IsV2048i1;
  assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
         "Dense Math support required.");
  assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");

  // Each DMXXEXTFDMR512[_HI] extraction yields two v256i1 vector pairs.
  EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
  if (IsV1024i1) {
                   TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
                   Op.getOperand(1),
                   DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
               0);
                   TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
                   Op.getOperand(1),
                   DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
               0);
    MachineSDNode *ExtNode =
        DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
    Values.push_back(SDValue(ExtNode, 0));
    Values.push_back(SDValue(ExtNode, 1));
    ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
    Values.push_back(SDValue(ExtNode, 0));
    Values.push_back(SDValue(ExtNode, 1));
  } else {
    // This corresponds to v2048i1 which represents a dmr pair.
    SDValue Dmr0(
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
                           Op.getOperand(1),
                           DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
        0);

    SDValue Dmr1(
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
                           Op.getOperand(1),
                           DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
        0);

    // Split each dmr of the pair into its low and high wacc halves.
    SDValue Dmr0Lo(DAG.getMachineNode(
                       TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
                       DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
                   0);

    SDValue Dmr0Hi(DAG.getMachineNode(
                       TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
                       DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
                   0);

    SDValue Dmr1Lo(DAG.getMachineNode(
                       TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
                       DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
                   0);

    SDValue Dmr1Hi(DAG.getMachineNode(
                       TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
                       DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
                   0);

    // Extract two vector pairs from each 512-bit half.
    MachineSDNode *ExtNode =
        DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
    Values.push_back(SDValue(ExtNode, 0));
    Values.push_back(SDValue(ExtNode, 1));
    ExtNode =
        DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
    Values.push_back(SDValue(ExtNode, 0));
    Values.push_back(SDValue(ExtNode, 1));
    ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
    Values.push_back(SDValue(ExtNode, 0));
    Values.push_back(SDValue(ExtNode, 1));
    ExtNode =
        DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
    Values.push_back(SDValue(ExtNode, 0));
    Values.push_back(SDValue(ExtNode, 1));
  }

  // The 256-bit chunks are laid out in memory in the opposite order on
  // little-endian targets.
  if (Subtarget.isLittleEndian())
    std::reverse(Values.begin(), Values.end());

  SDVTList Tys = DAG.getVTList(MVT::Other);
      StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
      Values[0], BasePtr};
  MachineMemOperand *MMO = SN->getMemOperand();
  // Emit one 32-byte stxvp per v256i1 chunk, advancing the pointer by 32
  // bytes between stores.
  unsigned NumVecs = VT.getSizeInBits() / 256;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    MachineMemOperand *NewMMO =
        DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
    if (Idx > 0) {
      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(32, dl, BasePtr.getValueType()));
      Ops[3] = BasePtr;
    }
    Ops[2] = Values[Idx];
        MVT::v256i1, NewMMO);
    Stores.push_back(St);
  }

  // Tie all the emitted store chains together.
  SDValue TF = DAG.getTokenFactor(dl, Stores);
  return TF;
}
12213
// Lower stores of v256i1 (VSX register pair) and v512i1 (MMA accumulator)
// values by storing the underlying v16i8 registers individually. Dense Math
// types are delegated to LowerDMFVectorStore; other types pass through.
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();
  // Value2 holds the second extracted half on ISA-Future accumulators.
  SDValue Value2 = SN->getValue();
  EVT StoreVT = Value.getValueType();

  if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
    return LowerDMFVectorStore(Op, DAG);

  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
  // underlying registers individually.
  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");
  Align Alignment = SN->getAlign();
  unsigned NumVecs = 2;
  if (StoreVT == MVT::v512i1) {
    if (Subtarget.isISAFuture()) {
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
      // On ISA-Future, unpack the accumulator into two vector pairs first.
      MachineSDNode *ExtNode = DAG.getMachineNode(
          PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));

      Value = SDValue(ExtNode, 0);
      Value2 = SDValue(ExtNode, 1);
    } else
      // Otherwise move the accumulator to VSX registers before extracting.
      Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
    NumVecs = 4;
  }
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Underlying registers are laid out in reverse order on little endian.
    unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
    SDValue Elt;
    if (Subtarget.isISAFuture()) {
      // Each extracted pair contributes two registers; index within the pair.
      VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
                        Idx > 1 ? Value2 : Value,
                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
    } else
      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));

    // Each 16-byte register is stored at a 16-byte offset from the base.
    SDValue Store =
        DAG.getStore(StoreChain, dl, Elt, BasePtr,
                     SN->getPointerInfo().getWithOffset(Idx * 16),
                     commonAlignment(Alignment, Idx * 16),
                     SN->getMemOperand()->getFlags(), SN->getAAInfo());
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Stores.push_back(Store);
  }
  SDValue TF = DAG.getTokenFactor(dl, Stores);
  return TF;
}
12276
// Custom-lower ISD::MUL for vector types without a native full multiply:
// v4i32 is synthesized from halfword multiplies, and v16i8 from even/odd
// byte multiplies merged by a shuffle.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
    // +16 as shift amt.
    SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, MVT::v4i32);

    // Multiply-sum LHS against the halfword-rotated RHS to collect the cross
    // terms that belong in the upper 16 bits of each 32-bit product.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
12340
12341SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12342 bool IsStrict = Op->isStrictFPOpcode();
12343 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12344 !Subtarget.hasP9Vector())
12345 return SDValue();
12346
12347 return Op;
12348}
12349
// Custom lowering for fpext v2f32 to v2f64. Only specific producer patterns
// (extract_subvector of v4f32, binary FP ops on loads, or a plain load) are
// handled; anything else returns SDValue() to use default lowering.
SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::FP_EXTEND &&
         "Should only be called for ISD::FP_EXTEND");

  // FIXME: handle extends from half precision float vectors on P9.
  // We only want to custom lower an extend from v2f32 to v2f64.
  if (Op.getValueType() != MVT::v2f64 ||
      Op.getOperand(0).getValueType() != MVT::v2f32)
    return SDValue();

  SDLoc dl(Op);
  SDValue Op0 = Op.getOperand(0);

  switch (Op0.getOpcode()) {
  default:
    return SDValue();
    assert(Op0.getNumOperands() == 2 &&
           "Node should have 2 operands with second one being a constant!");

    if (Op0.getOperand(0).getValueType() != MVT::v4f32)
      return SDValue();

    // Custom lower is only done for high or low doubleword.
    int Idx = Op0.getConstantOperandVal(1);
    if (Idx % 2 != 0)
      return SDValue();

    // Since input is v4f32, at this point Idx is either 0 or 2.
    // Shift to get the doubleword position we want.
    int DWord = Idx >> 1;

    // High and low word positions are different on little endian.
    if (Subtarget.isLittleEndian())
      DWord ^= 0x1;

    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
                       Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
  }
  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FSUB: {
    // Rebuild the binary op on half-loaded v4f32 inputs (LD_VSX_LH), then
    // extend doubleword 0 of the result.
    SDValue NewLoad[2];
    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
      // Ensure both input are loads.
      SDValue LdOp = Op0.getOperand(i);
      if (LdOp.getOpcode() != ISD::LOAD)
        return SDValue();
      // Generate new load node.
      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
      SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
      NewLoad[i] = DAG.getMemIntrinsicNode(
          PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
          LD->getMemoryVT(), LD->getMemOperand());
    }
    SDValue NewOp =
        DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
                    NewLoad[1], Op0.getNode()->getFlags());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
                       DAG.getConstant(0, dl, MVT::i32));
  }
  case ISD::LOAD: {
    // Load just the low half with LD_VSX_LH and extend doubleword 0.
    LoadSDNode *LD = cast<LoadSDNode>(Op0);
    SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
    SDValue NewLd = DAG.getMemIntrinsicNode(
        PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
        LD->getMemoryVT(), LD->getMemOperand());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
                       DAG.getConstant(0, dl, MVT::i32));
  }
  }
  llvm_unreachable("ERROR:Should return for all cases within swtich.");
}
12426
                                            SelectionDAG &DAG,
                                            const PPCSubtarget &STI) {
  SDLoc DL(Value);
  // Normalize the incoming boolean carry into a 0/1 value of SumType: with
  // CR bits it arrives as an i1 and is selected to 0/1; otherwise it is
  // simply zero-extended or truncated.
  if (STI.useCRBits())
    Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
                        DAG.getConstant(1, DL, SumType),
                        DAG.getConstant(0, DL, SumType));
  else
    Value = DAG.getZExtOrTrunc(Value, DL, SumType);
  // Value + (-1) carries out exactly when Value is nonzero, so the boolean
  // is transferred into the ADDC flag result (value number 1).
  SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
                            Value, DAG.getAllOnesConstant(DL, SumType));
  return Sum.getValue(1);
}
12441
                                            EVT CarryType, SelectionDAG &DAG,
                                            const PPCSubtarget &STI) {
  SDLoc DL(Flag);
  SDValue Zero = DAG.getConstant(0, DL, SumType);
  // 0 + 0 + CA leaves just the carry in the sum, materializing the flag as
  // a 0/1 value of SumType.
  SDValue Carry = DAG.getNode(
      PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
  // Produce the result in CarryType: a setcc when CR bits are in use,
  // otherwise a zext/trunc of the materialized value.
  if (STI.useCRBits())
    return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
  return DAG.getZExtOrTrunc(Carry, DL, CarryType);
}
12453
12454SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12455
12456 SDLoc DL(Op);
12457 SDNode *N = Op.getNode();
12458 EVT VT = N->getValueType(0);
12459 EVT CarryType = N->getValueType(1);
12460 unsigned Opc = N->getOpcode();
12461 bool IsAdd = Opc == ISD::UADDO;
12462 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12463 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12464 N->getOperand(0), N->getOperand(1));
12465 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12466 DAG, Subtarget);
12467 if (!IsAdd)
12468 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12469 DAG.getConstant(1UL, DL, CarryType));
12470 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12471}
12472
12473SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12474 SelectionDAG &DAG) const {
12475 SDLoc DL(Op);
12476 SDNode *N = Op.getNode();
12477 unsigned Opc = N->getOpcode();
12478 EVT VT = N->getValueType(0);
12479 EVT CarryType = N->getValueType(1);
12480 SDValue CarryOp = N->getOperand(2);
12481 bool IsAdd = Opc == ISD::UADDO_CARRY;
12482 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12483 if (!IsAdd)
12484 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12485 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12486 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12487 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12488 Op.getOperand(0), Op.getOperand(1), CarryOp);
12489 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12490 Subtarget);
12491 if (!IsAdd)
12492 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12493 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12494 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12495}
12496
12497SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12498
12499 SDLoc dl(Op);
12500 SDValue LHS = Op.getOperand(0);
12501 SDValue RHS = Op.getOperand(1);
12502 EVT VT = Op.getNode()->getValueType(0);
12503
12504 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12505
12506 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12507 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12508
12509 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12510
12511 SDValue Overflow =
12512 DAG.getNode(ISD::SRL, dl, VT, And,
12513 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12514
12515 SDValue OverflowTrunc =
12516 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12517
12518 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12519}
12520
12521/// Implements signed add with overflow detection using the rule:
12522/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign
12523SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12524
12525 SDLoc dl(Op);
12526 SDValue LHS = Op.getOperand(0);
12527 SDValue RHS = Op.getOperand(1);
12528 EVT VT = Op.getNode()->getValueType(0);
12529
12530 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12531
12532 // Compute ~(x xor y)
12533 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12534 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12535 // Compute (s xor x)
12536 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12537
12538 // overflow = (x eqv y) & (s xor x)
12539 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
12540
12541 // Shift sign bit down to LSB
12542 SDValue Overflow =
12543 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12544 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12545 // Truncate to the overflow type (i1)
12546 SDValue OverflowTrunc =
12547 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12548
12549 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12550}
12551
12552// Lower unsigned 3-way compare producing -1/0/1.
12553SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12554 SDLoc DL(Op);
12555 SDValue A = DAG.getFreeze(Op.getOperand(0));
12556 SDValue B = DAG.getFreeze(Op.getOperand(1));
12557 EVT OpVT = A.getValueType(); // operand type
12558 EVT ResVT = Op.getValueType(); // result type
12559
12560 // First compute diff = A - B (will become subf).
12561 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12562
12563 // Generate B - A using SUBC to capture carry.
12564 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12565 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12566 SDValue CA0 = SubC.getValue(1);
12567
12568 // t2 = A - B + CA0 using SUBE.
12569 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12570 SDValue CA1 = SubE1.getValue(1);
12571
12572 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12573 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12574
12575 // Extract the first result and truncate to result type if needed
12576 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12577}
12578
/// LowerOperation - Provide custom lowering hooks for some operations.
/// Nodes marked Custom in the target description are dispatched from here;
/// returning an empty SDValue asks the legalizer to expand the node instead.
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Wasn't expecting to be able to lower this!");
  // Scalar FP math lowered to library/runtime helpers.
  case ISD::FPOW: return lowerPow(Op, DAG);
  case ISD::FSIN: return lowerSin(Op, DAG);
  case ISD::FCOS: return lowerCos(Op, DAG);
  case ISD::FLOG: return lowerLog(Op, DAG);
  case ISD::FLOG10: return lowerLog10(Op, DAG);
  case ISD::FEXP: return lowerExp(Op, DAG);
  // Address-materialization nodes.
  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
  case ISD::STRICT_FSETCC:
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::SSUBO:
    return LowerSSUBO(Op, DAG);
  case ISD::SADDO:
    return LowerSADDO(Op, DAG);

  case ISD::INLINEASM:
  case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
  // Variable argument lowering.
  case ISD::VASTART: return LowerVASTART(Op, DAG);
  case ISD::VAARG: return LowerVAARG(Op, DAG);
  case ISD::VACOPY: return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
  case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return LowerSET_ROUNDING(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);

  case ISD::FSHL: return LowerFunnelShift(Op, DAG);
  case ISD::FSHR: return LowerFunnelShift(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL: return LowerMUL(Op, DAG);
  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
  case ISD::FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);

  // For counter-based loop handling.
    return SDValue();

  case ISD::BITCAST: return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);

    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::ATOMIC_STORE:
    return LowerATOMIC_LOAD_STORE(Op, DAG);
  case ISD::IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  case ISD::UADDO:
  case ISD::USUBO:
    return LowerADDSUBO(Op, DAG);
  case ISD::UADDO_CARRY:
  case ISD::USUBO_CARRY:
    return LowerADDSUBO_CARRY(Op, DAG);
  case ISD::UCMP:
    return LowerUCMP(Op, DAG);
  case ISD::STRICT_LRINT:
  case ISD::STRICT_LLRINT:
  case ISD::STRICT_LROUND:
    // These are kept as-is only when no FP exception can be raised;
    // otherwise request expansion.
    if (Op->getFlags().hasNoFPExcept())
      return Op;
    return SDValue();
  case ISD::VP_LOAD:
    return LowerVP_LOAD(Op, DAG);
  case ISD::VP_STORE:
    return LowerVP_STORE(Op, DAG);
  }
}
12702
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  // Custom type-legalization: push one legally-typed value per result of N
  // onto Results (an empty Results means "use default legalization").
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::ATOMIC_LOAD: {
    SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
    Results.push_back(Res);
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::READCYCLECOUNTER: {
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    // Pair the two 32-bit timebase halves into a single i64 result.
    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
    Results.push_back(RTB.getValue(2));
    break;
  }
    if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    // Truncate the legal-typed setcc result back to the expected i1.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
    switch (N->getConstantOperandVal(0)) {
    case Intrinsic::ppc_pack_longdouble:
      Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                    N->getOperand(2), N->getOperand(1)));
      break;
    case Intrinsic::ppc_maxfe:
    case Intrinsic::ppc_minfe:
    case Intrinsic::ppc_fnmsub:
    case Intrinsic::ppc_convert_f128_to_ppcf128:
      Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
      break;
    }
    break;
  }
  case ISD::VAARG: {
    // Only the 32-bit SVR4 ABI needs custom legalization of i64 va_arg.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
        MVT::ppcf128)
      return;
    SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
    Results.push_back(LoweredValue);
    if (N->isStrictFPOpcode())
      Results.push_back(LoweredValue.getValue(1));
    return;
  }
  case ISD::TRUNCATE: {
    if (!N->getValueType(0).isVector())
      return;
    SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
  case ISD::SCALAR_TO_VECTOR: {
    SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
  case ISD::FSHL:
  case ISD::FSHR:
    // Don't handle funnel shifts here.
    return;
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
}
12812
12813//===----------------------------------------------------------------------===//
12814// Other Lowering Code
12815//===----------------------------------------------------------------------===//
12816
  // Emit a call to the given PPC intrinsic with no arguments.
  return Builder.CreateIntrinsic(Id, {});
}
12820
                                         Value *Addr,
                                         AtomicOrdering Ord) const {
  unsigned SZ = ValueTy->getPrimitiveSizeInBits();

  assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
         "Only 8/16/32/64-bit atomic loads supported");
  // Select the load-and-reserve intrinsic matching the access width; the
  // partword (8/16-bit) forms require the partword-atomics feature.
  Intrinsic::ID IntID;
  switch (SZ) {
  default:
    llvm_unreachable("Unexpected PrimitiveSize");
  case 8:
    IntID = Intrinsic::ppc_lbarx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 16:
    IntID = Intrinsic::ppc_lharx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 32:
    IntID = Intrinsic::ppc_lwarx;
    break;
  case 64:
    IntID = Intrinsic::ppc_ldarx;
    break;
  }
  Value *Call =
      Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");

  // Narrow the intrinsic's integer result back to the requested type.
  return Builder.CreateTruncOrBitCast(Call, ValueTy);
}
12852
// Perform a store-conditional operation to Addr. Return the status of the
// store. This should be 0 if the store succeeded, non-zero otherwise.
                                                Value *Val, Value *Addr,
                                                AtomicOrdering Ord) const {
  Type *Ty = Val->getType();
  unsigned SZ = Ty->getPrimitiveSizeInBits();

  assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
         "Only 8/16/32/64-bit atomic loads supported");
  // Select the store-conditional intrinsic matching the access width; the
  // partword (8/16-bit) forms require the partword-atomics feature.
  Intrinsic::ID IntID;
  switch (SZ) {
  default:
    llvm_unreachable("Unexpected PrimitiveSize");
  case 8:
    IntID = Intrinsic::ppc_stbcx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 16:
    IntID = Intrinsic::ppc_sthcx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 32:
    IntID = Intrinsic::ppc_stwcx;
    break;
  case 64:
    IntID = Intrinsic::ppc_stdcx;
    break;
  }

  // Partword values are passed to the intrinsic widened to i32.
  if (SZ == 8 || SZ == 16)
    Val = Builder.CreateZExt(Val, Builder.getInt32Ty());

  Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
                                        /*FMFSource=*/nullptr, "stcx");
  // Invert the intrinsic's result (XOR 1) to match the 0-on-success
  // convention documented above.
  return Builder.CreateXor(Call, Builder.getInt32(1));
}
12890
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
// Returns the fence emitted before the atomic instruction, or nullptr when
// the ordering needs no leading fence; release-or-stronger orderings get at
// least an lwsync (with the strongest case above it getting a full sync).
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  if (isReleaseOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  return nullptr;
}
12902
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  // A trailing fence is needed only for acquire-or-stronger orderings on
  // instructions that perform an atomic load.
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
    // Plain loads use the ppc_cfence intrinsic keyed on the loaded value;
    // RMW operations fall back to lwsync.
    if (isa<LoadInst>(Inst))
      return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
                                     {Inst});
    // FIXME: Can use isync for rmw operation.
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  }
  return nullptr;
}
12918
                                                   unsigned AtomicSize,
                                                   unsigned BinOpcode,
                                                   unsigned CmpOpcode,
                                                   unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // Pick the load-reserve / store-conditional pair for the access width.
  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // Build the CFG: loopMBB (plus loop2MBB for compare-based min/max forms)
  // falling through to exitMBB, which takes over the rest of BB.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // For plain swaps (BinOpcode==0) the stored value is incr itself.
  Register TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                           : &PPC::GPRCRegClass);

  // thisMBB:
  // ...
  // fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  // For max/min...
  // loopMBB:
  //   l[wd]arx dest, ptr
  //   cmpl?[wd] dest, incr
  //   bgt exitMBB
  // loop2MBB:
  //   st[wd]cx. dest, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  if (CmpOpcode) {
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg).addReg(dest);
      BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
    } else
      BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);

    // Exit early when the loaded value already satisfies the predicate.
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(CrReg)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  // Store conditionally and retry the loop if the reservation was lost.
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addReg(PPC::CR0)
    .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  // exitMBB:
  // ...
  BB = exitMBB;
  return BB;
}
13040
13042 switch(MI.getOpcode()) {
13043 default:
13044 return false;
13045 case PPC::COPY:
13046 return TII->isSignExtended(MI.getOperand(1).getReg(),
13047 &MI.getMF()->getRegInfo());
13048 case PPC::LHA:
13049 case PPC::LHA8:
13050 case PPC::LHAU:
13051 case PPC::LHAU8:
13052 case PPC::LHAUX:
13053 case PPC::LHAUX8:
13054 case PPC::LHAX:
13055 case PPC::LHAX8:
13056 case PPC::LWA:
13057 case PPC::LWAUX:
13058 case PPC::LWAX:
13059 case PPC::LWAX_32:
13060 case PPC::LWA_32:
13061 case PPC::PLHA:
13062 case PPC::PLHA8:
13063 case PPC::PLHA8pc:
13064 case PPC::PLHApc:
13065 case PPC::PLWA:
13066 case PPC::PLWA8:
13067 case PPC::PLWA8pc:
13068 case PPC::PLWApc:
13069 case PPC::EXTSB:
13070 case PPC::EXTSB8:
13071 case PPC::EXTSB8_32_64:
13072 case PPC::EXTSB8_rec:
13073 case PPC::EXTSB_rec:
13074 case PPC::EXTSH:
13075 case PPC::EXTSH8:
13076 case PPC::EXTSH8_32_64:
13077 case PPC::EXTSH8_rec:
13078 case PPC::EXTSH_rec:
13079 case PPC::EXTSW:
13080 case PPC::EXTSWSLI:
13081 case PPC::EXTSWSLI_32_64:
13082 case PPC::EXTSWSLI_32_64_rec:
13083 case PPC::EXTSWSLI_rec:
13084 case PPC::EXTSW_32:
13085 case PPC::EXTSW_32_64:
13086 case PPC::EXTSW_32_64_rec:
13087 case PPC::EXTSW_rec:
13088 case PPC::SRAW:
13089 case PPC::SRAWI:
13090 case PPC::SRAWI_rec:
13091 case PPC::SRAW_rec:
13092 return true;
13093 }
13094 return false;
13095}
13096
13099 bool is8bit, // operation
13100 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13101 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13102 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13103
13104 // If this is a signed comparison and the value being compared is not known
13105 // to be sign extended, sign extend it here.
13106 DebugLoc dl = MI.getDebugLoc();
13107 MachineFunction *F = BB->getParent();
13108 MachineRegisterInfo &RegInfo = F->getRegInfo();
13109 Register incr = MI.getOperand(3).getReg();
13110 bool IsSignExtended =
13111 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13112
13113 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13114 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13115 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13116 .addReg(MI.getOperand(3).getReg());
13117 MI.getOperand(3).setReg(ValueReg);
13118 incr = ValueReg;
13119 }
13120 // If we support part-word atomic mnemonics, just use them
13121 if (Subtarget.hasPartwordAtomics())
13122 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13123 CmpPred);
13124
13125 // In 64 bit mode we have to use 64 bits for addresses, even though the
13126 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13127 // registers without caring whether they're 32 or 64, but here we're
13128 // doing actual arithmetic on the addresses.
13129 bool is64bit = Subtarget.isPPC64();
13130 bool isLittleEndian = Subtarget.isLittleEndian();
13131 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13132
13133 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13135
13136 Register dest = MI.getOperand(0).getReg();
13137 Register ptrA = MI.getOperand(1).getReg();
13138 Register ptrB = MI.getOperand(2).getReg();
13139
13140 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13141 MachineBasicBlock *loop2MBB =
13142 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13143 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13144 F->insert(It, loopMBB);
13145 if (CmpOpcode)
13146 F->insert(It, loop2MBB);
13147 F->insert(It, exitMBB);
13148 exitMBB->splice(exitMBB->begin(), BB,
13149 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13151
13152 const TargetRegisterClass *RC =
13153 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13154 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13155
13156 Register PtrReg = RegInfo.createVirtualRegister(RC);
13157 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13158 Register ShiftReg =
13159 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13160 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13161 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13162 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13163 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13164 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13165 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13166 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13167 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13168 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13169 Register Ptr1Reg;
13170 Register TmpReg =
13171 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13172
13173 // thisMBB:
13174 // ...
13175 // fallthrough --> loopMBB
13176 BB->addSuccessor(loopMBB);
13177
13178 // The 4-byte load must be aligned, while a char or short may be
13179 // anywhere in the word. Hence all this nasty bookkeeping code.
13180 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13181 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13182 // xori shift, shift1, 24 [16]
13183 // rlwinm ptr, ptr1, 0, 0, 29
13184 // slw incr2, incr, shift
13185 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13186 // slw mask, mask2, shift
13187 // loopMBB:
13188 // lwarx tmpDest, ptr
13189 // add tmp, tmpDest, incr2
13190 // andc tmp2, tmpDest, mask
13191 // and tmp3, tmp, mask
13192 // or tmp4, tmp3, tmp2
13193 // stwcx. tmp4, ptr
13194 // bne- loopMBB
13195 // fallthrough --> exitMBB
13196 // srw SrwDest, tmpDest, shift
13197 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
13198 if (ptrA != ZeroReg) {
13199 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13200 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13201 .addReg(ptrA)
13202 .addReg(ptrB);
13203 } else {
13204 Ptr1Reg = ptrB;
13205 }
13206 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
13207 // mode.
13208 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13209 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13210 .addImm(3)
13211 .addImm(27)
13212 .addImm(is8bit ? 28 : 27);
13213 if (!isLittleEndian)
13214 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13215 .addReg(Shift1Reg)
13216 .addImm(is8bit ? 24 : 16);
13217 if (is64bit)
13218 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13219 .addReg(Ptr1Reg)
13220 .addImm(0)
13221 .addImm(61);
13222 else
13223 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13224 .addReg(Ptr1Reg)
13225 .addImm(0)
13226 .addImm(0)
13227 .addImm(29);
13228 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13229 if (is8bit)
13230 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13231 else {
13232 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13233 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13234 .addReg(Mask3Reg)
13235 .addImm(65535);
13236 }
13237 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13238 .addReg(Mask2Reg)
13239 .addReg(ShiftReg);
13240
13241 BB = loopMBB;
13242 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13243 .addReg(ZeroReg)
13244 .addReg(PtrReg);
13245 if (BinOpcode)
13246 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13247 .addReg(Incr2Reg)
13248 .addReg(TmpDestReg);
13249 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13250 .addReg(TmpDestReg)
13251 .addReg(MaskReg);
13252 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13253 if (CmpOpcode) {
13254 // For unsigned comparisons, we can directly compare the shifted values.
13255 // For signed comparisons we shift and sign extend.
13256 Register SReg = RegInfo.createVirtualRegister(GPRC);
13257 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13258 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13259 .addReg(TmpDestReg)
13260 .addReg(MaskReg);
13261 unsigned ValueReg = SReg;
13262 unsigned CmpReg = Incr2Reg;
13263 if (CmpOpcode == PPC::CMPW) {
13264 ValueReg = RegInfo.createVirtualRegister(GPRC);
13265 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13266 .addReg(SReg)
13267 .addReg(ShiftReg);
13268 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13269 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13270 .addReg(ValueReg);
13271 ValueReg = ValueSReg;
13272 CmpReg = incr;
13273 }
13274 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13275 BuildMI(BB, dl, TII->get(PPC::BCC))
13276 .addImm(CmpPred)
13277 .addReg(CrReg)
13278 .addMBB(exitMBB);
13279 BB->addSuccessor(loop2MBB);
13280 BB->addSuccessor(exitMBB);
13281 BB = loop2MBB;
13282 }
13283 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13284 BuildMI(BB, dl, TII->get(PPC::STWCX))
13285 .addReg(Tmp4Reg)
13286 .addReg(ZeroReg)
13287 .addReg(PtrReg);
13288 BuildMI(BB, dl, TII->get(PPC::BCC))
13290 .addReg(PPC::CR0)
13291 .addMBB(loopMBB);
13292 BB->addSuccessor(loopMBB);
13293 BB->addSuccessor(exitMBB);
13294
13295 // exitMBB:
13296 // ...
13297 BB = exitMBB;
13298 // Since the shift amount is not a constant, we need to clear
13299 // the upper bits with a separate RLWINM.
13300 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13301 .addReg(SrwDestReg)
13302 .addImm(0)
13303 .addImm(is8bit ? 24 : 16)
13304 .addImm(31);
13305 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13306 .addReg(TmpDestReg)
13307 .addReg(ShiftReg);
13308 return BB;
13309}
13310
13313 MachineBasicBlock *MBB) const {
13314 DebugLoc DL = MI.getDebugLoc();
13315 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13316 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13317
13318 MachineFunction *MF = MBB->getParent();
13320
13321 const BasicBlock *BB = MBB->getBasicBlock();
13322 MachineFunction::iterator I = ++MBB->getIterator();
13323
13324 Register DstReg = MI.getOperand(0).getReg();
13325 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13326 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13327 Register mainDstReg = MRI.createVirtualRegister(RC);
13328 Register restoreDstReg = MRI.createVirtualRegister(RC);
13329
13330 MVT PVT = getPointerTy(MF->getDataLayout());
13331 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13332 "Invalid Pointer Size!");
13333 // For v = setjmp(buf), we generate
13334 //
13335 // thisMBB:
13336 // SjLjSetup mainMBB
13337 // bl mainMBB
13338 // v_restore = 1
13339 // b sinkMBB
13340 //
13341 // mainMBB:
13342 // buf[LabelOffset] = LR
13343 // v_main = 0
13344 //
13345 // sinkMBB:
13346 // v = phi(main, restore)
13347 //
13348
13349 MachineBasicBlock *thisMBB = MBB;
13350 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13351 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13352 MF->insert(I, mainMBB);
13353 MF->insert(I, sinkMBB);
13354
13356
13357 // Transfer the remainder of BB and its successor edges to sinkMBB.
13358 sinkMBB->splice(sinkMBB->begin(), MBB,
13359 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13361
13362 // Note that the structure of the jmp_buf used here is not compatible
13363 // with that used by libc, and is not designed to be. Specifically, it
13364 // stores only those 'reserved' registers that LLVM does not otherwise
13365 // understand how to spill. Also, by convention, by the time this
13366 // intrinsic is called, Clang has already stored the frame address in the
13367 // first slot of the buffer and stack address in the third. Following the
13368 // X86 target code, we'll store the jump address in the second slot. We also
13369 // need to save the TOC pointer (R2) to handle jumps between shared
13370 // libraries, and that will be stored in the fourth slot. The thread
13371 // identifier (R13) is not affected.
13372
13373 // thisMBB:
13374 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13375 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13376 const int64_t BPOffset = 4 * PVT.getStoreSize();
13377
13378 // Prepare IP either in reg.
13379 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13380 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13381 Register BufReg = MI.getOperand(1).getReg();
13382
13383 if (Subtarget.is64BitELFABI()) {
13384 setUsesTOCBasePtr(*MBB->getParent());
13385 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13386 .addReg(PPC::X2)
13387 .addImm(TOCOffset)
13388 .addReg(BufReg)
13389 .cloneMemRefs(MI);
13390 }
13391
13392 // Naked functions never have a base pointer, and so we use r1. For all
13393 // other functions, this decision must be delayed until during PEI.
13394 unsigned BaseReg;
13395 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13396 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13397 else
13398 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13399
13400 MIB = BuildMI(*thisMBB, MI, DL,
13401 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13402 .addReg(BaseReg)
13403 .addImm(BPOffset)
13404 .addReg(BufReg)
13405 .cloneMemRefs(MI);
13406
13407 // Setup
13408 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13409 MIB.addRegMask(TRI->getNoPreservedMask());
13410
13411 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13412
13413 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13414 .addMBB(mainMBB);
13415 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13416
13417 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13418 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13419
13420 // mainMBB:
13421 // mainDstReg = 0
13422 MIB =
13423 BuildMI(mainMBB, DL,
13424 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13425
13426 // Store IP
13427 if (Subtarget.isPPC64()) {
13428 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13429 .addReg(LabelReg)
13430 .addImm(LabelOffset)
13431 .addReg(BufReg);
13432 } else {
13433 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13434 .addReg(LabelReg)
13435 .addImm(LabelOffset)
13436 .addReg(BufReg);
13437 }
13438 MIB.cloneMemRefs(MI);
13439
13440 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13441 mainMBB->addSuccessor(sinkMBB);
13442
13443 // sinkMBB:
13444 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13445 TII->get(PPC::PHI), DstReg)
13446 .addReg(mainDstReg).addMBB(mainMBB)
13447 .addReg(restoreDstReg).addMBB(thisMBB);
13448
13449 MI.eraseFromParent();
13450 return sinkMBB;
13451}
13452
13455 MachineBasicBlock *MBB) const {
13456 DebugLoc DL = MI.getDebugLoc();
13457 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13458
13459 MachineFunction *MF = MBB->getParent();
13461
13462 MVT PVT = getPointerTy(MF->getDataLayout());
13463 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13464 "Invalid Pointer Size!");
13465
13466 const TargetRegisterClass *RC =
13467 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13468 Register Tmp = MRI.createVirtualRegister(RC);
13469 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13470 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13471 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13472 unsigned BP =
13473 (PVT == MVT::i64)
13474 ? PPC::X30
13475 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13476 : PPC::R30);
13477
13479
13480 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13481 const int64_t SPOffset = 2 * PVT.getStoreSize();
13482 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13483 const int64_t BPOffset = 4 * PVT.getStoreSize();
13484
13485 Register BufReg = MI.getOperand(0).getReg();
13486
13487 // Reload FP (the jumped-to function may not have had a
13488 // frame pointer, and if so, then its r31 will be restored
13489 // as necessary).
13490 if (PVT == MVT::i64) {
13491 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13492 .addImm(0)
13493 .addReg(BufReg);
13494 } else {
13495 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13496 .addImm(0)
13497 .addReg(BufReg);
13498 }
13499 MIB.cloneMemRefs(MI);
13500
13501 // Reload IP
13502 if (PVT == MVT::i64) {
13503 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13504 .addImm(LabelOffset)
13505 .addReg(BufReg);
13506 } else {
13507 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13508 .addImm(LabelOffset)
13509 .addReg(BufReg);
13510 }
13511 MIB.cloneMemRefs(MI);
13512
13513 // Reload SP
13514 if (PVT == MVT::i64) {
13515 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13516 .addImm(SPOffset)
13517 .addReg(BufReg);
13518 } else {
13519 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13520 .addImm(SPOffset)
13521 .addReg(BufReg);
13522 }
13523 MIB.cloneMemRefs(MI);
13524
13525 // Reload BP
13526 if (PVT == MVT::i64) {
13527 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13528 .addImm(BPOffset)
13529 .addReg(BufReg);
13530 } else {
13531 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13532 .addImm(BPOffset)
13533 .addReg(BufReg);
13534 }
13535 MIB.cloneMemRefs(MI);
13536
13537 // Reload TOC
13538 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13539 setUsesTOCBasePtr(*MBB->getParent());
13540 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13541 .addImm(TOCOffset)
13542 .addReg(BufReg)
13543 .cloneMemRefs(MI);
13544 }
13545
13546 // Jump
13547 BuildMI(*MBB, MI, DL,
13548 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13549 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13550
13551 MI.eraseFromParent();
13552 return MBB;
13553}
13554
13556 // If the function specifically requests inline stack probes, emit them.
13557 if (MF.getFunction().hasFnAttribute("probe-stack"))
13558 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13559 "inline-asm";
13560 return false;
13561}
13562
13564 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13565 unsigned StackAlign = TFI->getStackAlignment();
13566 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13567 "Unexpected stack alignment");
13568 // The default stack probe size is 4096 if the function has no
13569 // stack-probe-size attribute.
13570 const Function &Fn = MF.getFunction();
13571 unsigned StackProbeSize =
13572 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13573 // Round down to the stack alignment.
13574 StackProbeSize &= ~(StackAlign - 1);
13575 return StackProbeSize ? StackProbeSize : StackAlign;
13576}
13577
13578// Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
13579// into three phases. In the first phase, it uses pseudo instruction
13580// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
13581// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
13582// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
13583// MaxCallFrameSize so that it can calculate correct data area pointer.
13586 MachineBasicBlock *MBB) const {
13587 const bool isPPC64 = Subtarget.isPPC64();
13588 MachineFunction *MF = MBB->getParent();
13589 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13590 DebugLoc DL = MI.getDebugLoc();
13591 const unsigned ProbeSize = getStackProbeSize(*MF);
13592 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13594 // The CFG of probing stack looks as
13595 // +-----+
13596 // | MBB |
13597 // +--+--+
13598 // |
13599 // +----v----+
13600 // +--->+ TestMBB +---+
13601 // | +----+----+ |
13602 // | | |
13603 // | +-----v----+ |
13604 // +---+ BlockMBB | |
13605 // +----------+ |
13606 // |
13607 // +---------+ |
13608 // | TailMBB +<--+
13609 // +---------+
13610 // In MBB, calculate previous frame pointer and final stack pointer.
13611 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13612 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13613 // TailMBB is spliced via \p MI.
13614 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13615 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13616 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13617
13618 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13619 MF->insert(MBBIter, TestMBB);
13620 MF->insert(MBBIter, BlockMBB);
13621 MF->insert(MBBIter, TailMBB);
13622
13623 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13624 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13625
13626 Register DstReg = MI.getOperand(0).getReg();
13627 Register NegSizeReg = MI.getOperand(1).getReg();
13628 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13629 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13630 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13631 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13632
13633 // Since value of NegSizeReg might be realigned in prologepilog, insert a
13634 // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
13635 // NegSize.
13636 unsigned ProbeOpc;
13637 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13638 ProbeOpc =
13639 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13640 else
13641 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
13642 // and NegSizeReg will be allocated in the same phyreg to avoid
13643 // redundant copy when NegSizeReg has only one use which is current MI and
13644 // will be replaced by PREPARE_PROBED_ALLOCA then.
13645 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13646 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13647 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13648 .addDef(ActualNegSizeReg)
13649 .addReg(NegSizeReg)
13650 .add(MI.getOperand(2))
13651 .add(MI.getOperand(3));
13652
13653 // Calculate final stack pointer, which equals to SP + ActualNegSize.
13654 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13655 FinalStackPtr)
13656 .addReg(SPReg)
13657 .addReg(ActualNegSizeReg);
13658
13659 // Materialize a scratch register for update.
13660 int64_t NegProbeSize = -(int64_t)ProbeSize;
13661 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13662 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13663 if (!isInt<16>(NegProbeSize)) {
13664 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13665 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13666 .addImm(NegProbeSize >> 16);
13667 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13668 ScratchReg)
13669 .addReg(TempReg)
13670 .addImm(NegProbeSize & 0xFFFF);
13671 } else
13672 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13673 .addImm(NegProbeSize);
13674
13675 {
13676 // Probing leading residual part.
13677 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13678 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13679 .addReg(ActualNegSizeReg)
13680 .addReg(ScratchReg);
13681 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13682 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13683 .addReg(Div)
13684 .addReg(ScratchReg);
13685 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13686 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13687 .addReg(Mul)
13688 .addReg(ActualNegSizeReg);
13689 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13690 .addReg(FramePointer)
13691 .addReg(SPReg)
13692 .addReg(NegMod);
13693 }
13694
13695 {
13696 // Remaining part should be multiple of ProbeSize.
13697 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13698 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13699 .addReg(SPReg)
13700 .addReg(FinalStackPtr);
13701 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13703 .addReg(CmpResult)
13704 .addMBB(TailMBB);
13705 TestMBB->addSuccessor(BlockMBB);
13706 TestMBB->addSuccessor(TailMBB);
13707 }
13708
13709 {
13710 // Touch the block.
13711 // |P...|P...|P...
13712 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13713 .addReg(FramePointer)
13714 .addReg(SPReg)
13715 .addReg(ScratchReg);
13716 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13717 BlockMBB->addSuccessor(TestMBB);
13718 }
13719
13720 // Calculation of MaxCallFrameSize is deferred to prologepilog, use
13721 // DYNAREAOFFSET pseudo instruction to get the future result.
13722 Register MaxCallFrameSizeReg =
13723 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13724 BuildMI(TailMBB, DL,
13725 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13726 MaxCallFrameSizeReg)
13727 .add(MI.getOperand(2))
13728 .add(MI.getOperand(3));
13729 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13730 .addReg(SPReg)
13731 .addReg(MaxCallFrameSizeReg);
13732
13733 // Splice instructions after MI to TailMBB.
13734 TailMBB->splice(TailMBB->end(), MBB,
13735 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13737 MBB->addSuccessor(TestMBB);
13738
13739 // Delete the pseudo instruction.
13740 MI.eraseFromParent();
13741
13742 ++NumDynamicAllocaProbed;
13743 return TailMBB;
13744}
13745
13747 switch (MI.getOpcode()) {
13748 case PPC::SELECT_CC_I4:
13749 case PPC::SELECT_CC_I8:
13750 case PPC::SELECT_CC_F4:
13751 case PPC::SELECT_CC_F8:
13752 case PPC::SELECT_CC_F16:
13753 case PPC::SELECT_CC_VRRC:
13754 case PPC::SELECT_CC_VSFRC:
13755 case PPC::SELECT_CC_VSSRC:
13756 case PPC::SELECT_CC_VSRC:
13757 case PPC::SELECT_CC_SPE4:
13758 case PPC::SELECT_CC_SPE:
13759 return true;
13760 default:
13761 return false;
13762 }
13763}
13764
13765static bool IsSelect(MachineInstr &MI) {
13766 switch (MI.getOpcode()) {
13767 case PPC::SELECT_I4:
13768 case PPC::SELECT_I8:
13769 case PPC::SELECT_F4:
13770 case PPC::SELECT_F8:
13771 case PPC::SELECT_F16:
13772 case PPC::SELECT_SPE:
13773 case PPC::SELECT_SPE4:
13774 case PPC::SELECT_VRRC:
13775 case PPC::SELECT_VSFRC:
13776 case PPC::SELECT_VSSRC:
13777 case PPC::SELECT_VSRC:
13778 return true;
13779 default:
13780 return false;
13781 }
13782}
13783
13786 MachineBasicBlock *BB) const {
13787 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13788 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13789 if (Subtarget.is64BitELFABI() &&
13790 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13791 !Subtarget.isUsingPCRelativeCalls()) {
13792 // Call lowering should have added an r2 operand to indicate a dependence
13793 // on the TOC base pointer value. It can't however, because there is no
13794 // way to mark the dependence as implicit there, and so the stackmap code
13795 // will confuse it with a regular operand. Instead, add the dependence
13796 // here.
13797 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13798 }
13799
13800 return emitPatchPoint(MI, BB);
13801 }
13802
13803 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13804 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13805 return emitEHSjLjSetJmp(MI, BB);
13806 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13807 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13808 return emitEHSjLjLongJmp(MI, BB);
13809 }
13810
13811 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13812
13813 // To "insert" these instructions we actually have to insert their
13814 // control-flow patterns.
13815 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13817
13818 MachineFunction *F = BB->getParent();
13819 MachineRegisterInfo &MRI = F->getRegInfo();
13820
13821 if (Subtarget.hasISEL() &&
13822 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13823 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13824 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13826 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13827 MI.getOpcode() == PPC::SELECT_CC_I8)
13828 Cond.push_back(MI.getOperand(4));
13829 else
13831 Cond.push_back(MI.getOperand(1));
13832
13833 DebugLoc dl = MI.getDebugLoc();
13834 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13835 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13836 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13837 // The incoming instruction knows the destination vreg to set, the
13838 // condition code register to branch on, the true/false values to
13839 // select between, and a branch opcode to use.
13840
13841 // thisMBB:
13842 // ...
13843 // TrueVal = ...
13844 // cmpTY ccX, r1, r2
13845 // bCC sinkMBB
13846 // fallthrough --> copy0MBB
13847 MachineBasicBlock *thisMBB = BB;
13848 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13849 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13850 DebugLoc dl = MI.getDebugLoc();
13851 F->insert(It, copy0MBB);
13852 F->insert(It, sinkMBB);
13853
13854 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13855 copy0MBB->addLiveIn(PPC::CARRY);
13856 sinkMBB->addLiveIn(PPC::CARRY);
13857 }
13858
13859 // Set the call frame size on entry to the new basic blocks.
13860 // See https://reviews.llvm.org/D156113.
13861 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13862 copy0MBB->setCallFrameSize(CallFrameSize);
13863 sinkMBB->setCallFrameSize(CallFrameSize);
13864
13865 // Transfer the remainder of BB and its successor edges to sinkMBB.
13866 sinkMBB->splice(sinkMBB->begin(), BB,
13867 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13869
13870 // Next, add the true and fallthrough blocks as its successors.
13871 BB->addSuccessor(copy0MBB);
13872 BB->addSuccessor(sinkMBB);
13873
13874 if (IsSelect(MI)) {
13875 BuildMI(BB, dl, TII->get(PPC::BC))
13876 .addReg(MI.getOperand(1).getReg())
13877 .addMBB(sinkMBB);
13878 } else {
13879 unsigned SelectPred = MI.getOperand(4).getImm();
13880 BuildMI(BB, dl, TII->get(PPC::BCC))
13881 .addImm(SelectPred)
13882 .addReg(MI.getOperand(1).getReg())
13883 .addMBB(sinkMBB);
13884 }
13885
13886 // copy0MBB:
13887 // %FalseValue = ...
13888 // # fallthrough to sinkMBB
13889 BB = copy0MBB;
13890
13891 // Update machine-CFG edges
13892 BB->addSuccessor(sinkMBB);
13893
13894 // sinkMBB:
13895 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13896 // ...
13897 BB = sinkMBB;
13898 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13899 .addReg(MI.getOperand(3).getReg())
13900 .addMBB(copy0MBB)
13901 .addReg(MI.getOperand(2).getReg())
13902 .addMBB(thisMBB);
13903 } else if (MI.getOpcode() == PPC::ReadTB) {
13904 // To read the 64-bit time-base register on a 32-bit target, we read the
13905 // two halves. Should the counter have wrapped while it was being read, we
13906 // need to try again.
13907 // ...
13908 // readLoop:
13909 // mfspr Rx,TBU # load from TBU
13910 // mfspr Ry,TB # load from TB
13911 // mfspr Rz,TBU # load from TBU
13912 // cmpw crX,Rx,Rz # check if 'old'='new'
13913 // bne readLoop # branch if they're not equal
13914 // ...
13915
13916 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13917 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13918 DebugLoc dl = MI.getDebugLoc();
13919 F->insert(It, readMBB);
13920 F->insert(It, sinkMBB);
13921
13922 // Transfer the remainder of BB and its successor edges to sinkMBB.
13923 sinkMBB->splice(sinkMBB->begin(), BB,
13924 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13926
13927 BB->addSuccessor(readMBB);
13928 BB = readMBB;
13929
13930 MachineRegisterInfo &RegInfo = F->getRegInfo();
13931 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13932 Register LoReg = MI.getOperand(0).getReg();
13933 Register HiReg = MI.getOperand(1).getReg();
13934
13935 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13936 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13937 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13938
13939 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13940
13941 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13942 .addReg(HiReg)
13943 .addReg(ReadAgainReg);
13944 BuildMI(BB, dl, TII->get(PPC::BCC))
13946 .addReg(CmpReg)
13947 .addMBB(readMBB);
13948
13949 BB->addSuccessor(readMBB);
13950 BB->addSuccessor(sinkMBB);
13951 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13952 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13953 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13954 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13955 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13956 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13957 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13958 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13959
13960 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13961 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13962 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13963 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13964 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13965 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13966 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13967 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13968
13969 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13970 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13971 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13972 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13973 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13974 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13975 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13976 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13977
13978 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13979 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13980 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13981 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13982 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13983 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13984 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13985 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13986
13987 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13988 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13989 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13990 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13991 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13992 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13993 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13994 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13995
13996 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13997 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13998 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13999 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14000 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14001 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14002 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14003 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14004
14005 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14006 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14007 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14008 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14009 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14010 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14011 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14012 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14013
14014 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14015 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14016 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14017 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14018 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14019 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14020 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14021 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14022
14023 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14024 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14025 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14026 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14027 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14028 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14029 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14030 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14031
14032 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14033 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14034 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14035 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14036 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14037 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14038 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14039 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14040
14041 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14042 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14043 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14044 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14045 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14046 BB = EmitAtomicBinary(MI, BB, 4, 0);
14047 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14048 BB = EmitAtomicBinary(MI, BB, 8, 0);
14049 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14050 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14051 (Subtarget.hasPartwordAtomics() &&
14052 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14053 (Subtarget.hasPartwordAtomics() &&
14054 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14055 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14056
14057 auto LoadMnemonic = PPC::LDARX;
14058 auto StoreMnemonic = PPC::STDCX;
14059 switch (MI.getOpcode()) {
14060 default:
14061 llvm_unreachable("Compare and swap of unknown size");
14062 case PPC::ATOMIC_CMP_SWAP_I8:
14063 LoadMnemonic = PPC::LBARX;
14064 StoreMnemonic = PPC::STBCX;
14065 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14066 break;
14067 case PPC::ATOMIC_CMP_SWAP_I16:
14068 LoadMnemonic = PPC::LHARX;
14069 StoreMnemonic = PPC::STHCX;
14070 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14071 break;
14072 case PPC::ATOMIC_CMP_SWAP_I32:
14073 LoadMnemonic = PPC::LWARX;
14074 StoreMnemonic = PPC::STWCX;
14075 break;
14076 case PPC::ATOMIC_CMP_SWAP_I64:
14077 LoadMnemonic = PPC::LDARX;
14078 StoreMnemonic = PPC::STDCX;
14079 break;
14080 }
14081 MachineRegisterInfo &RegInfo = F->getRegInfo();
14082 Register dest = MI.getOperand(0).getReg();
14083 Register ptrA = MI.getOperand(1).getReg();
14084 Register ptrB = MI.getOperand(2).getReg();
14085 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14086 Register oldval = MI.getOperand(3).getReg();
14087 Register newval = MI.getOperand(4).getReg();
14088 DebugLoc dl = MI.getDebugLoc();
14089
14090 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14091 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14092 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14093 F->insert(It, loop1MBB);
14094 F->insert(It, loop2MBB);
14095 F->insert(It, exitMBB);
14096 exitMBB->splice(exitMBB->begin(), BB,
14097 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14099
14100 // thisMBB:
14101 // ...
14102 // fallthrough --> loopMBB
14103 BB->addSuccessor(loop1MBB);
14104
14105 // loop1MBB:
14106 // l[bhwd]arx dest, ptr
14107 // cmp[wd] dest, oldval
14108 // bne- exitBB
14109 // loop2MBB:
14110 // st[bhwd]cx. newval, ptr
14111 // bne- loopMBB
14112 // b exitBB
14113 // exitBB:
14114 BB = loop1MBB;
14115 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14116 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14117 .addReg(dest)
14118 .addReg(oldval);
14119 BuildMI(BB, dl, TII->get(PPC::BCC))
14121 .addReg(CrReg)
14122 .addMBB(exitMBB);
14123 BB->addSuccessor(loop2MBB);
14124 BB->addSuccessor(exitMBB);
14125
14126 BB = loop2MBB;
14127 BuildMI(BB, dl, TII->get(StoreMnemonic))
14128 .addReg(newval)
14129 .addReg(ptrA)
14130 .addReg(ptrB);
14131 BuildMI(BB, dl, TII->get(PPC::BCC))
14133 .addReg(PPC::CR0)
14134 .addMBB(loop1MBB);
14135 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14136 BB->addSuccessor(loop1MBB);
14137 BB->addSuccessor(exitMBB);
14138
14139 // exitMBB:
14140 // ...
14141 BB = exitMBB;
14142 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14143 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14144 // We must use 64-bit registers for addresses when targeting 64-bit,
14145 // since we're actually doing arithmetic on them. Other registers
14146 // can be 32-bit.
14147 bool is64bit = Subtarget.isPPC64();
14148 bool isLittleEndian = Subtarget.isLittleEndian();
14149 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14150
14151 Register dest = MI.getOperand(0).getReg();
14152 Register ptrA = MI.getOperand(1).getReg();
14153 Register ptrB = MI.getOperand(2).getReg();
14154 Register oldval = MI.getOperand(3).getReg();
14155 Register newval = MI.getOperand(4).getReg();
14156 DebugLoc dl = MI.getDebugLoc();
14157
14158 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14159 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14160 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14161 F->insert(It, loop1MBB);
14162 F->insert(It, loop2MBB);
14163 F->insert(It, exitMBB);
14164 exitMBB->splice(exitMBB->begin(), BB,
14165 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14167
14168 MachineRegisterInfo &RegInfo = F->getRegInfo();
14169 const TargetRegisterClass *RC =
14170 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14171 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14172
14173 Register PtrReg = RegInfo.createVirtualRegister(RC);
14174 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14175 Register ShiftReg =
14176 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14177 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14178 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14179 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14180 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14181 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14182 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14183 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14184 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14185 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14186 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14187 Register Ptr1Reg;
14188 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14189 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14190 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14191 // thisMBB:
14192 // ...
14193 // fallthrough --> loopMBB
14194 BB->addSuccessor(loop1MBB);
14195
14196 // The 4-byte load must be aligned, while a char or short may be
14197 // anywhere in the word. Hence all this nasty bookkeeping code.
14198 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14199 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14200 // xori shift, shift1, 24 [16]
14201 // rlwinm ptr, ptr1, 0, 0, 29
14202 // slw newval2, newval, shift
14203 // slw oldval2, oldval,shift
14204 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14205 // slw mask, mask2, shift
14206 // and newval3, newval2, mask
14207 // and oldval3, oldval2, mask
14208 // loop1MBB:
14209 // lwarx tmpDest, ptr
14210 // and tmp, tmpDest, mask
14211 // cmpw tmp, oldval3
14212 // bne- exitBB
14213 // loop2MBB:
14214 // andc tmp2, tmpDest, mask
14215 // or tmp4, tmp2, newval3
14216 // stwcx. tmp4, ptr
14217 // bne- loop1MBB
14218 // b exitBB
14219 // exitBB:
14220 // srw dest, tmpDest, shift
14221 if (ptrA != ZeroReg) {
14222 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14223 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14224 .addReg(ptrA)
14225 .addReg(ptrB);
14226 } else {
14227 Ptr1Reg = ptrB;
14228 }
14229
14230 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
14231 // mode.
14232 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14233 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
14234 .addImm(3)
14235 .addImm(27)
14236 .addImm(is8bit ? 28 : 27);
14237 if (!isLittleEndian)
14238 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14239 .addReg(Shift1Reg)
14240 .addImm(is8bit ? 24 : 16);
14241 if (is64bit)
14242 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14243 .addReg(Ptr1Reg)
14244 .addImm(0)
14245 .addImm(61);
14246 else
14247 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14248 .addReg(Ptr1Reg)
14249 .addImm(0)
14250 .addImm(0)
14251 .addImm(29);
14252 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14253 .addReg(newval)
14254 .addReg(ShiftReg);
14255 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14256 .addReg(oldval)
14257 .addReg(ShiftReg);
14258 if (is8bit)
14259 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14260 else {
14261 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14262 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14263 .addReg(Mask3Reg)
14264 .addImm(65535);
14265 }
14266 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14267 .addReg(Mask2Reg)
14268 .addReg(ShiftReg);
14269 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14270 .addReg(NewVal2Reg)
14271 .addReg(MaskReg);
14272 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14273 .addReg(OldVal2Reg)
14274 .addReg(MaskReg);
14275
14276 BB = loop1MBB;
14277 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14278 .addReg(ZeroReg)
14279 .addReg(PtrReg);
14280 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14281 .addReg(TmpDestReg)
14282 .addReg(MaskReg);
14283 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14284 .addReg(TmpReg)
14285 .addReg(OldVal3Reg);
14286 BuildMI(BB, dl, TII->get(PPC::BCC))
14288 .addReg(CrReg)
14289 .addMBB(exitMBB);
14290 BB->addSuccessor(loop2MBB);
14291 BB->addSuccessor(exitMBB);
14292
14293 BB = loop2MBB;
14294 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14295 .addReg(TmpDestReg)
14296 .addReg(MaskReg);
14297 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14298 .addReg(Tmp2Reg)
14299 .addReg(NewVal3Reg);
14300 BuildMI(BB, dl, TII->get(PPC::STWCX))
14301 .addReg(Tmp4Reg)
14302 .addReg(ZeroReg)
14303 .addReg(PtrReg);
14304 BuildMI(BB, dl, TII->get(PPC::BCC))
14306 .addReg(PPC::CR0)
14307 .addMBB(loop1MBB);
14308 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14309 BB->addSuccessor(loop1MBB);
14310 BB->addSuccessor(exitMBB);
14311
14312 // exitMBB:
14313 // ...
14314 BB = exitMBB;
14315 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14316 .addReg(TmpReg)
14317 .addReg(ShiftReg);
14318 } else if (MI.getOpcode() == PPC::FADDrtz) {
14319 // This pseudo performs an FADD with rounding mode temporarily forced
14320 // to round-to-zero. We emit this via custom inserter since the FPSCR
14321 // is not modeled at the SelectionDAG level.
14322 Register Dest = MI.getOperand(0).getReg();
14323 Register Src1 = MI.getOperand(1).getReg();
14324 Register Src2 = MI.getOperand(2).getReg();
14325 DebugLoc dl = MI.getDebugLoc();
14326
14327 MachineRegisterInfo &RegInfo = F->getRegInfo();
14328 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14329
14330 // Save FPSCR value.
14331 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14332
14333 // Set rounding mode to round-to-zero.
14334 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14335 .addImm(31)
14337
14338 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14339 .addImm(30)
14341
14342 // Perform addition.
14343 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14344 .addReg(Src1)
14345 .addReg(Src2);
14346 if (MI.getFlag(MachineInstr::NoFPExcept))
14348
14349 // Restore FPSCR value.
14350 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14351 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14352 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14353 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14354 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14355 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14356 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14357 ? PPC::ANDI8_rec
14358 : PPC::ANDI_rec;
14359 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14360 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14361
14362 MachineRegisterInfo &RegInfo = F->getRegInfo();
14363 Register Dest = RegInfo.createVirtualRegister(
14364 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14365
14366 DebugLoc Dl = MI.getDebugLoc();
14367 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14368 .addReg(MI.getOperand(1).getReg())
14369 .addImm(1);
14370 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14371 MI.getOperand(0).getReg())
14372 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14373 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14374 DebugLoc Dl = MI.getDebugLoc();
14375 MachineRegisterInfo &RegInfo = F->getRegInfo();
14376 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14377 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14378 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14379 MI.getOperand(0).getReg())
14380 .addReg(CRReg);
14381 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14382 DebugLoc Dl = MI.getDebugLoc();
14383 unsigned Imm = MI.getOperand(1).getImm();
14384 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14385 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14386 MI.getOperand(0).getReg())
14387 .addReg(PPC::CR0EQ);
14388 } else if (MI.getOpcode() == PPC::SETRNDi) {
14389 DebugLoc dl = MI.getDebugLoc();
14390 Register OldFPSCRReg = MI.getOperand(0).getReg();
14391
14392 // Save FPSCR value.
14393 if (MRI.use_empty(OldFPSCRReg))
14394 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14395 else
14396 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14397
14398 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
14399 // the following settings:
14400 // 00 Round to nearest
14401 // 01 Round to 0
14402 // 10 Round to +inf
14403 // 11 Round to -inf
14404
14405 // When the operand is immediate, using the two least significant bits of
14406 // the immediate to set the bits 62:63 of FPSCR.
14407 unsigned Mode = MI.getOperand(1).getImm();
14408 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14409 .addImm(31)
14411
14412 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14413 .addImm(30)
14415 } else if (MI.getOpcode() == PPC::SETRND) {
14416 DebugLoc dl = MI.getDebugLoc();
14417
14418 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14419 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14420 // If the target doesn't have DirectMove, we should use stack to do the
14421 // conversion, because the target doesn't have the instructions like mtvsrd
14422 // or mfvsrd to do this conversion directly.
14423 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14424 if (Subtarget.hasDirectMove()) {
14425 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14426 .addReg(SrcReg);
14427 } else {
14428 // Use stack to do the register copy.
14429 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14430 MachineRegisterInfo &RegInfo = F->getRegInfo();
14431 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14432 if (RC == &PPC::F8RCRegClass) {
14433 // Copy register from F8RCRegClass to G8RCRegclass.
14434 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14435 "Unsupported RegClass.");
14436
14437 StoreOp = PPC::STFD;
14438 LoadOp = PPC::LD;
14439 } else {
14440 // Copy register from G8RCRegClass to F8RCRegclass.
14441 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14442 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14443 "Unsupported RegClass.");
14444 }
14445
14446 MachineFrameInfo &MFI = F->getFrameInfo();
14447 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14448
14449 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14450 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14452 MFI.getObjectAlign(FrameIdx));
14453
14454 // Store the SrcReg into the stack.
14455 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14456 .addReg(SrcReg)
14457 .addImm(0)
14458 .addFrameIndex(FrameIdx)
14459 .addMemOperand(MMOStore);
14460
14461 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14462 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14464 MFI.getObjectAlign(FrameIdx));
14465
14466 // Load from the stack where SrcReg is stored, and save to DestReg,
14467 // so we have done the RegClass conversion from RegClass::SrcReg to
14468 // RegClass::DestReg.
14469 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14470 .addImm(0)
14471 .addFrameIndex(FrameIdx)
14472 .addMemOperand(MMOLoad);
14473 }
14474 };
14475
14476 Register OldFPSCRReg = MI.getOperand(0).getReg();
14477
14478 // Save FPSCR value.
14479 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14480
14481 // When the operand is gprc register, use two least significant bits of the
14482 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
14483 //
14484 // copy OldFPSCRTmpReg, OldFPSCRReg
14485 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14486 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14487 // copy NewFPSCRReg, NewFPSCRTmpReg
14488 // mtfsf 255, NewFPSCRReg
14489 MachineOperand SrcOp = MI.getOperand(1);
14490 MachineRegisterInfo &RegInfo = F->getRegInfo();
14491 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14492
14493 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14494
14495 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14496 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14497
14498 // The first operand of INSERT_SUBREG should be a register which has
14499 // subregisters, we only care about its RegClass, so we should use an
14500 // IMPLICIT_DEF register.
14501 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14502 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14503 .addReg(ImDefReg)
14504 .add(SrcOp)
14505 .addImm(1);
14506
14507 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14508 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14509 .addReg(OldFPSCRTmpReg)
14510 .addReg(ExtSrcReg)
14511 .addImm(0)
14512 .addImm(62);
14513
14514 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14515 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14516
14517 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
14518 // bits of FPSCR.
14519 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14520 .addImm(255)
14521 .addReg(NewFPSCRReg)
14522 .addImm(0)
14523 .addImm(0);
14524 } else if (MI.getOpcode() == PPC::SETFLM) {
14525 DebugLoc Dl = MI.getDebugLoc();
14526
14527 // Result of setflm is previous FPSCR content, so we need to save it first.
14528 Register OldFPSCRReg = MI.getOperand(0).getReg();
14529 if (MRI.use_empty(OldFPSCRReg))
14530 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14531 else
14532 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14533
14534 // Put bits in 32:63 to FPSCR.
14535 Register NewFPSCRReg = MI.getOperand(1).getReg();
14536 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14537 .addImm(255)
14538 .addReg(NewFPSCRReg)
14539 .addImm(0)
14540 .addImm(0);
14541 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14542 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14543 return emitProbedAlloca(MI, BB);
14544 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14545 DebugLoc DL = MI.getDebugLoc();
14546 Register Src = MI.getOperand(2).getReg();
14547 Register Lo = MI.getOperand(0).getReg();
14548 Register Hi = MI.getOperand(1).getReg();
14549 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14550 .addDef(Lo)
14551 .addUse(Src, 0, PPC::sub_gp8_x1);
14552 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14553 .addDef(Hi)
14554 .addUse(Src, 0, PPC::sub_gp8_x0);
14555 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14556 MI.getOpcode() == PPC::STQX_PSEUDO) {
14557 DebugLoc DL = MI.getDebugLoc();
14558 // Ptr is used as the ptr_rc_no_r0 part
14559 // of LQ/STQ's memory operand and adding result of RA and RB,
14560 // so it has to be g8rc_and_g8rc_nox0.
14561 Register Ptr =
14562 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14563 Register Val = MI.getOperand(0).getReg();
14564 Register RA = MI.getOperand(1).getReg();
14565 Register RB = MI.getOperand(2).getReg();
14566 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14567 BuildMI(*BB, MI, DL,
14568 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14569 : TII->get(PPC::STQ))
14570 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
14571 .addImm(0)
14572 .addReg(Ptr);
14573 } else if (MI.getOpcode() == PPC::LWAT_PSEUDO ||
14574 MI.getOpcode() == PPC::LDAT_PSEUDO) {
14575 DebugLoc DL = MI.getDebugLoc();
14576 Register DstReg = MI.getOperand(0).getReg();
14577 Register PtrReg = MI.getOperand(1).getReg();
14578 Register ValReg = MI.getOperand(2).getReg();
14579 unsigned FC = MI.getOperand(3).getImm();
14580 bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
14581 Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14582 if (IsLwat)
14583 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64)
14584 .addImm(0)
14585 .addReg(ValReg)
14586 .addImm(PPC::sub_32);
14587 else
14588 Val64 = ValReg;
14589
14590 Register G8rPair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14591 Register UndefG8r = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14592 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), UndefG8r);
14593 BuildMI(*BB, MI, DL, TII->get(PPC::REG_SEQUENCE), G8rPair)
14594 .addReg(UndefG8r)
14595 .addImm(PPC::sub_gp8_x0)
14596 .addReg(Val64)
14597 .addImm(PPC::sub_gp8_x1);
14598
14599 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14600 BuildMI(*BB, MI, DL, TII->get(IsLwat ? PPC::LWAT : PPC::LDAT), PairResult)
14601 .addReg(G8rPair)
14602 .addReg(PtrReg)
14603 .addImm(FC);
14604 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14605 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14606 .addReg(PairResult, 0, PPC::sub_gp8_x0);
14607 if (IsLwat)
14608 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14609 .addReg(Result64, 0, PPC::sub_32);
14610 else
14611 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14612 .addReg(Result64);
14613 } else {
14614 llvm_unreachable("Unexpected instr type to insert");
14615 }
14616
14617 MI.eraseFromParent(); // The pseudo instruction is gone now.
14618 return BB;
14619}
14620
14621//===----------------------------------------------------------------------===//
14622// Target Optimization Hooks
14623//===----------------------------------------------------------------------===//
14624
14625static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14626 // For the estimates, convergence is quadratic, so we essentially double the
14627 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14628 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14629 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
14630 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14631 if (VT.getScalarType() == MVT::f64)
14632 RefinementSteps++;
14633 return RefinementSteps;
14634}
14635
14636SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14637 const DenormalMode &Mode) const {
14638 // We only have VSX Vector Test for software Square Root.
14639 EVT VT = Op.getValueType();
14640 if (!isTypeLegal(MVT::i1) ||
14641 (VT != MVT::f64 &&
14642 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14644
14645 SDLoc DL(Op);
14646 // The output register of FTSQRT is CR field.
14647 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14648 // ftsqrt BF,FRB
14649 // Let e_b be the unbiased exponent of the double-precision
14650 // floating-point operand in register FRB.
14651 // fe_flag is set to 1 if either of the following conditions occurs.
14652 // - The double-precision floating-point operand in register FRB is a zero,
14653 // a NaN, or an infinity, or a negative value.
14654 // - e_b is less than or equal to -970.
14655 // Otherwise fe_flag is set to 0.
14656 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14657 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14658 // exponent is less than -970)
14659 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14660 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14661 FTSQRT, SRIdxVal),
14662 0);
14663}
14664
14665SDValue
14666PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14667 SelectionDAG &DAG) const {
14668 // We only have VSX Vector Square Root.
14669 EVT VT = Op.getValueType();
14670 if (VT != MVT::f64 &&
14671 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14673
14674 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14675}
14676
14677SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14678 int Enabled, int &RefinementSteps,
14679 bool &UseOneConstNR,
14680 bool Reciprocal) const {
14681 EVT VT = Operand.getValueType();
14682 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14683 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14684 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14685 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14686 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14687 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14688
14689 // The Newton-Raphson computation with a single constant does not provide
14690 // enough accuracy on some CPUs.
14691 UseOneConstNR = !Subtarget.needsTwoConstNR();
14692 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14693 }
14694 return SDValue();
14695}
14696
14697SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14698 int Enabled,
14699 int &RefinementSteps) const {
14700 EVT VT = Operand.getValueType();
14701 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14702 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14703 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14704 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14705 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14706 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14707 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14708 }
14709 return SDValue();
14710}
14711
  // Note: This functionality is used only when arcp is enabled, and
  // on cores with reciprocal estimates (which are used when arcp is
  // enabled for division), this functionality is redundant with the default
  // combiner logic (once the division -> reciprocal/multiply transformation
  // has taken place). As a result, this matters more for older cores than for
  // newer ones.

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
  switch (Subtarget.getCPUDirective()) {
  default:
    // Out-of-order cores can overlap independent divides, so require three
    // users of the same divisor before forming the reciprocal.
    return 3;
  case PPC::DIR_440:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
    // Embedded/in-order cores with a single FP pipeline profit from the
    // reciprocal already at two divides sharing a divisor.
    return 2;
  }
}
14734
// isConsecutiveLSLoc needs to work even if all adds have not yet been
// collapsed, and so we need to look through chains of them.
                                      int64_t& Offset, SelectionDAG &DAG) {
  if (DAG.isBaseWithConstantOffset(Loc)) {
    // Peel one (base + constant) layer: record the base and accumulate the
    // constant into Offset.
    Base = Loc.getOperand(0);
    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();

    // The base might itself be a base plus an offset, and if so, accumulate
    // that as well.
    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
  }
}
14748
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  // The access width must match the expected byte count exactly.
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    // Frame-index case: both addresses must be frame indices referring to
    // same-sized objects placed exactly Dist accesses apart.
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  // General case: strip accumulated constant offsets from both addresses and
  // compare the resulting (base, offset) pairs.
  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  // Last resort: both addresses may be offsets from the same global value.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}
14786
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
    // Ordinary loads/stores: compare memory VT and base pointer directly.
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  // Altivec/VSX load intrinsics: derive the effective memory type from the
  // intrinsic ID (operand 1); the address is operand 2.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (N->getConstantOperandVal(1)) {
    default: return false;
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  // Altivec/VSX store intrinsics: same idea, but the address is operand 3
  // (operand 2 holds the value being stored).
  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (N->getConstantOperandVal(1)) {
    default: return false;
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}
14860
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallPtrSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      // Keep walking up through this memory operation's chain.
      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      // Token factors merge several chains; follow all of them.
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      // Anything else terminates the upward walk; remember it as a root for
      // the downward phase.
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look though
  // all loads (just the chain uses) and token factors to find a consecutive
  // load.
  Visited.clear();
  Queue.clear();

  for (SDNode *I : LoadRoots) {
    Queue.push_back(I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      // Descend only through memory operations chained on this node and
      // through token factors; nothing else is followed.
      for (SDNode *U : LoadRoot->users())
        if (((isa<MemSDNode>(U) &&
              cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
             U->getOpcode() == ISD::TokenFactor) &&
            !Visited.count(U))
          Queue.push_back(U);
    }
  }

  return false;
}
14927
14928/// This function is called when we have proved that a SETCC node can be replaced
14929/// by subtraction (and other supporting instructions) so that the result of
14930/// comparison is kept in a GPR instead of CR. This function is purely for
14931/// codegen purposes and has some flags to guide the codegen process.
14932static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14933 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14934 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14935
14936 // Zero extend the operands to the largest legal integer. Originally, they
14937 // must be of a strictly smaller size.
14938 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14939 DAG.getConstant(Size, DL, MVT::i32));
14940 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14941 DAG.getConstant(Size, DL, MVT::i32));
14942
14943 // Swap if needed. Depends on the condition code.
14944 if (Swap)
14945 std::swap(Op0, Op1);
14946
14947 // Subtract extended integers.
14948 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14949
14950 // Move the sign bit to the least significant position and zero out the rest.
14951 // Now the least significant bit carries the result of original comparison.
14952 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14953 DAG.getConstant(Size - 1, DL, MVT::i32));
14954 auto Final = Shifted;
14955
14956 // Complement the result if needed. Based on the condition code.
14957 if (Complement)
14958 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14959 DAG.getConstant(1, DL, MVT::i64));
14960
14961 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14962}
14963
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // Size of integers being compared has a critical role in the following
  // analysis, so we prefer to do this when all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // If all users of SETCC extend its value to a legal integer type
  // then we replace SETCC with a subtraction
  for (const SDNode *U : N->users())
    if (U->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OpSize = N->getOperand(0).getValueSizeInBits();


  // Only unsigned comparisons of strictly smaller operands are handled; the
  // Complement/Swap flags passed below select the right sequence for each
  // condition code (see generateEquivalentSub).
  if (OpSize < Size) {
    switch (CC) {
    default: break;
    case ISD::SETULT:
      // x <u y: sign bit of zext(x) - zext(y).
      return generateEquivalentSub(N, Size, false, false, DL, DAG);
    case ISD::SETULE:
      // x <=u y == !(y <u x): swap and complement.
      return generateEquivalentSub(N, Size, true, true, DL, DAG);
    case ISD::SETUGT:
      // x >u y == y <u x: swap.
      return generateEquivalentSub(N, Size, false, true, DL, DAG);
    case ISD::SETUGE:
      // x >=u y == !(x <u y): complement.
      return generateEquivalentSub(N, Size, true, false, DL, DAG);
    }
  }

  return SDValue();
}
15003
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't matter the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed comparison: both operands must be fully sign-extended copies
      // of a narrower value, or the high bits would affect the outcome.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned comparison: everything above bit 0 must be known zero.
      // Failing that, a SETCC may still be lowerable as a subtraction.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so pretend that it is known zero for both to ensure they can
      // be compared as constants.
      Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);

      if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
          Op1Known.getConstant() != Op2Known.getConstant())
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the worklist with the (one or two) direct operands: i1 extensions
  // and constants are terminal inputs, everything else is a binary op to
  // explore further.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
         N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (const SDNode *User : Inputs[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (const SDNode *User : PromOps[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Hold the to-be-promoted nodes via handles so that RAUW/CSE during the
  // rewrite below cannot delete them out from under us.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first value operand to promote: SELECT's
    // condition (operand 0) and SELECT_CC's comparison inputs (0 and 1)
    // are skipped.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }


    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
15278
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode *User : Inputs[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode *User : PromOps[i].getNode()->users()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Convert PromOps to handles before doing any RAUW operations, as these
  // may CSE with existing nodes, deleting the originals.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    // C is the index of the first value operand to promote: SELECT's
    // condition (operand 0) and SELECT_CC's comparison inputs (0 and 1)
    // are skipped here and handled via SelectTruncOp below.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }


    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                         N->getValueSizeInBits(0), PromBits),
                       dl, N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  // Sign extend by shifting the promoted bits up to the top and arithmetic-
  // shifting them back down.
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}
15555
15556// The function check a i128 load can convert to 16i8 load for Vcmpequb.
15558
15559 auto isValidForConvert = [](SDValue &Operand) {
15560 if (!Operand.hasOneUse())
15561 return false;
15562
15563 if (Operand.getValueType() != MVT::i128)
15564 return false;
15565
15566 if (Operand.getOpcode() == ISD::Constant)
15567 return true;
15568
15569 auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
15570 if (!LoadNode)
15571 return false;
15572
15573 // If memory operation is volatile, do not perform any
15574 // optimization or transformation. Volatile operations must be preserved
15575 // as written to ensure correct program behavior, so we return an empty
15576 // SDValue to indicate no action.
15577
15578 if (LoadNode->isVolatile())
15579 return false;
15580
15581 // Only combine loads if both use the unindexed addressing mode.
15582 // PowerPC AltiVec/VMX does not support vector loads or stores with
15583 // pre/post-increment addressing. Indexed modes may imply implicit
15584 // pointer updates, which are not compatible with AltiVec vector
15585 // instructions.
15586 if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15587 return false;
15588
15589 // Only combine loads if both are non-extending loads
15590 // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15591 // ISD::SEXTLOAD) perform zero or sign extension, which may change the
15592 // loaded value's semantics and are not compatible with vector loads.
15593 if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15594 return false;
15595
15596 return true;
15597 };
15598
15599 return (isValidForConvert(LHS) && isValidForConvert(RHS));
15600}
15601
15603 const SDLoc &DL) {
15604
15605 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15606
15607 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15608 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15609 "CC mus be ISD::SETNE or ISD::SETEQ");
15610
15611 auto getV16i8Load = [&](const SDValue &Operand) {
15612 if (Operand.getOpcode() == ISD::Constant)
15613 return DAG.getBitcast(MVT::v16i8, Operand);
15614
15615 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15616
15617 auto *LoadNode = cast<LoadSDNode>(Operand);
15618 return DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15619 LoadNode->getBasePtr(), LoadNode->getMemOperand());
15620 };
15621
15622 // Following code transforms the DAG
15623 // t0: ch,glue = EntryToken
15624 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15625 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15626 // undef:i64
15627 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15628 // t5: i128,ch =
15629 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15630 // setcc t3, t5, setne:ch
15631 //
15632 // ---->
15633 //
15634 // t0: ch,glue = EntryToken
15635 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15636 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15637 // undef:i64
15638 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15639 // t5: v16i8,ch =
15640 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15641 // t6: i32 =
15642 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15643 // Constant:i32<2>, t3, t5
15644 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15645
15646 // Or transforms the DAG
15647 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15648 // t8: i1 =
15649 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15650 //
15651 // --->
15652 //
15653 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15654 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15655 // t7: i32 =
15656 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15657
15658 SDValue LHSVec = getV16i8Load(N->getOperand(0));
15659 SDValue RHSVec = getV16i8Load(N->getOperand(1));
15660
15661 SDValue IntrID =
15662 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
15663 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15664 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
15665 IntrID, CRSel, LHSVec, RHSVec);
15666 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15667 // so we need to invert the CC opcode.
15668 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15669 DAG.getConstant(0, DL, MVT::i32),
15670 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15671}
15672
15673// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15674// If it is , return true; otherwise return false.
15676 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15677
15678 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15679 if (CC != ISD::SETEQ)
15680 return false;
15681
15682 SDValue LHS = N->getOperand(0);
15683 SDValue RHS = N->getOperand(1);
15684
15685 // Check the `SDValue &V` is from `and` with `1`.
15686 auto IsAndWithOne = [](SDValue &V) {
15687 if (V.getOpcode() == ISD::AND) {
15688 for (const SDValue &Op : V->ops())
15689 if (auto *C = dyn_cast<ConstantSDNode>(Op))
15690 if (C->isOne())
15691 return true;
15692 }
15693 return false;
15694 };
15695
15696 // Check whether the SETCC compare with zero.
15697 auto IsCompareWithZero = [](SDValue &V) {
15698 if (auto *C = dyn_cast<ConstantSDNode>(V))
15699 if (C->isZero())
15700 return true;
15701 return false;
15702 };
15703
15704 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
15705 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
15706}
15707
15708// You must check whether the `SDNode* N` can be converted to Xori using
15709// the function `static bool canConvertSETCCToXori(SDNode *N)`
15710// before calling the function; otherwise, it may produce incorrect results.
15712
15713 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15714 SDValue LHS = N->getOperand(0);
15715 SDValue RHS = N->getOperand(1);
15716 SDLoc DL(N);
15717
15718 [[maybe_unused]] ISD::CondCode CC =
15719 cast<CondCodeSDNode>(N->getOperand(2))->get();
15720 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15721 // Rewrite it as XORI (and X, 1), 1.
15722 auto MakeXor1 = [&](SDValue V) {
15723 EVT VT = V.getValueType();
15724 SDValue One = DAG.getConstant(1, DL, VT);
15725 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, V, One);
15726 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Xor);
15727 };
15728
15729 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
15730 return MakeXor1(LHS);
15731
15732 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
15733 return MakeXor1(RHS);
15734
15735 llvm_unreachable("Should not reach here.");
15736}
15737
15738SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15739 DAGCombinerInfo &DCI) const {
15740 assert(N->getOpcode() == ISD::SETCC &&
15741 "Should be called with a SETCC node");
15742
15743 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
15744 // If it is, rewrite it as XORI (and X, 1), 1.
15746 return ConvertSETCCToXori(N, DCI.DAG);
15747
15748 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15749 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15750 SDValue LHS = N->getOperand(0);
15751 SDValue RHS = N->getOperand(1);
15752
15753 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15754 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15755 LHS.hasOneUse())
15756 std::swap(LHS, RHS);
15757
15758 // x == 0-y --> x+y == 0
15759 // x != 0-y --> x+y != 0
15760 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15761 RHS.hasOneUse()) {
15762 SDLoc DL(N);
15763 SelectionDAG &DAG = DCI.DAG;
15764 EVT VT = N->getValueType(0);
15765 EVT OpVT = LHS.getValueType();
15766 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15767 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15768 }
15769
15770 // Optimization: Fold i128 equality/inequality compares of two loads into a
15771 // vectorized compare using vcmpequb.p when Altivec is available.
15772 //
15773 // Rationale:
15774 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
15775 // On VSX-capable subtargets, we can instead reinterpret the i128 loads
15776 // as v16i8 vectors and use the Altive vcmpequb.p instruction to
15777 // perform a full 128-bit equality check in a single vector compare.
15778 //
15779 // Example Result:
15780 // This transformation replaces memcmp(a, b, 16) with two vector loads
15781 // and one vector compare instruction.
15782
15783 if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
15784 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
15785 }
15786
15787 return DAGCombineTruncBoolExt(N, DCI);
15788}
15789
15790// Is this an extending load from an f32 to an f64?
15791static bool isFPExtLoad(SDValue Op) {
15792 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15793 return LD->getExtensionType() == ISD::EXTLOAD &&
15794 Op.getValueType() == MVT::f64;
15795 return false;
15796}
15797
15798/// Reduces the number of fp-to-int conversion when building a vector.
15799///
15800/// If this vector is built out of floating to integer conversions,
15801/// transform it to a vector built out of floating point values followed by a
15802/// single floating to integer conversion of the vector.
15803/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15804/// becomes (fptosi (build_vector ($A, $B, ...)))
15805SDValue PPCTargetLowering::
15806combineElementTruncationToVectorTruncation(SDNode *N,
15807 DAGCombinerInfo &DCI) const {
15808 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15809 "Should be called with a BUILD_VECTOR node");
15810
15811 SelectionDAG &DAG = DCI.DAG;
15812 SDLoc dl(N);
15813
15814 SDValue FirstInput = N->getOperand(0);
15815 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15816 "The input operand must be an fp-to-int conversion.");
15817
15818 // This combine happens after legalization so the fp_to_[su]i nodes are
15819 // already converted to PPCSISD nodes.
15820 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15821 if (FirstConversion == PPCISD::FCTIDZ ||
15822 FirstConversion == PPCISD::FCTIDUZ ||
15823 FirstConversion == PPCISD::FCTIWZ ||
15824 FirstConversion == PPCISD::FCTIWUZ) {
15825 bool IsSplat = true;
15826 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15827 FirstConversion == PPCISD::FCTIWUZ;
15828 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15830 EVT TargetVT = N->getValueType(0);
15831 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15832 SDValue NextOp = N->getOperand(i);
15833 if (NextOp.getOpcode() != PPCISD::MFVSR)
15834 return SDValue();
15835 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15836 if (NextConversion != FirstConversion)
15837 return SDValue();
15838 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15839 // This is not valid if the input was originally double precision. It is
15840 // also not profitable to do unless this is an extending load in which
15841 // case doing this combine will allow us to combine consecutive loads.
15842 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15843 return SDValue();
15844 if (N->getOperand(i) != FirstInput)
15845 IsSplat = false;
15846 }
15847
15848 // If this is a splat, we leave it as-is since there will be only a single
15849 // fp-to-int conversion followed by a splat of the integer. This is better
15850 // for 32-bit and smaller ints and neutral for 64-bit ints.
15851 if (IsSplat)
15852 return SDValue();
15853
15854 // Now that we know we have the right type of node, get its operands
15855 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15856 SDValue In = N->getOperand(i).getOperand(0);
15857 if (Is32Bit) {
15858 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15859 // here, we know that all inputs are extending loads so this is safe).
15860 if (In.isUndef())
15861 Ops.push_back(DAG.getUNDEF(SrcVT));
15862 else {
15863 SDValue Trunc =
15864 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15865 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15866 Ops.push_back(Trunc);
15867 }
15868 } else
15869 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15870 }
15871
15872 unsigned Opcode;
15873 if (FirstConversion == PPCISD::FCTIDZ ||
15874 FirstConversion == PPCISD::FCTIWZ)
15875 Opcode = ISD::FP_TO_SINT;
15876 else
15877 Opcode = ISD::FP_TO_UINT;
15878
15879 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15880 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15881 return DAG.getNode(Opcode, dl, TargetVT, BV);
15882 }
15883 return SDValue();
15884}
15885
15886// LXVKQ instruction load VSX vector with a special quadword value
15887// based on an immediate value. This helper method returns the details of the
15888// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
15889// to help generate the LXVKQ instruction and the subsequent shift instruction
15890// required to match the original build vector pattern.
15891
15892// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
15893using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
15894
15895static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15896
15897 // LXVKQ instruction loads the Quadword value:
15898 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15899 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15900 static const uint32_t Uim = 16;
15901
15902 // Check for direct LXVKQ match (no shift needed)
15903 if (FullVal == BasePattern)
15904 return std::make_tuple(Uim, uint8_t{0});
15905
15906 // Check if FullValue is 1 (the result of the base pattern >> 127)
15907 if (FullVal == APInt(128, 1))
15908 return std::make_tuple(Uim, uint8_t{127});
15909
15910 return std::nullopt;
15911}
15912
15913/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
15914/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
15915/// LXVKQ instruction load VSX vector with a special quadword value based on an
15916/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
15917/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
15918/// This can be used to inline the build vector constants that have the
15919/// following patterns:
15920///
15921/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
15922/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
15923/// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a
15924/// combination of splatting and right shift instructions.
15925
15926SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15927 SelectionDAG &DAG) const {
15928
15929 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
15930 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
15931
15932 // This transformation is only supported if we are loading either a byte,
15933 // halfword, word, or doubleword.
15934 EVT VT = Op.getValueType();
15935 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
15936 VT == MVT::v2i64))
15937 return SDValue();
15938
15939 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
15940 << VT.getEVTString() << "): ";
15941 Op->dump());
15942
15943 unsigned NumElems = VT.getVectorNumElements();
15944 unsigned ElemBits = VT.getScalarSizeInBits();
15945
15946 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
15947
15948 // Check for Non-constant operand in the build vector.
15949 for (const SDValue &Operand : Op.getNode()->op_values()) {
15950 if (!isa<ConstantSDNode>(Operand))
15951 return SDValue();
15952 }
15953
15954 // Assemble build vector operands as a 128-bit register value
15955 // We need to reconstruct what the 128-bit register pattern would be
15956 // that produces this vector when interpreted with the current endianness
15957 APInt FullVal = APInt::getZero(128);
15958
15959 for (unsigned Index = 0; Index < NumElems; ++Index) {
15960 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
15961
15962 // Get element value as raw bits (zero-extended)
15963 uint64_t ElemValue = C->getZExtValue();
15964
15965 // Mask to element size to ensure we only get the relevant bits
15966 if (ElemBits < 64)
15967 ElemValue &= ((1ULL << ElemBits) - 1);
15968
15969 // Calculate bit position for this element in the 128-bit register
15970 unsigned BitPos =
15971 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
15972
15973 // Create APInt for the element value and shift it to correct position
15974 APInt ElemAPInt(128, ElemValue);
15975 ElemAPInt <<= BitPos;
15976
15977 // Place the element value at the correct bit position
15978 FullVal |= ElemAPInt;
15979 }
15980
15981 if (FullVal.isZero() || FullVal.isAllOnes())
15982 return SDValue();
15983
15984 if (auto UIMOpt = getPatternInfo(FullVal)) {
15985 const auto &[Uim, ShiftAmount] = *UIMOpt;
15986 SDLoc Dl(Op);
15987
15988 // Generate LXVKQ instruction if the shift amount is zero.
15989 if (ShiftAmount == 0) {
15990 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
15991 SDValue LxvkqInstr =
15992 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
15994 << "combineBVLoadsSpecialValue: Instruction Emitted ";
15995 LxvkqInstr.dump());
15996 return LxvkqInstr;
15997 }
15998
15999 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
16000
16001 // The right shifted pattern can be constructed using a combination of
16002 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
16003 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16004 // value 255.
16005 SDValue ShiftAmountVec =
16006 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
16007 DAG.getTargetConstant(255, Dl, MVT::i32)),
16008 0);
16009 // Generate appropriate right shift instruction
16010 SDValue ShiftVec = SDValue(
16011 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
16012 0);
16014 << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16015 ShiftVec.dump());
16016 return ShiftVec;
16017 }
16018 // No patterns matched for build vectors.
16019 return SDValue();
16020}
16021
16022/// Reduce the number of loads when building a vector.
16023///
16024/// Building a vector out of multiple loads can be converted to a load
16025/// of the vector type if the loads are consecutive. If the loads are
16026/// consecutive but in descending order, a shuffle is added at the end
16027/// to reorder the vector.
16029 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16030 "Should be called with a BUILD_VECTOR node");
16031
16032 SDLoc dl(N);
16033
16034 // Return early for non byte-sized type, as they can't be consecutive.
16035 if (!N->getValueType(0).getVectorElementType().isByteSized())
16036 return SDValue();
16037
16038 bool InputsAreConsecutiveLoads = true;
16039 bool InputsAreReverseConsecutive = true;
16040 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
16041 SDValue FirstInput = N->getOperand(0);
16042 bool IsRoundOfExtLoad = false;
16043 LoadSDNode *FirstLoad = nullptr;
16044
16045 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16046 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
16047 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
16048 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16049 }
16050 // Not a build vector of (possibly fp_rounded) loads.
16051 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
16052 N->getNumOperands() == 1)
16053 return SDValue();
16054
16055 if (!IsRoundOfExtLoad)
16056 FirstLoad = cast<LoadSDNode>(FirstInput);
16057
16059 InputLoads.push_back(FirstLoad);
16060 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
16061 // If any inputs are fp_round(extload), they all must be.
16062 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
16063 return SDValue();
16064
16065 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
16066 N->getOperand(i);
16067 if (NextInput.getOpcode() != ISD::LOAD)
16068 return SDValue();
16069
16070 SDValue PreviousInput =
16071 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
16072 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
16073 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
16074
16075 // If any inputs are fp_round(extload), they all must be.
16076 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16077 return SDValue();
16078
16079 // We only care about regular loads. The PPC-specific load intrinsics
16080 // will not lead to a merge opportunity.
16081 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
16082 InputsAreConsecutiveLoads = false;
16083 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
16084 InputsAreReverseConsecutive = false;
16085
16086 // Exit early if the loads are neither consecutive nor reverse consecutive.
16087 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16088 return SDValue();
16089 InputLoads.push_back(LD2);
16090 }
16091
16092 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16093 "The loads cannot be both consecutive and reverse consecutive.");
16094
16095 SDValue WideLoad;
16096 SDValue ReturnSDVal;
16097 if (InputsAreConsecutiveLoads) {
16098 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16099 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16100 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16101 FirstLoad->getAlign());
16102 ReturnSDVal = WideLoad;
16103 } else if (InputsAreReverseConsecutive) {
16104 LoadSDNode *LastLoad = InputLoads.back();
16105 assert(LastLoad && "Input needs to be a LoadSDNode.");
16106 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16107 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16108 LastLoad->getAlign());
16110 for (int i = N->getNumOperands() - 1; i >= 0; i--)
16111 Ops.push_back(i);
16112
16113 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16114 DAG.getUNDEF(N->getValueType(0)), Ops);
16115 } else
16116 return SDValue();
16117
16118 for (auto *LD : InputLoads)
16119 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16120 return ReturnSDVal;
16121}
16122
16123// This function adds the required vector_shuffle needed to get
16124// the elements of the vector extract in the correct position
16125// as specified by the CorrectElems encoding.
16127 SDValue Input, uint64_t Elems,
16128 uint64_t CorrectElems) {
16129 SDLoc dl(N);
16130
16131 unsigned NumElems = Input.getValueType().getVectorNumElements();
16132 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16133
16134 // Knowing the element indices being extracted from the original
16135 // vector and the order in which they're being inserted, just put
16136 // them at element indices required for the instruction.
16137 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16138 if (DAG.getDataLayout().isLittleEndian())
16139 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16140 else
16141 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16142 CorrectElems = CorrectElems >> 8;
16143 Elems = Elems >> 8;
16144 }
16145
16146 SDValue Shuffle =
16147 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16148 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16149
16150 EVT VT = N->getValueType(0);
16151 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16152
16153 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16154 Input.getValueType().getVectorElementType(),
16156 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16157 DAG.getValueType(ExtVT));
16158}
16159
16160// Look for build vector patterns where input operands come from sign
16161// extended vector_extract elements of specific indices. If the correct indices
16162// aren't used, add a vector shuffle to fix up the indices and create
16163// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16164// during instruction selection.
16166 // This array encodes the indices that the vector sign extend instructions
16167 // extract from when extending from one type to another for both BE and LE.
16168 // The right nibble of each byte corresponds to the LE incides.
16169 // and the left nibble of each byte corresponds to the BE incides.
16170 // For example: 0x3074B8FC byte->word
16171 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16172 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16173 // For example: 0x000070F8 byte->double word
16174 // For LE: the allowed indices are: 0x0,0x8
16175 // For BE: the allowed indices are: 0x7,0xF
16176 uint64_t TargetElems[] = {
16177 0x3074B8FC, // b->w
16178 0x000070F8, // b->d
16179 0x10325476, // h->w
16180 0x00003074, // h->d
16181 0x00001032, // w->d
16182 };
16183
16184 uint64_t Elems = 0;
16185 int Index;
16186 SDValue Input;
16187
16188 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16189 if (!Op)
16190 return false;
16191 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16192 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16193 return false;
16194
16195 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16196 // of the right width.
16197 SDValue Extract = Op.getOperand(0);
16198 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16199 Extract = Extract.getOperand(0);
16200 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16201 return false;
16202
16204 if (!ExtOp)
16205 return false;
16206
16207 Index = ExtOp->getZExtValue();
16208 if (Input && Input != Extract.getOperand(0))
16209 return false;
16210
16211 if (!Input)
16212 Input = Extract.getOperand(0);
16213
16214 Elems = Elems << 8;
16215 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16216 Elems |= Index;
16217
16218 return true;
16219 };
16220
16221 // If the build vector operands aren't sign extended vector extracts,
16222 // of the same input vector, then return.
16223 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16224 if (!isSExtOfVecExtract(N->getOperand(i))) {
16225 return SDValue();
16226 }
16227 }
16228
16229 // If the vector extract indices are not correct, add the appropriate
16230 // vector_shuffle.
16231 int TgtElemArrayIdx;
16232 int InputSize = Input.getValueType().getScalarSizeInBits();
16233 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16234 if (InputSize + OutputSize == 40)
16235 TgtElemArrayIdx = 0;
16236 else if (InputSize + OutputSize == 72)
16237 TgtElemArrayIdx = 1;
16238 else if (InputSize + OutputSize == 48)
16239 TgtElemArrayIdx = 2;
16240 else if (InputSize + OutputSize == 80)
16241 TgtElemArrayIdx = 3;
16242 else if (InputSize + OutputSize == 96)
16243 TgtElemArrayIdx = 4;
16244 else
16245 return SDValue();
16246
16247 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16248 CorrectElems = DAG.getDataLayout().isLittleEndian()
16249 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16250 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16251 if (Elems != CorrectElems) {
16252 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16253 }
16254
16255 // Regular lowering will catch cases where a shuffle is not needed.
16256 return SDValue();
16257}
16258
16259// Look for the pattern of a load from a narrow width to i128, feeding
16260// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16261// (LXVRZX). This node represents a zero extending load that will be matched
16262// to the Load VSX Vector Rightmost instructions.
16264 SDLoc DL(N);
16265
16266 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16267 if (N->getValueType(0) != MVT::v1i128)
16268 return SDValue();
16269
16270 SDValue Operand = N->getOperand(0);
16271 // Proceed with the transformation if the operand to the BUILD_VECTOR
16272 // is a load instruction.
16273 if (Operand.getOpcode() != ISD::LOAD)
16274 return SDValue();
16275
16276 auto *LD = cast<LoadSDNode>(Operand);
16277 EVT MemoryType = LD->getMemoryVT();
16278
16279 // This transformation is only valid if the we are loading either a byte,
16280 // halfword, word, or doubleword.
16281 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16282 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16283
16284 // Ensure that the load from the narrow width is being zero extended to i128.
16285 if (!ValidLDType ||
16286 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16287 LD->getExtensionType() != ISD::EXTLOAD))
16288 return SDValue();
16289
16290 SDValue LoadOps[] = {
16291 LD->getChain(), LD->getBasePtr(),
16292 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16293
16294 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16295 DAG.getVTList(MVT::v1i128, MVT::Other),
16296 LoadOps, MemoryType, LD->getMemOperand());
16297}
16298
16299SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16300 DAGCombinerInfo &DCI) const {
16301 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16302 "Should be called with a BUILD_VECTOR node");
16303
16304 SelectionDAG &DAG = DCI.DAG;
16305 SDLoc dl(N);
16306
16307 if (!Subtarget.hasVSX())
16308 return SDValue();
16309
16310 // The target independent DAG combiner will leave a build_vector of
16311 // float-to-int conversions intact. We can generate MUCH better code for
16312 // a float-to-int conversion of a vector of floats.
16313 SDValue FirstInput = N->getOperand(0);
16314 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16315 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16316 if (Reduced)
16317 return Reduced;
16318 }
16319
16320 // If we're building a vector out of consecutive loads, just load that
16321 // vector type.
16322 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16323 if (Reduced)
16324 return Reduced;
16325
16326 // If we're building a vector out of extended elements from another vector
16327 // we have P9 vector integer extend instructions. The code assumes legal
16328 // input types (i.e. it can't handle things like v4i16) so do not run before
16329 // legalization.
16330 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16331 Reduced = combineBVOfVecSExt(N, DAG);
16332 if (Reduced)
16333 return Reduced;
16334 }
16335
16336 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16337 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16338 // is a load from <valid narrow width> to i128.
16339 if (Subtarget.isISA3_1()) {
16340 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16341 if (BVOfZLoad)
16342 return BVOfZLoad;
16343 }
16344
16345 if (N->getValueType(0) != MVT::v2f64)
16346 return SDValue();
16347
16348 // Looking for:
16349 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16350 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16351 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16352 return SDValue();
16353 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16354 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16355 return SDValue();
16356 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16357 return SDValue();
16358
16359 SDValue Ext1 = FirstInput.getOperand(0);
16360 SDValue Ext2 = N->getOperand(1).getOperand(0);
16361 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16363 return SDValue();
16364
16365 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16366 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16367 if (!Ext1Op || !Ext2Op)
16368 return SDValue();
16369 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16370 Ext1.getOperand(0) != Ext2.getOperand(0))
16371 return SDValue();
16372
16373 int FirstElem = Ext1Op->getZExtValue();
16374 int SecondElem = Ext2Op->getZExtValue();
16375 int SubvecIdx;
16376 if (FirstElem == 0 && SecondElem == 1)
16377 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16378 else if (FirstElem == 2 && SecondElem == 3)
16379 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16380 else
16381 return SDValue();
16382
16383 SDValue SrcVec = Ext1.getOperand(0);
16384 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16385 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16386 return DAG.getNode(NodeType, dl, MVT::v2f64,
16387 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16388}
16389
// Combine an integer-to-floating-point conversion (SINT_TO_FP/UINT_TO_FP)
// into PPC-specific conversion sequences (FCFID and friends, or an
// LXSIZX-based sequence for sub-word loads), avoiding the store/load
// round trip through memory that the default lowering would use.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  // The direct conversion instructions used below require hardware FP and
  // 64-bit support.
  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (!Op.getOperand(0).getValueType().isSimple())
    return SDValue();
  // Only scalar integer sources wider than i1 and no wider than i64 are
  // handled by the hardware conversions.
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  // With P9 vector instructions, an i8/i16 load feeding the conversion can be
  // loaded directly into a VSR with LXSIZX and converted in the register.
  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // Width of the load in bytes (1 for i8, 2 for i16), consumed by LXSIZX.
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());
    // Keep the chain users of the original load ordered with the new load.
    DAG.makeEquivalentMemoryOrdering(LDN, Ld);

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }


  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value are undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FPCVT we could only produce f64, so round it down to f32 now.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
16496
16497// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16498// builtins) into loads with swaps.
16500 DAGCombinerInfo &DCI) const {
16501 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16502 // load combines.
16503 if (DCI.isBeforeLegalizeOps())
16504 return SDValue();
16505
16506 SelectionDAG &DAG = DCI.DAG;
16507 SDLoc dl(N);
16508 SDValue Chain;
16509 SDValue Base;
16510 MachineMemOperand *MMO;
16511
16512 switch (N->getOpcode()) {
16513 default:
16514 llvm_unreachable("Unexpected opcode for little endian VSX load");
16515 case ISD::LOAD: {
16517 Chain = LD->getChain();
16518 Base = LD->getBasePtr();
16519 MMO = LD->getMemOperand();
16520 // If the MMO suggests this isn't a load of a full vector, leave
16521 // things alone. For a built-in, we have to make the change for
16522 // correctness, so if there is a size problem that will be a bug.
16523 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16524 return SDValue();
16525 break;
16526 }
16529 Chain = Intrin->getChain();
16530 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16531 // us what we want. Get operand 2 instead.
16532 Base = Intrin->getOperand(2);
16533 MMO = Intrin->getMemOperand();
16534 break;
16535 }
16536 }
16537
16538 MVT VecTy = N->getValueType(0).getSimpleVT();
16539
16540 SDValue LoadOps[] = { Chain, Base };
16541 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16542 DAG.getVTList(MVT::v2f64, MVT::Other),
16543 LoadOps, MVT::v2f64, MMO);
16544
16545 DCI.AddToWorklist(Load.getNode());
16546 Chain = Load.getValue(1);
16547 SDValue Swap = DAG.getNode(
16548 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16549 DCI.AddToWorklist(Swap.getNode());
16550
16551 // Add a bitcast if the resulting load type doesn't match v2f64.
16552 if (VecTy != MVT::v2f64) {
16553 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16554 DCI.AddToWorklist(N.getNode());
16555 // Package {bitcast value, swap's chain} to match Load's shape.
16556 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16557 N, Swap.getValue(1));
16558 }
16559
16560 return Swap;
16561}
16562
16563// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16564// builtins) into stores with swaps.
16566 DAGCombinerInfo &DCI) const {
16567 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16568 // store combines.
16569 if (DCI.isBeforeLegalizeOps())
16570 return SDValue();
16571
16572 SelectionDAG &DAG = DCI.DAG;
16573 SDLoc dl(N);
16574 SDValue Chain;
16575 SDValue Base;
16576 unsigned SrcOpnd;
16577 MachineMemOperand *MMO;
16578
16579 switch (N->getOpcode()) {
16580 default:
16581 llvm_unreachable("Unexpected opcode for little endian VSX store");
16582 case ISD::STORE: {
16584 Chain = ST->getChain();
16585 Base = ST->getBasePtr();
16586 MMO = ST->getMemOperand();
16587 SrcOpnd = 1;
16588 // If the MMO suggests this isn't a store of a full vector, leave
16589 // things alone. For a built-in, we have to make the change for
16590 // correctness, so if there is a size problem that will be a bug.
16591 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16592 return SDValue();
16593 break;
16594 }
16595 case ISD::INTRINSIC_VOID: {
16597 Chain = Intrin->getChain();
16598 // Intrin->getBasePtr() oddly does not get what we want.
16599 Base = Intrin->getOperand(3);
16600 MMO = Intrin->getMemOperand();
16601 SrcOpnd = 2;
16602 break;
16603 }
16604 }
16605
16606 SDValue Src = N->getOperand(SrcOpnd);
16607 MVT VecTy = Src.getValueType().getSimpleVT();
16608
16609 // All stores are done as v2f64 and possible bit cast.
16610 if (VecTy != MVT::v2f64) {
16611 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16612 DCI.AddToWorklist(Src.getNode());
16613 }
16614
16615 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16616 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16617 DCI.AddToWorklist(Swap.getNode());
16618 Chain = Swap.getValue(1);
16619 SDValue StoreOps[] = { Chain, Swap, Base };
16620 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16621 DAG.getVTList(MVT::Other),
16622 StoreOps, VecTy, MMO);
16623 DCI.AddToWorklist(Store.getNode());
16624 return Store;
16625}
16626
// Handle DAG combine for STORE (FP_TO_INT F).
// Fold the conversion and the store into a single ST_VSR_SCAL_INT node so
// the converted value is stored directly from the VSX register.
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();
  // Opcode is only consumed by the assertion below.
  (void)Opcode;
  bool Strict = N->getOperand(1)->isStrictFPOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
          Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  // For strict FP nodes, operand 0 is the chain; the FP value is operand 1.
  SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
        (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
         (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  // TODO: Lower conversion from f128 on all VSX targets
  if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
    return SDValue();

  if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  // Ops: chain, converted value, base pointer, byte width, original type.
  SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
                   DAG.getIntPtrConstant(ByteSize, dl, false),
                   DAG.getValueType(Op1VT)};

  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
          DAG.getVTList(MVT::Other), Ops,
          cast<StoreSDNode>(N)->getMemoryVT(),
          cast<StoreSDNode>(N)->getMemOperand());

  return Val;
}
16675
16676static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16677 // Check that the source of the element keeps flipping
16678 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
16679 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16680 for (int i = 1, e = Mask.size(); i < e; i++) {
16681 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16682 return false;
16683 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16684 return false;
16685 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16686 }
16687 return true;
16688}
16689
16690static bool isSplatBV(SDValue Op) {
16691 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16692 return false;
16693 SDValue FirstOp;
16694
16695 // Find first non-undef input.
16696 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16697 FirstOp = Op.getOperand(i);
16698 if (!FirstOp.isUndef())
16699 break;
16700 }
16701
16702 // All inputs are undef or the same as the first non-undef input.
16703 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16704 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16705 return false;
16706 return true;
16707}
16708
16710 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16711 return Op;
16712 if (Op.getOpcode() != ISD::BITCAST)
16713 return SDValue();
16714 Op = Op.getOperand(0);
16715 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16716 return Op;
16717 return SDValue();
16718}
16719
16720// Fix up the shuffle mask to account for the fact that the result of
16721// scalar_to_vector is not in lane zero. This just takes all values in
16722// the ranges specified by the min/max indices and adds the number of
16723// elements required to ensure each element comes from the respective
16724// position in the valid lane.
16725// On little endian, that's just the corresponding element in the other
16726// half of the vector. On big endian, it is in the same half but right
16727// justified rather than left justified in that half.
16729 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16730 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16731 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16732 int LHSEltFixup =
16733 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16734 int RHSEltFixup =
16735 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16736 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16737 int Idx = ShuffV[I];
16738 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16739 ShuffV[I] += LHSEltFixup;
16740 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16741 ShuffV[I] += RHSEltFixup;
16742 }
16743}
16744
16745// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16746// the original is:
16747// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16748// In such a case, just change the shuffle mask to extract the element
16749// from the permuted index.
16751 const PPCSubtarget &Subtarget) {
16752 SDLoc dl(OrigSToV);
16753 EVT VT = OrigSToV.getValueType();
16754 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16755 "Expecting a SCALAR_TO_VECTOR here");
16756 SDValue Input = OrigSToV.getOperand(0);
16757
16758 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16759 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16760 SDValue OrigVector = Input.getOperand(0);
16761
16762 // Can't handle non-const element indices or different vector types
16763 // for the input to the extract and the output of the scalar_to_vector.
16764 if (Idx && VT == OrigVector.getValueType()) {
16765 unsigned NumElts = VT.getVectorNumElements();
16766 assert(
16767 NumElts > 1 &&
16768 "Cannot produce a permuted scalar_to_vector for one element vector");
16769 SmallVector<int, 16> NewMask(NumElts, -1);
16770 unsigned ResultInElt = NumElts / 2;
16771 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
16772 NewMask[ResultInElt] = Idx->getZExtValue();
16773 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16774 }
16775 }
16776 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16777 OrigSToV.getOperand(0));
16778}
16779
16781 int HalfVec, int LHSLastElementDefined,
16782 int RHSLastElementDefined) {
16783 for (int Index : ShuffV) {
16784 if (Index < 0) // Skip explicitly undefined mask indices.
16785 continue;
16786 // Handle first input vector of the vector_shuffle.
16787 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16788 (Index > LHSLastElementDefined))
16789 return false;
16790 // Handle second input vector of the vector_shuffle.
16791 if ((RHSLastElementDefined >= 0) &&
16792 (Index > HalfVec + RHSLastElementDefined))
16793 return false;
16794 }
16795 return true;
16796}
16797
16799 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16800 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16801 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16802 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16803 // Set up the values for the shuffle vector fixup.
16804 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16805 // The last element depends on if the input comes from the LHS or RHS.
16806 //
16807 // For example:
16808 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16809 //
16810 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16811 // because elements 1 and higher of a scalar_to_vector are undefined.
16812 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16813 // because elements 1 and higher of a scalar_to_vector are undefined.
16814 // It is also not 4 because the original scalar_to_vector is wider and
16815 // actually contains two i32 elements.
16816 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16817 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16818 : FirstElt;
16819 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16820 if (SToVPermuted.getValueType() != VecShuffOperandType)
16821 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16822 return SToVPermuted;
16823}
16824
16825// On little endian subtargets, combine shuffles such as:
16826// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16827// into:
16828// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16829// because the latter can be matched to a single instruction merge.
16830// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16831// to put the value into element zero. Adjust the shuffle mask so that the
16832// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16833// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16834// nodes with elements smaller than doubleword because all the ways
16835// of getting scalar data into a vector register put the value in the
16836// rightmost element of the left half of the vector.
16837SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16838 SelectionDAG &DAG) const {
16839 SDValue LHS = SVN->getOperand(0);
16840 SDValue RHS = SVN->getOperand(1);
16841 auto Mask = SVN->getMask();
16842 int NumElts = LHS.getValueType().getVectorNumElements();
16843 SDValue Res(SVN, 0);
16844 SDLoc dl(SVN);
16845 bool IsLittleEndian = Subtarget.isLittleEndian();
16846
16847 // On big endian targets this is only useful for subtargets with direct moves.
16848 // On little endian targets it would be useful for all subtargets with VSX.
16849 // However adding special handling for LE subtargets without direct moves
16850 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16851 // which includes direct moves.
16852 if (!Subtarget.hasDirectMove())
16853 return Res;
16854
16855 // If this is not a shuffle of a shuffle and the first element comes from
16856 // the second vector, canonicalize to the commuted form. This will make it
16857 // more likely to match one of the single instruction patterns.
16858 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16859 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16860 std::swap(LHS, RHS);
16861 Res = DAG.getCommutedVectorShuffle(*SVN);
16862 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16863 }
16864
16865 // Adjust the shuffle mask if either input vector comes from a
16866 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16867 // form (to prevent the need for a swap).
16868 SmallVector<int, 16> ShuffV(Mask);
16869 SDValue SToVLHS = isScalarToVec(LHS);
16870 SDValue SToVRHS = isScalarToVec(RHS);
16871 if (SToVLHS || SToVRHS) {
16872 EVT VT = SVN->getValueType(0);
16873 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16874 int ShuffleNumElts = ShuffV.size();
16875 int HalfVec = ShuffleNumElts / 2;
16876 // The width of the "valid lane" (i.e. the lane that contains the value that
16877 // is vectorized) needs to be expressed in terms of the number of elements
16878 // of the shuffle. It is thereby the ratio of the values before and after
16879 // any bitcast, which will be set later on if the LHS or RHS are
16880 // SCALAR_TO_VECTOR nodes.
16881 unsigned LHSNumValidElts = HalfVec;
16882 unsigned RHSNumValidElts = HalfVec;
16883
16884 // Initially assume that neither input is permuted. These will be adjusted
16885 // accordingly if either input is. Note, that -1 means that all elements
16886 // are undefined.
16887 int LHSFirstElt = 0;
16888 int RHSFirstElt = ShuffleNumElts;
16889 int LHSLastElt = -1;
16890 int RHSLastElt = -1;
16891
16892 // Get the permuted scalar to vector nodes for the source(s) that come from
16893 // ISD::SCALAR_TO_VECTOR.
16894 // On big endian systems, this only makes sense for element sizes smaller
16895 // than 64 bits since for 64-bit elements, all instructions already put
16896 // the value into element zero. Since scalar size of LHS and RHS may differ
16897 // after isScalarToVec, this should be checked using their own sizes.
16898 int LHSScalarSize = 0;
16899 int RHSScalarSize = 0;
16900 if (SToVLHS) {
16901 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16902 if (!IsLittleEndian && LHSScalarSize >= 64)
16903 return Res;
16904 }
16905 if (SToVRHS) {
16906 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16907 if (!IsLittleEndian && RHSScalarSize >= 64)
16908 return Res;
16909 }
16910 if (LHSScalarSize != 0)
16912 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
16913 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
16914 if (RHSScalarSize != 0)
16916 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
16917 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
16918
16919 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
16920 return Res;
16921
16922 // Fix up the shuffle mask to reflect where the desired element actually is.
16923 // The minimum and maximum indices that correspond to element zero for both
16924 // the LHS and RHS are computed and will control which shuffle mask entries
16925 // are to be changed. For example, if the RHS is permuted, any shuffle mask
16926 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
16928 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
16929 LHSNumValidElts, RHSNumValidElts, Subtarget);
16930 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16931
16932 // We may have simplified away the shuffle. We won't be able to do anything
16933 // further with it here.
16934 if (!isa<ShuffleVectorSDNode>(Res))
16935 return Res;
16936 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16937 }
16938
16939 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
16940 // The common case after we commuted the shuffle is that the RHS is a splat
16941 // and we have elements coming in from the splat at indices that are not
16942 // conducive to using a merge.
16943 // Example:
16944 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
16945 if (!isSplatBV(TheSplat))
16946 return Res;
16947
16948 // We are looking for a mask such that all even elements are from
16949 // one vector and all odd elements from the other.
16950 if (!isAlternatingShuffMask(Mask, NumElts))
16951 return Res;
16952
16953 // Adjust the mask so we are pulling in the same index from the splat
16954 // as the index from the interesting vector in consecutive elements.
16955 if (IsLittleEndian) {
16956 // Example (even elements from first vector):
16957 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
16958 if (Mask[0] < NumElts)
16959 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16960 if (ShuffV[i] < 0)
16961 continue;
16962 // If element from non-splat is undef, pick first element from splat.
16963 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16964 }
16965 // Example (odd elements from first vector):
16966 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16967 else
16968 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16969 if (ShuffV[i] < 0)
16970 continue;
16971 // If element from non-splat is undef, pick first element from splat.
16972 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16973 }
16974 } else {
16975 // Example (even elements from first vector):
16976 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16977 if (Mask[0] < NumElts)
16978 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16979 if (ShuffV[i] < 0)
16980 continue;
16981 // If element from non-splat is undef, pick first element from splat.
16982 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16983 }
16984 // Example (odd elements from first vector):
16985 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16986 else
16987 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16988 if (ShuffV[i] < 0)
16989 continue;
16990 // If element from non-splat is undef, pick first element from splat.
16991 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16992 }
16993 }
16994
16995 // If the RHS has undefs, we need to remove them since we may have created
16996 // a shuffle that adds those instead of the splat value.
16997 SDValue SplatVal =
16998 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16999 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
17000
17001 if (IsLittleEndian)
17002 RHS = TheSplat;
17003 else
17004 LHS = TheSplat;
17005 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17006}
17007
// Combine a VECTOR_SHUFFLE that fully reverses the elements of a normal
// vector load/store with the memory operation itself, producing a single
// byte-reversed access (LOAD_VEC_BE / STORE_VEC_BE). Little endian P9+
// only; earlier subtargets rely on the PPCVSXSwapRemoval pass instead.
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
                                                LSBaseSDNode *LSBase,
                                                DAGCombinerInfo &DCI) const {
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
        "Not a reverse memop pattern!");

  // Returns true iff the mask is exactly <N-1, N-2, ..., 1, 0>.
  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
    auto Mask = SVN->getMask();
    int i = 0;
    auto I = Mask.rbegin();
    auto E = Mask.rend();

    // Walk the mask backwards; it must count up 0, 1, 2, ...
    for (; I != E; ++I) {
      if (*I != i)
        return false;
      i++;
    }
    return true;
  };

  SelectionDAG &DAG = DCI.DAG;
  EVT VT = SVN->getValueType(0);

  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
    return SDValue();

  // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
  // See comment in PPCVSXSwapRemoval.cpp.
  // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
  if (!Subtarget.hasP9Vector())
    return SDValue();

  if(!IsElementReverse(SVN))
    return SDValue();

  if (LSBase->getOpcode() == ISD::LOAD) {
    // If the load return value 0 has more than one user except the
    // shufflevector instruction, it is not profitable to replace the
    // shufflevector with a reverse load.
    for (SDUse &Use : LSBase->uses())
      if (Use.getResNo() == 0 &&
          Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
        return SDValue();

    SDLoc dl(LSBase);
    SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  if (LSBase->getOpcode() == ISD::STORE) {
    // If there are other uses of the shuffle, the swap cannot be avoided.
    // Forcing the use of an X-Form (since swapped stores only have
    // X-Forms) without removing the swap is unprofitable.
    if (!SVN->hasOneUse())
      return SDValue();

    SDLoc dl(LSBase);
    // Store the shuffle's *input* directly; the BE store reverses for us.
    SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
                          LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  llvm_unreachable("Expected a load or store node here");
}
17076
17077static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17078 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
17079 if (IntrinsicID == Intrinsic::ppc_stdcx)
17080 StoreWidth = 8;
17081 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17082 StoreWidth = 4;
17083 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17084 StoreWidth = 2;
17085 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17086 StoreWidth = 1;
17087 else
17088 return false;
17089 return true;
17090}
17091
17094 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
17095 // (ADDC (ADDE 0, 0, C), -1) -> C
17096 SDValue LHS = N->getOperand(0);
17097 SDValue RHS = N->getOperand(1);
17098 if (LHS->getOpcode() == PPCISD::ADDE &&
17099 isNullConstant(LHS->getOperand(0)) &&
17100 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
17101 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
17102 }
17103 }
17104 return SDValue();
17105}
17106
17108 DAGCombinerInfo &DCI) const {
17109 SelectionDAG &DAG = DCI.DAG;
17110 SDLoc dl(N);
17111 switch (N->getOpcode()) {
17112 default: break;
17113 case ISD::ADD:
17114 return combineADD(N, DCI);
17115 case ISD::AND: {
17116 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17117 // original input as that will prevent us from selecting optimal rotates.
17118 // This only matters if the input to the extend is i32 widened to i64.
17119 SDValue Op1 = N->getOperand(0);
17120 SDValue Op2 = N->getOperand(1);
17121 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17122 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17123 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17124 Op1.getOperand(0).getValueType() != MVT::i32)
17125 break;
17126 SDValue NarrowOp = Op1.getOperand(0);
17127 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17128 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17129 break;
17130
17131 uint64_t Imm = Op2->getAsZExtVal();
17132 // Make sure that the constant is narrow enough to fit in the narrow type.
17133 if (!isUInt<32>(Imm))
17134 break;
17135 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17136 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17137 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17138 }
17139 case ISD::SHL:
17140 return combineSHL(N, DCI);
17141 case ISD::SRA:
17142 return combineSRA(N, DCI);
17143 case ISD::SRL:
17144 return combineSRL(N, DCI);
17145 case ISD::MUL:
17146 return combineMUL(N, DCI);
17147 case ISD::FMA:
17148 case PPCISD::FNMSUB:
17149 return combineFMALike(N, DCI);
17150 case PPCISD::SHL:
17151 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17152 return N->getOperand(0);
17153 break;
17154 case PPCISD::SRL:
17155 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17156 return N->getOperand(0);
17157 break;
17158 case PPCISD::SRA:
17159 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17160 if (C->isZero() || // 0 >>s V -> 0.
17161 C->isAllOnes()) // -1 >>s V -> -1.
17162 return N->getOperand(0);
17163 }
17164 break;
17165 case ISD::SIGN_EXTEND:
17166 case ISD::ZERO_EXTEND:
17167 case ISD::ANY_EXTEND:
17168 return DAGCombineExtBoolTrunc(N, DCI);
17169 case ISD::TRUNCATE:
17170 return combineTRUNCATE(N, DCI);
17171 case ISD::SETCC:
17172 if (SDValue CSCC = combineSetCC(N, DCI))
17173 return CSCC;
17174 [[fallthrough]];
17175 case ISD::SELECT_CC:
17176 return DAGCombineTruncBoolExt(N, DCI);
17177 case ISD::SINT_TO_FP:
17178 case ISD::UINT_TO_FP:
17179 return combineFPToIntToFP(N, DCI);
17181 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17182 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17183 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17184 }
17185 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17186 case ISD::STORE: {
17187
17188 EVT Op1VT = N->getOperand(1).getValueType();
17189 unsigned Opcode = N->getOperand(1).getOpcode();
17190
17191 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17192 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17193 SDValue Val = combineStoreFPToInt(N, DCI);
17194 if (Val)
17195 return Val;
17196 }
17197
17198 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17199 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17200 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17201 if (Val)
17202 return Val;
17203 }
17204
17205 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17206 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17207 N->getOperand(1).getNode()->hasOneUse() &&
17208 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17209 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17210
17211 // STBRX can only handle simple types and it makes no sense to store less
17212 // two bytes in byte-reversed order.
17213 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17214 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17215 break;
17216
17217 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17218 // Do an any-extend to 32-bits if this is a half-word input.
17219 if (BSwapOp.getValueType() == MVT::i16)
17220 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17221
17222 // If the type of BSWAP operand is wider than stored memory width
17223 // it need to be shifted to the right side before STBRX.
17224 if (Op1VT.bitsGT(mVT)) {
17225 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17226 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17227 DAG.getConstant(Shift, dl, MVT::i32));
17228 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17229 if (Op1VT == MVT::i64)
17230 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17231 }
17232
17233 SDValue Ops[] = {
17234 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17235 };
17236 return
17237 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17238 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17239 cast<StoreSDNode>(N)->getMemOperand());
17240 }
17241
17242 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17243 // So it can increase the chance of CSE constant construction.
17244 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17245 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17246 // Need to sign-extended to 64-bits to handle negative values.
17247 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17248 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17249 MemVT.getSizeInBits());
17250 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17251
17252 auto *ST = cast<StoreSDNode>(N);
17253 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17254 ST->getBasePtr(), ST->getOffset(), MemVT,
17255 ST->getMemOperand(), ST->getAddressingMode(),
17256 /*IsTruncating=*/true);
17257 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17258 // new store which will change the constant by removing non-demanded bits.
17259 return ST->isUnindexed()
17260 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17261 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17262 }
17263
17264 // For little endian, VSX stores require generating xxswapd/lxvd2x.
17265 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17266 if (Op1VT.isSimple()) {
17267 MVT StoreVT = Op1VT.getSimpleVT();
17268 if (Subtarget.needsSwapsForVSXMemOps() &&
17269 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17270 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17271 return expandVSXStoreForLE(N, DCI);
17272 }
17273 break;
17274 }
17275 case ISD::LOAD: {
17277 EVT VT = LD->getValueType(0);
17278
17279 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17280 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17281 if (VT.isSimple()) {
17282 MVT LoadVT = VT.getSimpleVT();
17283 if (Subtarget.needsSwapsForVSXMemOps() &&
17284 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17285 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17286 return expandVSXLoadForLE(N, DCI);
17287 }
17288
17289 // We sometimes end up with a 64-bit integer load, from which we extract
17290 // two single-precision floating-point numbers. This happens with
17291 // std::complex<float>, and other similar structures, because of the way we
17292 // canonicalize structure copies. However, if we lack direct moves,
17293 // then the final bitcasts from the extracted integer values to the
17294 // floating-point numbers turn into store/load pairs. Even with direct moves,
17295 // just loading the two floating-point numbers is likely better.
17296 auto ReplaceTwoFloatLoad = [&]() {
17297 if (VT != MVT::i64)
17298 return false;
17299
17300 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17301 LD->isVolatile())
17302 return false;
17303
17304 // We're looking for a sequence like this:
17305 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17306 // t16: i64 = srl t13, Constant:i32<32>
17307 // t17: i32 = truncate t16
17308 // t18: f32 = bitcast t17
17309 // t19: i32 = truncate t13
17310 // t20: f32 = bitcast t19
17311
17312 if (!LD->hasNUsesOfValue(2, 0))
17313 return false;
17314
17315 auto UI = LD->user_begin();
17316 while (UI.getUse().getResNo() != 0) ++UI;
17317 SDNode *Trunc = *UI++;
17318 while (UI.getUse().getResNo() != 0) ++UI;
17319 SDNode *RightShift = *UI;
17320 if (Trunc->getOpcode() != ISD::TRUNCATE)
17321 std::swap(Trunc, RightShift);
17322
17323 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17324 Trunc->getValueType(0) != MVT::i32 ||
17325 !Trunc->hasOneUse())
17326 return false;
17327 if (RightShift->getOpcode() != ISD::SRL ||
17328 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17329 RightShift->getConstantOperandVal(1) != 32 ||
17330 !RightShift->hasOneUse())
17331 return false;
17332
17333 SDNode *Trunc2 = *RightShift->user_begin();
17334 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17335 Trunc2->getValueType(0) != MVT::i32 ||
17336 !Trunc2->hasOneUse())
17337 return false;
17338
17339 SDNode *Bitcast = *Trunc->user_begin();
17340 SDNode *Bitcast2 = *Trunc2->user_begin();
17341
17342 if (Bitcast->getOpcode() != ISD::BITCAST ||
17343 Bitcast->getValueType(0) != MVT::f32)
17344 return false;
17345 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17346 Bitcast2->getValueType(0) != MVT::f32)
17347 return false;
17348
17349 if (Subtarget.isLittleEndian())
17350 std::swap(Bitcast, Bitcast2);
17351
17352 // Bitcast has the second float (in memory-layout order) and Bitcast2
17353 // has the first one.
17354
17355 SDValue BasePtr = LD->getBasePtr();
17356 if (LD->isIndexed()) {
17357 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17358 "Non-pre-inc AM on PPC?");
17359 BasePtr =
17360 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17361 LD->getOffset());
17362 }
17363
17364 auto MMOFlags =
17365 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17366 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17367 LD->getPointerInfo(), LD->getAlign(),
17368 MMOFlags, LD->getAAInfo());
17369 SDValue AddPtr =
17370 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17371 BasePtr, DAG.getIntPtrConstant(4, dl));
17372 SDValue FloatLoad2 = DAG.getLoad(
17373 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17374 LD->getPointerInfo().getWithOffset(4),
17375 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17376
17377 if (LD->isIndexed()) {
17378 // Note that DAGCombine should re-form any pre-increment load(s) from
17379 // what is produced here if that makes sense.
17380 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17381 }
17382
17383 DCI.CombineTo(Bitcast2, FloatLoad);
17384 DCI.CombineTo(Bitcast, FloatLoad2);
17385
17386 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17387 SDValue(FloatLoad2.getNode(), 1));
17388 return true;
17389 };
17390
17391 if (ReplaceTwoFloatLoad())
17392 return SDValue(N, 0);
17393
17394 EVT MemVT = LD->getMemoryVT();
17395 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17396 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17397 if (LD->isUnindexed() && VT.isVector() &&
17398 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17399 // P8 and later hardware should just use LOAD.
17400 !Subtarget.hasP8Vector() &&
17401 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17402 VT == MVT::v4f32))) &&
17403 LD->getAlign() < ABIAlignment) {
17404 // This is a type-legal unaligned Altivec load.
17405 SDValue Chain = LD->getChain();
17406 SDValue Ptr = LD->getBasePtr();
17407 bool isLittleEndian = Subtarget.isLittleEndian();
17408
17409 // This implements the loading of unaligned vectors as described in
17410 // the venerable Apple Velocity Engine overview. Specifically:
17411 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17412 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17413 //
17414 // The general idea is to expand a sequence of one or more unaligned
17415 // loads into an alignment-based permutation-control instruction (lvsl
17416 // or lvsr), a series of regular vector loads (which always truncate
17417 // their input address to an aligned address), and a series of
17418 // permutations. The results of these permutations are the requested
17419 // loaded values. The trick is that the last "extra" load is not taken
17420 // from the address you might suspect (sizeof(vector) bytes after the
17421 // last requested load), but rather sizeof(vector) - 1 bytes after the
17422 // last requested vector. The point of this is to avoid a page fault if
17423 // the base address happened to be aligned. This works because if the
17424 // base address is aligned, then adding less than a full vector length
17425 // will cause the last vector in the sequence to be (re)loaded.
17426 // Otherwise, the next vector will be fetched as you might suspect was
17427 // necessary.
17428
17429 // We might be able to reuse the permutation generation from
17430 // a different base address offset from this one by an aligned amount.
17431 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17432 // optimization later.
17433 Intrinsic::ID Intr, IntrLD, IntrPerm;
17434 MVT PermCntlTy, PermTy, LDTy;
17435 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17436 : Intrinsic::ppc_altivec_lvsl;
17437 IntrLD = Intrinsic::ppc_altivec_lvx;
17438 IntrPerm = Intrinsic::ppc_altivec_vperm;
17439 PermCntlTy = MVT::v16i8;
17440 PermTy = MVT::v4i32;
17441 LDTy = MVT::v4i32;
17442
17443 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17444
17445 // Create the new MMO for the new base load. It is like the original MMO,
17446 // but represents an area in memory almost twice the vector size centered
17447 // on the original address. If the address is unaligned, we might start
17448 // reading up to (sizeof(vector)-1) bytes below the address of the
17449 // original unaligned load.
17451 MachineMemOperand *BaseMMO =
17452 MF.getMachineMemOperand(LD->getMemOperand(),
17453 -(int64_t)MemVT.getStoreSize()+1,
17454 2*MemVT.getStoreSize()-1);
17455
17456 // Create the new base load.
17457 SDValue LDXIntID =
17458 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17459 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17460 SDValue BaseLoad =
17462 DAG.getVTList(PermTy, MVT::Other),
17463 BaseLoadOps, LDTy, BaseMMO);
17464
17465 // Note that the value of IncOffset (which is provided to the next
17466 // load's pointer info offset value, and thus used to calculate the
17467 // alignment), and the value of IncValue (which is actually used to
17468 // increment the pointer value) are different! This is because we
17469 // require the next load to appear to be aligned, even though it
17470 // is actually offset from the base pointer by a lesser amount.
17471 int IncOffset = VT.getSizeInBits() / 8;
17472 int IncValue = IncOffset;
17473
17474 // Walk (both up and down) the chain looking for another load at the real
17475 // (aligned) offset (the alignment of the other load does not matter in
17476 // this case). If found, then do not use the offset reduction trick, as
17477 // that will prevent the loads from being later combined (as they would
17478 // otherwise be duplicates).
17479 if (!findConsecutiveLoad(LD, DAG))
17480 --IncValue;
17481
17483 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17484 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17485
17486 MachineMemOperand *ExtraMMO =
17487 MF.getMachineMemOperand(LD->getMemOperand(),
17488 1, 2*MemVT.getStoreSize()-1);
17489 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17490 SDValue ExtraLoad =
17492 DAG.getVTList(PermTy, MVT::Other),
17493 ExtraLoadOps, LDTy, ExtraMMO);
17494
17495 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17496 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17497
17498 // Because vperm has a big-endian bias, we must reverse the order
17499 // of the input vectors and complement the permute control vector
17500 // when generating little endian code. We have already handled the
17501 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17502 // and ExtraLoad here.
17503 SDValue Perm;
17504 if (isLittleEndian)
17505 Perm = BuildIntrinsicOp(IntrPerm,
17506 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17507 else
17508 Perm = BuildIntrinsicOp(IntrPerm,
17509 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17510
17511 if (VT != PermTy)
17512 Perm = Subtarget.hasAltivec()
17513 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17514 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17515 DAG.getTargetConstant(1, dl, MVT::i64));
17516 // second argument is 1 because this rounding
17517 // is always exact.
17518
17519 // The output of the permutation is our loaded result, the TokenFactor is
17520 // our new chain.
17521 DCI.CombineTo(N, Perm, TF);
17522 return SDValue(N, 0);
17523 }
17524 }
17525 break;
17527 bool isLittleEndian = Subtarget.isLittleEndian();
17528 unsigned IID = N->getConstantOperandVal(0);
17529 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17530 : Intrinsic::ppc_altivec_lvsl);
17531 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17532 SDValue Add = N->getOperand(1);
17533
17534 int Bits = 4 /* 16 byte alignment */;
17535
17536 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17537 APInt::getAllOnes(Bits /* alignment */)
17538 .zext(Add.getScalarValueSizeInBits()))) {
17539 SDNode *BasePtr = Add->getOperand(0).getNode();
17540 for (SDNode *U : BasePtr->users()) {
17541 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17542 U->getConstantOperandVal(0) == IID) {
17543 // We've found another LVSL/LVSR, and this address is an aligned
17544 // multiple of that one. The results will be the same, so use the
17545 // one we've just found instead.
17546
17547 return SDValue(U, 0);
17548 }
17549 }
17550 }
17551
17552 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17553 SDNode *BasePtr = Add->getOperand(0).getNode();
17554 for (SDNode *U : BasePtr->users()) {
17555 if (U->getOpcode() == ISD::ADD &&
17556 isa<ConstantSDNode>(U->getOperand(1)) &&
17557 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17558 (1ULL << Bits) ==
17559 0) {
17560 SDNode *OtherAdd = U;
17561 for (SDNode *V : OtherAdd->users()) {
17562 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17563 V->getConstantOperandVal(0) == IID) {
17564 return SDValue(V, 0);
17565 }
17566 }
17567 }
17568 }
17569 }
17570 }
17571
17572 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
17573 // Expose the vabsduw/h/b opportunity for down stream
17574 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17575 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17576 IID == Intrinsic::ppc_altivec_vmaxsh ||
17577 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17578 SDValue V1 = N->getOperand(1);
17579 SDValue V2 = N->getOperand(2);
17580 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17581 V1.getSimpleValueType() == MVT::v8i16 ||
17582 V1.getSimpleValueType() == MVT::v16i8) &&
17584 // (0-a, a)
17585 if (V1.getOpcode() == ISD::SUB &&
17587 V1.getOperand(1) == V2) {
17588 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17589 }
17590 // (a, 0-a)
17591 if (V2.getOpcode() == ISD::SUB &&
17593 V2.getOperand(1) == V1) {
17594 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17595 }
17596 // (x-y, y-x)
17597 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17598 V1.getOperand(0) == V2.getOperand(1) &&
17599 V1.getOperand(1) == V2.getOperand(0)) {
17600 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17601 }
17602 }
17603 }
17604 }
17605
17606 break;
17608 switch (N->getConstantOperandVal(1)) {
17609 default:
17610 break;
17611 case Intrinsic::ppc_altivec_vsum4sbs:
17612 case Intrinsic::ppc_altivec_vsum4shs:
17613 case Intrinsic::ppc_altivec_vsum4ubs: {
17614 // These sum-across intrinsics only have a chain due to the side effect
17615 // that they may set the SAT bit. If we know the SAT bit will not be set
17616 // for some inputs, we can replace any uses of their chain with the
17617 // input chain.
17618 if (BuildVectorSDNode *BVN =
17619 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17620 APInt APSplatBits, APSplatUndef;
17621 unsigned SplatBitSize;
17622 bool HasAnyUndefs;
17623 bool BVNIsConstantSplat = BVN->isConstantSplat(
17624 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17625 !Subtarget.isLittleEndian());
17626 // If the constant splat vector is 0, the SAT bit will not be set.
17627 if (BVNIsConstantSplat && APSplatBits == 0)
17628 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17629 }
17630 return SDValue();
17631 }
17632 case Intrinsic::ppc_vsx_lxvw4x:
17633 case Intrinsic::ppc_vsx_lxvd2x:
17634 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17635 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17636 if (Subtarget.needsSwapsForVSXMemOps())
17637 return expandVSXLoadForLE(N, DCI);
17638 break;
17639 }
17640 break;
17642 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17643 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17644 if (Subtarget.needsSwapsForVSXMemOps()) {
17645 switch (N->getConstantOperandVal(1)) {
17646 default:
17647 break;
17648 case Intrinsic::ppc_vsx_stxvw4x:
17649 case Intrinsic::ppc_vsx_stxvd2x:
17650 return expandVSXStoreForLE(N, DCI);
17651 }
17652 }
17653 break;
17654 case ISD::BSWAP: {
17655 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17656 // For subtargets without LDBRX, we can still do better than the default
17657 // expansion even for 64-bit BSWAP (LOAD).
17658 bool Is64BitBswapOn64BitTgt =
17659 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17660 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17661 N->getOperand(0).hasOneUse();
17662 if (IsSingleUseNormalLd &&
17663 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17664 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17665 SDValue Load = N->getOperand(0);
17666 LoadSDNode *LD = cast<LoadSDNode>(Load);
17667 // Create the byte-swapping load.
17668 SDValue Ops[] = {
17669 LD->getChain(), // Chain
17670 LD->getBasePtr(), // Ptr
17671 DAG.getValueType(N->getValueType(0)) // VT
17672 };
17673 SDValue BSLoad =
17674 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
17675 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
17676 MVT::i64 : MVT::i32, MVT::Other),
17677 Ops, LD->getMemoryVT(), LD->getMemOperand());
17678
17679 // If this is an i16 load, insert the truncate.
17680 SDValue ResVal = BSLoad;
17681 if (N->getValueType(0) == MVT::i16)
17682 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
17683
17684 // First, combine the bswap away. This makes the value produced by the
17685 // load dead.
17686 DCI.CombineTo(N, ResVal);
17687
17688 // Next, combine the load away, we give it a bogus result value but a real
17689 // chain result. The result value is dead because the bswap is dead.
17690 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
17691
17692 // Return N so it doesn't get rechecked!
17693 return SDValue(N, 0);
17694 }
17695 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17696 // before legalization so that the BUILD_PAIR is handled correctly.
17697 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17698 !IsSingleUseNormalLd)
17699 return SDValue();
17700 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
17701
17702 // Can't split volatile or atomic loads.
17703 if (!LD->isSimple())
17704 return SDValue();
17705 SDValue BasePtr = LD->getBasePtr();
17706 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
17707 LD->getPointerInfo(), LD->getAlign());
17708 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
17709 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17710 DAG.getIntPtrConstant(4, dl));
17712 LD->getMemOperand(), 4, 4);
17713 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
17714 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
17715 SDValue Res;
17716 if (Subtarget.isLittleEndian())
17717 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
17718 else
17719 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
17720 SDValue TF =
17721 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17722 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
17723 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
17724 return Res;
17725 }
17726 case PPCISD::VCMP:
17727 // If a VCMP_rec node already exists with exactly the same operands as this
17728 // node, use its result instead of this node (VCMP_rec computes both a CR6
17729 // and a normal output).
17730 //
17731 if (!N->getOperand(0).hasOneUse() &&
17732 !N->getOperand(1).hasOneUse() &&
17733 !N->getOperand(2).hasOneUse()) {
17734
17735 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17736 SDNode *VCMPrecNode = nullptr;
17737
17738 SDNode *LHSN = N->getOperand(0).getNode();
17739 for (SDNode *User : LHSN->users())
17740 if (User->getOpcode() == PPCISD::VCMP_rec &&
17741 User->getOperand(1) == N->getOperand(1) &&
17742 User->getOperand(2) == N->getOperand(2) &&
17743 User->getOperand(0) == N->getOperand(0)) {
17744 VCMPrecNode = User;
17745 break;
17746 }
17747
17748 // If there is no VCMP_rec node, or if the flag value has a single use,
17749 // don't transform this.
17750 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
17751 break;
17752
17753 // Look at the (necessarily single) use of the flag value. If it has a
17754 // chain, this transformation is more complex. Note that multiple things
17755 // could use the value result, which we should ignore.
17756 SDNode *FlagUser = nullptr;
17757 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17758 FlagUser == nullptr; ++UI) {
17759 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17760 SDNode *User = UI->getUser();
17761 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17762 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
17763 FlagUser = User;
17764 break;
17765 }
17766 }
17767 }
17768
17769 // If the user is a MFOCRF instruction, we know this is safe.
17770 // Otherwise we give up for right now.
17771 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17772 return SDValue(VCMPrecNode, 0);
17773 }
17774 break;
17775 case ISD::BR_CC: {
17776 // If this is a branch on an altivec predicate comparison, lower this so
17777 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17778 // lowering is done pre-legalize, because the legalizer lowers the predicate
17779 // compare down to code that is difficult to reassemble.
17780 // This code also handles branches that depend on the result of a store
17781 // conditional.
17782 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
17783 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
17784
17785 int CompareOpc;
17786 bool isDot;
17787
17788 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17789 break;
17790
17791 // Since we are doing this pre-legalize, the RHS can be a constant of
17792 // arbitrary bitwidth which may cause issues when trying to get the value
17793 // from the underlying APInt.
17794 auto RHSAPInt = RHS->getAsAPIntVal();
17795 if (!RHSAPInt.isIntN(64))
17796 break;
17797
17798 unsigned Val = RHSAPInt.getZExtValue();
17799 auto isImpossibleCompare = [&]() {
17800 // If this is a comparison against something other than 0/1, then we know
17801 // that the condition is never/always true.
17802 if (Val != 0 && Val != 1) {
17803 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17804 return N->getOperand(0);
17805 // Always !=, turn it into an unconditional branch.
17806 return DAG.getNode(ISD::BR, dl, MVT::Other,
17807 N->getOperand(0), N->getOperand(4));
17808 }
17809 return SDValue();
17810 };
17811 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17812 unsigned StoreWidth = 0;
17813 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17814 isStoreConditional(LHS, StoreWidth)) {
17815 if (SDValue Impossible = isImpossibleCompare())
17816 return Impossible;
17817 PPC::Predicate CompOpc;
17818 // eq 0 => ne
17819 // ne 0 => eq
17820 // eq 1 => eq
17821 // ne 1 => ne
17822 if (Val == 0)
17823 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17824 else
17825 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17826
17827 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
17828 DAG.getConstant(StoreWidth, dl, MVT::i32)};
17829 auto *MemNode = cast<MemSDNode>(LHS);
17830 SDValue ConstSt = DAG.getMemIntrinsicNode(
17831 PPCISD::STORE_COND, dl,
17832 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
17833 MemNode->getMemoryVT(), MemNode->getMemOperand());
17834
17835 SDValue InChain;
17836 // Unchain the branch from the original store conditional.
17837 if (N->getOperand(0) == LHS.getValue(1))
17838 InChain = LHS.getOperand(0);
17839 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
17840 SmallVector<SDValue, 4> InChains;
17841 SDValue InTF = N->getOperand(0);
17842 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17843 if (InTF.getOperand(i) != LHS.getValue(1))
17844 InChains.push_back(InTF.getOperand(i));
17845 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
17846 }
17847
17848 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
17849 DAG.getConstant(CompOpc, dl, MVT::i32),
17850 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
17851 ConstSt.getValue(2));
17852 }
17853
17854 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17855 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
17856 assert(isDot && "Can't compare against a vector result!");
17857
17858 if (SDValue Impossible = isImpossibleCompare())
17859 return Impossible;
17860
17861 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17862 // Create the PPCISD altivec 'dot' comparison node.
17863 SDValue Ops[] = {
17864 LHS.getOperand(2), // LHS of compare
17865 LHS.getOperand(3), // RHS of compare
17866 DAG.getConstant(CompareOpc, dl, MVT::i32)
17867 };
17868 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
17869 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
17870
17871 // Unpack the result based on how the target uses it.
17872 PPC::Predicate CompOpc;
17873 switch (LHS.getConstantOperandVal(1)) {
17874 default: // Can't happen, don't crash on invalid number though.
17875 case 0: // Branch on the value of the EQ bit of CR6.
17876 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17877 break;
17878 case 1: // Branch on the inverted value of the EQ bit of CR6.
17879 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17880 break;
17881 case 2: // Branch on the value of the LT bit of CR6.
17882 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17883 break;
17884 case 3: // Branch on the inverted value of the LT bit of CR6.
17885 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17886 break;
17887 }
17888
17889 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
17890 DAG.getConstant(CompOpc, dl, MVT::i32),
17891 DAG.getRegister(PPC::CR6, MVT::i32),
17892 N->getOperand(4), CompNode.getValue(1));
17893 }
17894 break;
17895 }
17896 case ISD::BUILD_VECTOR:
17897 return DAGCombineBuildVector(N, DCI);
17898 case PPCISD::ADDC:
17899 return DAGCombineAddc(N, DCI);
17900 }
17901
17902 return SDValue();
17903}
17904
// NOTE(review): rendered source listing -- each line begins with the upstream
// line number, and upstream line 17906 (carrying the function name and first
// parameters, presumably
// PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, ...))
// was dropped by the extraction. Confirm against the upstream file.
//
// Lowers sdiv-by-(+/-)power-of-two into a PPC-specific shift/add-carry
// sequence; new nodes are recorded in Created for the caller.
17905 SDValue
17907                                  SelectionDAG &DAG,
17908                                  SmallVectorImpl<SDNode *> &Created) const {
17909   // fold (sdiv X, pow2)
  // Only i32 is handled on 32-bit targets; i64 additionally requires a
  // 64-bit subtarget. The divisor must be 2^k or -(2^k).
17910   EVT VT = N->getValueType(0);
17911   if (VT == MVT::i64 && !Subtarget.isPPC64())
17912     return SDValue();
17913   if ((VT != MVT::i32 && VT != MVT::i64) ||
17914       !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
17915     return SDValue();
17916
17917   SDLoc DL(N);
17918   SDValue N0 = N->getOperand(0);
17919
  // Lg2 = log2 of the divisor's magnitude: for -(2^k) negate first so
  // countr_zero sees the positive power of two.
17920   bool IsNegPow2 = Divisor.isNegatedPowerOf2();
17921   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
17922   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
17923
  // SRA_ADDZE names the sra-then-addze idiom: the arithmetic shift plus the
  // carry add corrects rounding so the quotient rounds toward zero for
  // negative dividends (confirm exact expansion in the PPCISD definition).
17924   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
17925   Created.push_back(Op.getNode());
17926
17927   if (IsNegPow2) {
    // Divisor was negative: negate the quotient (0 - Op).
17928     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
17929     Created.push_back(Op.getNode());
17930   }
17931
17932   return Op;
17933 }
17934
17935//===----------------------------------------------------------------------===//
17936// Inline Assembly Support
17937//===----------------------------------------------------------------------===//
17938
// NOTE(review): rendered source listing -- upstream line 17939 (the signature
// line, presumably
// void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, ...))
// was dropped by the extraction. Confirm against the upstream file.
//
// Reports target-specific known-zero bits for PPC DAG nodes to the generic
// known-bits analysis.
17940                                        KnownBits &Known,
17941                                        const APInt &DemandedElts,
17942                                        const SelectionDAG &DAG,
17943                                        unsigned Depth) const {
  // Start from "nothing known"; each case below fills in known-zero bits.
17944   Known.resetAll();
17945   switch (Op.getOpcode()) {
17946   default: break;
17947   case PPCISD::LBRX: {
17948     // lhbrx is known to have the top bits cleared out.
17949     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
17950       Known.Zero = 0xFFFF0000;
17951     break;
17952   }
17953   case PPCISD::ADDE: {
    // Only the value result (result 0) is analyzed, and only for the
    // ADDE 0, 0, CARRY pattern, whose result is just the carry bit (0 or 1).
17954     if (Op.getResNo() == 0) {
17955       // (0|1), _ = ADDE 0, 0, CARRY
17956       SDValue LHS = Op.getOperand(0);
17957       SDValue RHS = Op.getOperand(1);
17958       if (isNullConstant(LHS) && isNullConstant(RHS))
17959         Known.Zero = ~1ULL;
17960     }
17961     break;
17962   }
  // NOTE(review): upstream line 17963 (a case label) was dropped by the
  // extraction; operand 0 holding the intrinsic ID suggests
  // ISD::INTRINSIC_WO_CHAIN -- confirm against upstream.
17964     switch (Op.getConstantOperandVal(0)) {
17965     default: break;
    // The AltiVec predicate-form compares produce only 0 or 1.
17966     case Intrinsic::ppc_altivec_vcmpbfp_p:
17967     case Intrinsic::ppc_altivec_vcmpeqfp_p:
17968     case Intrinsic::ppc_altivec_vcmpequb_p:
17969     case Intrinsic::ppc_altivec_vcmpequh_p:
17970     case Intrinsic::ppc_altivec_vcmpequw_p:
17971     case Intrinsic::ppc_altivec_vcmpequd_p:
17972     case Intrinsic::ppc_altivec_vcmpequq_p:
17973     case Intrinsic::ppc_altivec_vcmpgefp_p:
17974     case Intrinsic::ppc_altivec_vcmpgtfp_p:
17975     case Intrinsic::ppc_altivec_vcmpgtsb_p:
17976     case Intrinsic::ppc_altivec_vcmpgtsh_p:
17977     case Intrinsic::ppc_altivec_vcmpgtsw_p:
17978     case Intrinsic::ppc_altivec_vcmpgtsd_p:
17979     case Intrinsic::ppc_altivec_vcmpgtsq_p:
17980     case Intrinsic::ppc_altivec_vcmpgtub_p:
17981     case Intrinsic::ppc_altivec_vcmpgtuh_p:
17982     case Intrinsic::ppc_altivec_vcmpgtuw_p:
17983     case Intrinsic::ppc_altivec_vcmpgtud_p:
17984     case Intrinsic::ppc_altivec_vcmpgtuq_p:
17985       Known.Zero = ~1U; // All bits but the low one are known to be zero.
17986       break;
17987     }
17988     break;
17989   }
  // NOTE(review): upstream line 17990 (a case label) was dropped; operand 1
  // holding the intrinsic ID suggests ISD::INTRINSIC_W_CHAIN -- confirm
  // against upstream.
17991     switch (Op.getConstantOperandVal(1)) {
17992     default:
17993       break;
17994     case Intrinsic::ppc_load2r:
17995       // Top bits are cleared for load2r (which is the same as lhbrx).
17996       Known.Zero = 0xFFFF0000;
17997       break;
17998     }
17999     break;
18000   }
18001   }
18002 }
18003
// getPrefLoopAlignment: choose the preferred alignment for loop headers on
// POWER4-and-later cores. NOTE(review): the signature line (18004), a
// statement at 18022, and the final fallback return (18050 — presumably
// deferring to TargetLowering) were hyperlinked lines dropped by this
// extraction.
18005 switch (Subtarget.getCPUDirective()) {
18006 default: break;
18007 case PPC::DIR_970:
18008 case PPC::DIR_PWR4:
18009 case PPC::DIR_PWR5:
18010 case PPC::DIR_PWR5X:
18011 case PPC::DIR_PWR6:
18012 case PPC::DIR_PWR6X:
18013 case PPC::DIR_PWR7:
18014 case PPC::DIR_PWR8:
18015 case PPC::DIR_PWR9:
18016 case PPC::DIR_PWR10:
18017 case PPC::DIR_PWR11:
18018 case PPC::DIR_PWR_FUTURE: {
// Without machine-loop info we cannot reason about the loop shape.
18019 if (!ML)
18020 break;
18021
18023 // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
18024 // so that we can decrease cache misses and branch-prediction misses.
18025 // Actual alignment of the loop will depend on the hotness check and other
18026 // logic in alignBlocks.
18027 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
18028 return Align(32);
18029 }
18030
18031 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
18032
18033 // For small loops (between 5 and 8 instructions), align to a 32-byte
18034 // boundary so that the entire loop fits in one instruction-cache line.
// Sum instruction sizes, bailing out of the inner loop once we exceed 32
// bytes (the exact total no longer matters past that point).
18035 uint64_t LoopSize = 0;
18036 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
18037 for (const MachineInstr &J : **I) {
18038 LoopSize += TII->getInstSizeInBytes(J);
18039 if (LoopSize > 32)
18040 break;
18041 }
18042
18043 if (LoopSize > 16 && LoopSize <= 32)
18044 return Align(32);
18045
18046 break;
18047 }
18048 }
18049
18051}
18052
18053 /// getConstraintType - Given a constraint, return the type of
18054 /// constraint it is for this target.
// NOTE(review): the function signature lines (18055-18056) were hyperlinked
// lines dropped by this extraction. The body classifies single-letter GCC
// RS6000 constraints plus the two-letter "wc"/"w*" VSX constraints.
18057 if (Constraint.size() == 1) {
18058 switch (Constraint[0]) {
18059 default: break;
// b/r: GPRs; f/d: FPRs; v: Altivec; y: CR fields.
18060 case 'b':
18061 case 'r':
18062 case 'f':
18063 case 'd':
18064 case 'v':
18065 case 'y':
18066 return C_RegisterClass;
18067 case 'Z':
18068 // FIXME: While Z does indicate a memory constraint, it specifically
18069 // indicates an r+r address (used in conjunction with the 'y' modifier
18070 // in the replacement string). Currently, we're forcing the base
18071 // register to be r0 in the asm printer (which is interpreted as zero)
18072 // and forming the complete address in the second register. This is
18073 // suboptimal.
18074 return C_Memory;
18075 }
18076 } else if (Constraint == "wc") { // individual CR bits.
18077 return C_RegisterClass;
18078 } else if (Constraint == "wa" || Constraint == "wd" ||
18079 Constraint == "wf" || Constraint == "ws" ||
18080 Constraint == "wi" || Constraint == "ww") {
18081 return C_RegisterClass; // VSX registers.
18082 }
// Anything else is handled by the target-independent implementation.
18083 return TargetLowering::getConstraintType(Constraint);
18084}
18085
18086 /// Examine constraint type and operand type and determine a weight value.
18087 /// This object must already have been set up with the operand type
18088 /// and the current alternative constraint selected.
// NOTE(review): the signature lines (18089-18090), the initial weight
// declaration (18092), and the default-case fallback to the base class
// (18117) were hyperlinked lines dropped by this extraction.
18093 Value *CallOperandVal = info.CallOperandVal;
18094 // If we don't have a value, we can't do a match,
18095 // but allow it at the lowest weight.
18096 if (!CallOperandVal)
18097 return CW_Default;
18098 Type *type = CallOperandVal->getType();
18099
18100 // Look at the constraint type.
// Two-letter constraints are matched first: each returns CW_Register only
// when the operand's IR type fits the register class the constraint names.
18101 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
18102 return CW_Register; // an individual CR bit.
18103 else if ((StringRef(constraint) == "wa" ||
18104 StringRef(constraint) == "wd" ||
18105 StringRef(constraint) == "wf") &&
18106 type->isVectorTy())
18107 return CW_Register;
18108 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
18109 return CW_Register; // just hold 64-bit integers data.
18110 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18111 return CW_Register;
18112 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18113 return CW_Register;
18114
// Single-letter constraints: weight by how well the operand type matches.
18115 switch (*constraint) {
18116 default:
18118 break;
18119 case 'b':
18120 if (type->isIntegerTy())
18121 weight = CW_Register;
18122 break;
18123 case 'f':
18124 if (type->isFloatTy())
18125 weight = CW_Register;
18126 break;
18127 case 'd':
18128 if (type->isDoubleTy())
18129 weight = CW_Register;
18130 break;
18131 case 'v':
18132 if (type->isVectorTy())
18133 weight = CW_Register;
18134 break;
18135 case 'y':
18136 weight = CW_Register;
18137 break;
18138 case 'Z':
18139 weight = CW_Memory;
18140 break;
18141 }
18142 return weight;
18143}
18144
18145std::pair<unsigned, const TargetRegisterClass *>
// getRegForInlineAsmConstraint: map an inline-asm constraint string (and the
// requested value type) to a specific register or register class.
// NOTE(review): the signature line (18146) and the base-class call that
// initializes R (18245) were hyperlinked lines dropped by this extraction.
18147 StringRef Constraint,
18148 MVT VT) const {
18149 if (Constraint.size() == 1) {
18150 // GCC RS6000 Constraint Letters
18151 switch (Constraint[0]) {
18152 case 'b': // R1-R31
// "b" excludes r0/x0 because r0 means literal zero in address computations.
18153 if (VT == MVT::i64 && Subtarget.isPPC64())
18154 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18155 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18156 case 'r': // R0-R31
18157 if (VT == MVT::i64 && Subtarget.isPPC64())
18158 return std::make_pair(0U, &PPC::G8RCRegClass);
18159 return std::make_pair(0U, &PPC::GPRCRegClass);
18160 // 'd' and 'f' constraints are both defined to be "the floating point
18161 // registers", where one is for 32-bit and the other for 64-bit. We don't
18162 // really care overly much here so just give them all the same reg classes.
18163 case 'd':
18164 case 'f':
// SPE subtargets keep FP values in GPRs (f32) or SPE pairs (f64).
18165 if (Subtarget.hasSPE()) {
18166 if (VT == MVT::f32 || VT == MVT::i32)
18167 return std::make_pair(0U, &PPC::GPRCRegClass);
18168 if (VT == MVT::f64 || VT == MVT::i64)
18169 return std::make_pair(0U, &PPC::SPERCRegClass);
18170 } else {
18171 if (VT == MVT::f32 || VT == MVT::i32)
18172 return std::make_pair(0U, &PPC::F4RCRegClass);
18173 if (VT == MVT::f64 || VT == MVT::i64)
18174 return std::make_pair(0U, &PPC::F8RCRegClass);
18175 }
18176 break;
18177 case 'v':
18178 if (Subtarget.hasAltivec() && VT.isVector())
18179 return std::make_pair(0U, &PPC::VRRCRegClass);
18180 else if (Subtarget.hasVSX())
18181 // Scalars in Altivec registers only make sense with VSX.
18182 return std::make_pair(0U, &PPC::VFRCRegClass);
18183 break;
18184 case 'y': // crrc
18185 return std::make_pair(0U, &PPC::CRRCRegClass);
18186 }
18187 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18188 // An individual CR bit.
18189 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18190 } else if ((Constraint == "wa" || Constraint == "wd" ||
18191 Constraint == "wf" || Constraint == "wi") &&
18192 Subtarget.hasVSX()) {
18193 // A VSX register for either a scalar (FP) or vector. There is no
18194 // support for single precision scalars on subtargets prior to Power8.
18195 if (VT.isVector())
18196 return std::make_pair(0U, &PPC::VSRCRegClass);
18197 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18198 return std::make_pair(0U, &PPC::VSSRCRegClass);
18199 return std::make_pair(0U, &PPC::VSFRCRegClass);
18200 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18201 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18202 return std::make_pair(0U, &PPC::VSSRCRegClass);
18203 else
18204 return std::make_pair(0U, &PPC::VSFRCRegClass);
18205 } else if (Constraint == "lr") {
18206 if (VT == MVT::i64)
18207 return std::make_pair(0U, &PPC::LR8RCRegClass);
18208 else
18209 return std::make_pair(0U, &PPC::LRRCRegClass);
18210 }
18211
18212 // Handle special cases of physical registers that are not properly handled
18213 // by the base class.
18214 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18215 // If we name a VSX register, we can't defer to the base class because it
18216 // will not recognize the correct register (their names will be VSL{0-31}
18217 // and V{0-31} so they won't match). So we match them here.
18218 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
18219 int VSNum = atoi(Constraint.data() + 3);
18220 assert(VSNum >= 0 && VSNum <= 63 &&
18221 "Attempted to access a vsr out of range");
// vs0-vs31 overlap the FP registers (VSL side); vs32-vs63 are the Altivec V
// registers.
18222 if (VSNum < 32)
18223 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18224 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18225 }
18226
18227 // For float registers, we can't defer to the base class as it will match
18228 // the SPILLTOVSRRC class.
18229 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18230 int RegNum = atoi(Constraint.data() + 2);
18231 if (RegNum > 31 || RegNum < 0)
18232 report_fatal_error("Invalid floating point register number");
18233 if (VT == MVT::f32 || VT == MVT::i32)
18234 return Subtarget.hasSPE()
18235 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18236 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18237 if (VT == MVT::f64 || VT == MVT::i64)
18238 return Subtarget.hasSPE()
18239 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18240 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18241 }
18242 }
18243
18244 std::pair<unsigned, const TargetRegisterClass *> R =
18246
18247 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18248 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18249 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18250 // register.
18251 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18252 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18253 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18254 PPC::GPRCRegClass.contains(R.first))
18255 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18256 PPC::sub_32, &PPC::G8RCRegClass),
18257 &PPC::G8RCRegClass);
18258
18259 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18260 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18261 R.first = PPC::CR0;
18262 R.second = &PPC::CRRCRegClass;
18263 }
18264 // FIXME: This warning should ideally be emitted in the front end.
18265 const auto &TM = getTargetMachine();
18266 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18267 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18268 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18269 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18270 errs() << "warning: vector registers 20 to 32 are reserved in the "
18271 "default AIX AltiVec ABI and cannot be used\n";
18272 }
18273
18274 return R;
18275}
18276
18277 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18278 /// vector. If it is invalid, don't add anything to Ops.
// NOTE(review): the signature line (18279), the ConstantSDNode cast (18300),
// and the 'J'/'L' validity conditions (18313, 18317) were hyperlinked lines
// dropped by this extraction.
18280 StringRef Constraint,
18281 std::vector<SDValue> &Ops,
18282 SelectionDAG &DAG) const {
18283 SDValue Result;
18284
18285 // Only support length 1 constraints.
18286 if (Constraint.size() > 1)
18287 return;
18288
18289 char Letter = Constraint[0];
18290 switch (Letter) {
18291 default: break;
// I..P are the GCC RS6000 immediate-constant constraint letters; each is
// validated below and emitted as a 64-bit target constant when it fits.
18292 case 'I':
18293 case 'J':
18294 case 'K':
18295 case 'L':
18296 case 'M':
18297 case 'N':
18298 case 'O':
18299 case 'P': {
18301 if (!CST) return; // Must be an immediate to match.
18302 SDLoc dl(Op);
18303 int64_t Value = CST->getSExtValue();
18304 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18305 // numbers are printed as such.
18306 switch (Letter) {
18307 default: llvm_unreachable("Unknown constraint letter!");
18308 case 'I': // "I" is a signed 16-bit constant.
18309 if (isInt<16>(Value))
18310 Result = DAG.getTargetConstant(Value, dl, TCVT);
18311 break;
18312 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
18314 Result = DAG.getTargetConstant(Value, dl, TCVT);
18315 break;
18316 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
18318 Result = DAG.getTargetConstant(Value, dl, TCVT);
18319 break;
18320 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18321 if (isUInt<16>(Value))
18322 Result = DAG.getTargetConstant(Value, dl, TCVT);
18323 break;
18324 case 'M': // "M" is a constant that is greater than 31.
18325 if (Value > 31)
18326 Result = DAG.getTargetConstant(Value, dl, TCVT);
18327 break;
18328 case 'N': // "N" is a positive constant that is an exact power of two.
18329 if (Value > 0 && isPowerOf2_64(Value))
18330 Result = DAG.getTargetConstant(Value, dl, TCVT);
18331 break;
18332 case 'O': // "O" is the constant zero.
18333 if (Value == 0)
18334 Result = DAG.getTargetConstant(Value, dl, TCVT);
18335 break;
18336 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
18337 if (isInt<16>(-Value))
18338 Result = DAG.getTargetConstant(Value, dl, TCVT);
18339 break;
18340 }
18341 break;
18342 }
18343 }
18344
18345 if (Result.getNode()) {
18346 Ops.push_back(Result);
18347 return;
18348 }
18349
18350 // Handle standard constraint letters.
18352}
18353
// CollectTargetIntrinsicOperands: for the PPC trap intrinsics (tdw/tw/
// trapd/trap), forward any !annotation metadata attached to the call as an
// extra MDNode operand so it survives into the DAG. NOTE(review): the
// signature lines (18354-18355) were hyperlinked lines dropped by this
// extraction.
18356 SelectionDAG &DAG) const {
// Ops[1] holds the intrinsic ID (Ops[0] is the chain).
18357 if (I.getNumOperands() <= 1)
18358 return;
18359 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18360 return;
18361 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18362 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18363 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18364 return;
18365
18366 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18367 Ops.push_back(DAG.getMDNode(MDN));
18368}
18369
18370// isLegalAddressingMode - Return true if the addressing mode represented
18371// by AM is legal for this target, for a load/store of the specified type.
// NOTE(review): the signature line (18372) was a hyperlinked line dropped by
// this extraction.
18373 const AddrMode &AM, Type *Ty,
18374 unsigned AS,
18375 Instruction *I) const {
18376 // Vector type r+i form is supported since power9 as DQ form. We don't check
18377 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
18378 // imm form is preferred and the offset can be adjusted to use imm form later
18379 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18380 // max offset to check legal addressing mode, we should be a little aggressive
18381 // to contain other offsets for that LSRUse.
18382 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18383 return false;
18384
18385 // PPC allows a sign-extended 16-bit immediate field.
18386 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18387 return false;
18388
18389 // No global is ever allowed as a base.
18390 if (AM.BaseGV)
18391 return false;
18392
18393 // PPC only support r+r,
18394 switch (AM.Scale) {
18395 case 0: // "r+i" or just "i", depending on HasBaseReg.
18396 break;
18397 case 1:
18398 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18399 return false;
18400 // Otherwise we have r+r or r+i.
18401 break;
18402 case 2:
18403 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18404 return false;
18405 // Allow 2*r as r+r.
18406 break;
18407 default:
18408 // No other scales are supported.
18409 return false;
18410 }
18411
18412 return true;
18413}
18414
18415SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
// Lower @llvm.returnaddress: for depth 0 load the saved LR slot of this
// frame; for depth > 0 walk to the caller's frame first and load the LR at
// the ABI return-save offset. NOTE(review): lines 18417 (MF initialization)
// and 18436/18442/18448 (MachinePointerInfo arguments) were hyperlinked
// lines dropped by this extraction.
18416 SelectionDAG &DAG) const {
18418 MachineFrameInfo &MFI = MF.getFrameInfo();
18419 MFI.setReturnAddressIsTaken(true);
18420
18421 SDLoc dl(Op);
18422 unsigned Depth = Op.getConstantOperandVal(0);
18423
18424 // Make sure the function does not optimize away the store of the RA to
18425 // the stack.
18426 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18427 FuncInfo->setLRStoreRequired();
18428 auto PtrVT = getPointerTy(MF.getDataLayout());
18429
18430 if (Depth > 0) {
18431 // The link register (return address) is saved in the caller's frame
18432 // not the callee's stack frame. So we must get the caller's frame
18433 // address and load the return address at the LR offset from there.
18434 SDValue FrameAddr =
18435 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18437 SDValue Offset =
18438 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18439 Subtarget.getScalarIntVT());
18440 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18441 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18443 }
18444
18445 // Just load the return address off the stack.
18446 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18447 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
18449}
18450
18451SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18452 SelectionDAG &DAG) const {
18453 SDLoc dl(Op);
18454 unsigned Depth = Op.getConstantOperandVal(0);
18455
18456 MachineFunction &MF = DAG.getMachineFunction();
18457 MachineFrameInfo &MFI = MF.getFrameInfo();
18458 MFI.setFrameAddressIsTaken(true);
18459
18460 EVT PtrVT = getPointerTy(MF.getDataLayout());
18461 bool isPPC64 = PtrVT == MVT::i64;
18462
18463 // Naked functions never have a frame pointer, and so we use r1. For all
18464 // other functions, this decision must be delayed until during PEI.
18465 unsigned FrameReg;
18466 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18467 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18468 else
18469 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18470
18471 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18472 PtrVT);
18473 while (Depth--)
18474 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18475 FrameAddr, MachinePointerInfo());
18476 return FrameAddr;
18477}
18478
18479#define GET_REGISTER_MATCHER
18480#include "PPCGenAsmMatcher.inc"
18481
// getRegisterByName: resolve a named global register (for
// @llvm.read_register / reserved-register globals), rejecting registers
// that may not be reserved. NOTE(review): the signature line (18482) and the
// initial name-match lookup assigning Reg (18490) were hyperlinked lines
// dropped by this extraction.
18483 const MachineFunction &MF) const {
18484 bool IsPPC64 = Subtarget.isPPC64();
18485
// Only i32, or i64 on a 64-bit subtarget, are acceptable register types.
18486 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18487 if (!Is64Bit && VT != LLT::scalar(32))
18488 report_fatal_error("Invalid register global variable type");
18489
18491 if (!Reg)
18492 return Reg;
18493
18494 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18495 // Need followup investigation as to why.
// r2 (TOC pointer on 64-bit) and r0 cannot be user-reserved.
18496 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18497 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18498 StringRef(RegName) + "\"."));
18499
18500 // Convert GPR to GP8R register for 64bit.
18501 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
18502 Reg = Reg.id() - PPC::R0 + PPC::X0;
18503
18504 return Reg;
18505}
18506
// isAccessedAsGotIndirect: return true when the given address node must be
// reached via a GOT/TOC load rather than addressed directly. NOTE(review):
// the signature line (18507), the code-model query (18517), and the
// JumpTable/BlockAddress and GlobalAddress dyn_casts (18524, 18527) were
// hyperlinked lines dropped by this extraction.
18508 // 32-bit SVR4 ABI access everything as got-indirect.
18509 if (Subtarget.is32BitELFABI())
18510 return true;
18511
18512 // AIX accesses everything indirectly through the TOC, which is similar to
18513 // the GOT.
18514 if (Subtarget.isAIXABI())
18515 return true;
18516
18518 // If it is small or large code model, module locals are accessed
18519 // indirectly by loading their address from .toc/.got.
18520 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18521 return true;
18522
18523 // JumpTable and BlockAddress are accessed as got-indirect.
18525 return true;
18526
// Globals defer to the subtarget's per-symbol indirection decision.
18528 return Subtarget.isGVIndirectSymbol(G->getGlobal());
18529
18530 return false;
18531}
18532
18533bool
// isOffsetFoldingLegal: always false — PPC never folds a constant offset
// into a global address node. NOTE(review): the signature line (18534) was a
// hyperlinked line dropped by this extraction.
18535 // The PowerPC target isn't yet aware of offsets.
18536 return false;
18537}
18538
// getTgtMemIntrinsic: describe the memory access performed by PPC
// memory-touching intrinsics so the DAG can build a MachineMemOperand.
// NOTE(review): the signature line (18539) and the Info.flags assignments at
// 18557-18558, 18566, 18574 and 18684 were hyperlinked lines dropped by this
// extraction.
18540 const CallBase &I,
18541 MachineFunction &MF,
18542 unsigned Intrinsic) const {
18543 switch (Intrinsic) {
// Quadword atomic RMW / cmpxchg: a 16-byte aligned i128 access through the
// pointer argument.
18544 case Intrinsic::ppc_atomicrmw_xchg_i128:
18545 case Intrinsic::ppc_atomicrmw_add_i128:
18546 case Intrinsic::ppc_atomicrmw_sub_i128:
18547 case Intrinsic::ppc_atomicrmw_nand_i128:
18548 case Intrinsic::ppc_atomicrmw_and_i128:
18549 case Intrinsic::ppc_atomicrmw_or_i128:
18550 case Intrinsic::ppc_atomicrmw_xor_i128:
18551 case Intrinsic::ppc_cmpxchg_i128:
18552 Info.opc = ISD::INTRINSIC_W_CHAIN;
18553 Info.memVT = MVT::i128;
18554 Info.ptrVal = I.getArgOperand(0);
18555 Info.offset = 0;
18556 Info.align = Align(16);
18559 return true;
18560 case Intrinsic::ppc_atomic_load_i128:
18561 Info.opc = ISD::INTRINSIC_W_CHAIN;
18562 Info.memVT = MVT::i128;
18563 Info.ptrVal = I.getArgOperand(0);
18564 Info.offset = 0;
18565 Info.align = Align(16);
18567 return true;
18568 case Intrinsic::ppc_atomic_store_i128:
18569 Info.opc = ISD::INTRINSIC_VOID;
18570 Info.memVT = MVT::i128;
// For the store, the pointer is the third argument (the value halves come
// first).
18571 Info.ptrVal = I.getArgOperand(2);
18572 Info.offset = 0;
18573 Info.align = Align(16);
18575 return true;
// Altivec/VSX loads: the hardware ignores the low address bits, so the
// conservative access window spans from -(size-1) to +(size-1) around the
// pointer (see offset/size below).
18576 case Intrinsic::ppc_altivec_lvx:
18577 case Intrinsic::ppc_altivec_lvxl:
18578 case Intrinsic::ppc_altivec_lvebx:
18579 case Intrinsic::ppc_altivec_lvehx:
18580 case Intrinsic::ppc_altivec_lvewx:
18581 case Intrinsic::ppc_vsx_lxvd2x:
18582 case Intrinsic::ppc_vsx_lxvw4x:
18583 case Intrinsic::ppc_vsx_lxvd2x_be:
18584 case Intrinsic::ppc_vsx_lxvw4x_be:
18585 case Intrinsic::ppc_vsx_lxvl:
18586 case Intrinsic::ppc_vsx_lxvll: {
18587 EVT VT;
18588 switch (Intrinsic) {
18589 case Intrinsic::ppc_altivec_lvebx:
18590 VT = MVT::i8;
18591 break;
18592 case Intrinsic::ppc_altivec_lvehx:
18593 VT = MVT::i16;
18594 break;
18595 case Intrinsic::ppc_altivec_lvewx:
18596 VT = MVT::i32;
18597 break;
18598 case Intrinsic::ppc_vsx_lxvd2x:
18599 case Intrinsic::ppc_vsx_lxvd2x_be:
18600 VT = MVT::v2f64;
18601 break;
18602 default:
18603 VT = MVT::v4i32;
18604 break;
18605 }
18606
18607 Info.opc = ISD::INTRINSIC_W_CHAIN;
18608 Info.memVT = VT;
18609 Info.ptrVal = I.getArgOperand(0);
18610 Info.offset = -VT.getStoreSize()+1;
18611 Info.size = 2*VT.getStoreSize()-1;
18612 Info.align = Align(1);
18613 Info.flags = MachineMemOperand::MOLoad;
18614 return true;
18615 }
// Altivec/VSX stores: same conservative window, but the pointer is the
// second argument (the value to store is first).
18616 case Intrinsic::ppc_altivec_stvx:
18617 case Intrinsic::ppc_altivec_stvxl:
18618 case Intrinsic::ppc_altivec_stvebx:
18619 case Intrinsic::ppc_altivec_stvehx:
18620 case Intrinsic::ppc_altivec_stvewx:
18621 case Intrinsic::ppc_vsx_stxvd2x:
18622 case Intrinsic::ppc_vsx_stxvw4x:
18623 case Intrinsic::ppc_vsx_stxvd2x_be:
18624 case Intrinsic::ppc_vsx_stxvw4x_be:
18625 case Intrinsic::ppc_vsx_stxvl:
18626 case Intrinsic::ppc_vsx_stxvll: {
18627 EVT VT;
18628 switch (Intrinsic) {
18629 case Intrinsic::ppc_altivec_stvebx:
18630 VT = MVT::i8;
18631 break;
18632 case Intrinsic::ppc_altivec_stvehx:
18633 VT = MVT::i16;
18634 break;
18635 case Intrinsic::ppc_altivec_stvewx:
18636 VT = MVT::i32;
18637 break;
18638 case Intrinsic::ppc_vsx_stxvd2x:
18639 case Intrinsic::ppc_vsx_stxvd2x_be:
18640 VT = MVT::v2f64;
18641 break;
18642 default:
18643 VT = MVT::v4i32;
18644 break;
18645 }
18646
18647 Info.opc = ISD::INTRINSIC_VOID;
18648 Info.memVT = VT;
18649 Info.ptrVal = I.getArgOperand(1);
18650 Info.offset = -VT.getStoreSize()+1;
18651 Info.size = 2*VT.getStoreSize()-1;
18652 Info.align = Align(1);
18653 Info.flags = MachineMemOperand::MOStore;
18654 return true;
18655 }
// Store-conditional family: naturally aligned store of the element size.
18656 case Intrinsic::ppc_stdcx:
18657 case Intrinsic::ppc_stwcx:
18658 case Intrinsic::ppc_sthcx:
18659 case Intrinsic::ppc_stbcx: {
18660 EVT VT;
18661 auto Alignment = Align(8);
18662 switch (Intrinsic) {
18663 case Intrinsic::ppc_stdcx:
18664 VT = MVT::i64;
18665 break;
18666 case Intrinsic::ppc_stwcx:
18667 VT = MVT::i32;
18668 Alignment = Align(4);
18669 break;
18670 case Intrinsic::ppc_sthcx:
18671 VT = MVT::i16;
18672 Alignment = Align(2);
18673 break;
18674 case Intrinsic::ppc_stbcx:
18675 VT = MVT::i8;
18676 Alignment = Align(1);
18677 break;
18678 }
18679 Info.opc = ISD::INTRINSIC_W_CHAIN;
18680 Info.memVT = VT;
18681 Info.ptrVal = I.getArgOperand(0);
18682 Info.offset = 0;
18683 Info.align = Alignment;
18685 return true;
18686 }
18687 default:
18688 break;
18689 }
18690
18691 return false;
18692}
18693
18694/// It returns EVT::Other if the type should be determined using generic
18695/// target-independent logic.
// getOptimalMemOpType: pick the widest profitable type for memcpy/memset
// expansion. NOTE(review): the signature line (18696) was a hyperlinked line
// dropped by this extraction.
18697 LLVMContext &Context, const MemOp &Op,
18698 const AttributeList &FuncAttributes) const {
18699 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18700 // We should use Altivec/VSX loads and stores when available. For unaligned
18701 // addresses, unaligned VSX loads are only fast starting with the P8.
18702 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18703 if (Op.isMemset() && Subtarget.hasVSX()) {
18704 uint64_t TailSize = Op.size() % 16;
18705 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
18706 // element if vector element type matches tail store. For tail size
18707 // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
18708 if (TailSize > 2 && TailSize <= 4) {
18709 return MVT::v8i16;
18710 }
18711 return MVT::v4i32;
18712 }
18713 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
18714 return MVT::v4i32;
18715 }
18716 }
18717
// Scalar fallback: native GPR width.
18718 if (Subtarget.isPPC64()) {
18719 return MVT::i64;
18720 }
18721
18722 return MVT::i32;
18723}
18724
18725/// Returns true if it is beneficial to convert a load of a constant
18726/// to just the constant itself.
// NOTE(review): the signature line (18727) was a hyperlinked line dropped by
// this extraction. Any integer constant of 1..64 bits can be materialized
// with immediates, so prefer that over a constant-pool load.
18728 Ty) const {
18729 assert(Ty->isIntegerTy());
18730
18731 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18732 return !(BitSize == 0 || BitSize > 64);
18733}
18734
// isTruncateFree (IR-type overload): truncating i64 to i32 is free on PPC
// (just use the low word). NOTE(review): the signature line (18735) was a
// hyperlinked line dropped by this extraction.
18736 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18737 return false;
18738 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18739 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18740 return NumBits1 == 64 && NumBits2 == 32;
18742
// isTruncateFree (EVT overload): mirrors the IR-type overload above — only
// i64 -> i32 truncation is free. NOTE(review): the signature line (18743)
// was a hyperlinked line dropped by this extraction.
18744 if (!VT1.isInteger() || !VT2.isInteger())
18745 return false;
18746 unsigned NumBits1 = VT1.getSizeInBits();
18747 unsigned NumBits2 = VT2.getSizeInBits();
18748 return NumBits1 == 64 && NumBits2 == 32;
18749}
18750
// isZExtFree: a zext is free when it folds into the producing operation.
// NOTE(review): the signature line (18751) was a hyperlinked line dropped by
// this extraction.
18752 // Generally speaking, zexts are not free, but they are free when they can be
18753 // folded with other operations.
// Small non-sign-extending loads already zero the high bits, so zext of
// their result costs nothing.
18754 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18755 EVT MemVT = LD->getMemoryVT();
18756 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18757 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18758 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18759 LD->getExtensionType() == ISD::ZEXTLOAD))
18760 return true;
18761 }
18762
18763 // FIXME: Add other cases...
18764 // - 32-bit shifts with a zext to i64
18765 // - zext after ctlz, bswap, etc.
18766 // - zext after and by a constant mask
18767
18768 return TargetLowering::isZExtFree(Val, VT2);
18769}
18770
18771bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18772 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18773 "invalid fpext types");
18774 // Extending to float128 is not free.
18775 if (DestVT == MVT::f128)
18776 return false;
18777 return true;
18778}
18779
// isLegalICmpImmediate: compare immediates must fit a signed or unsigned
// 16-bit field (cmpi/cmpli). NOTE(review): the signature line (18780) was a
// hyperlinked line dropped by this extraction.
18781 return isInt<16>(Imm) || isUInt<16>(Imm);
18782}
18783
// isLegalAddImmediate: same 16-bit signed/unsigned immediate window as
// compares (addi/addis). NOTE(review): the signature line (18784) was a
// hyperlinked line dropped by this extraction.
18785 return isInt<16>(Imm) || isUInt<16>(Imm);
18786}
18787
// allowsMisalignedMemoryAccesses: PPC tolerates unaligned scalar accesses
// and (with VSX) a few vector types. NOTE(review): the signature lines
// (18788-18789) and the DisablePPCUnaligned guard condition (18791) were
// hyperlinked lines dropped by this extraction.
18790 unsigned *Fast) const {
18792 return false;
18793
18794 // PowerPC supports unaligned memory access for simple non-vector types.
18795 // Although accessing unaligned addresses is not as efficient as accessing
18796 // aligned addresses, it is generally more efficient than manual expansion,
18797 // and generally only traps for software emulation when crossing page
18798 // boundaries.
18799
18800 if (!VT.isSimple())
18801 return false;
18802
18803 if (VT.isFloatingPoint() && !VT.isVector() &&
18804 !Subtarget.allowsUnalignedFPAccess())
18805 return false;
18806
// Vector types: only the four 16-byte VSX types qualify, and only with VSX.
18807 if (VT.getSimpleVT().isVector()) {
18808 if (Subtarget.hasVSX()) {
18809 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18810 VT != MVT::v4f32 && VT != MVT::v4i32)
18811 return false;
18812 } else {
18813 return false;
18814 }
18815 }
18816
18817 if (VT == MVT::ppcf128)
18818 return false;
18819
18820 if (Fast)
18821 *Fast = 1;
18822
18823 return true;
18824}
18825
18827 SDValue C) const {
18828 // Check integral scalar types.
18829 if (!VT.isScalarInteger())
18830 return false;
18831 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18832 if (!ConstNode->getAPIntValue().isSignedIntN(64))
18833 return false;
18834 // This transformation will generate >= 2 operations. But the following
18835 // cases will generate <= 2 instructions during ISEL. So exclude them.
18836 // 1. If the constant multiplier fits 16 bits, it can be handled by one
18837 // HW instruction, ie. MULLI
18838 // 2. If the multiplier after shifted fits 16 bits, an extra shift
18839 // instruction is needed than case 1, ie. MULLI and RLDICR
18840 int64_t Imm = ConstNode->getSExtValue();
18841 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
18842 Imm >>= Shift;
18843 if (isInt<16>(Imm))
18844 return false;
18845 uint64_t UImm = static_cast<uint64_t>(Imm);
18846 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
18847 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
18848 return true;
18849 }
18850 return false;
18851}
18852
18858
// isFMAFasterThanFMulAndFAdd (Function/Type overload): fused multiply-add
// beats separate fmul+fadd for f32/f64 (hardware fmadds/fmadd) and for
// f128 when Power9 vector support provides xsmaddqp; never with SPE or
// soft-float. NOTE(review): the signature line (18859) was a hyperlinked
// line dropped by this extraction.
18860 Type *Ty) const {
18861 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18862 return false;
18863 switch (Ty->getScalarType()->getTypeID()) {
18864 case Type::FloatTyID:
18865 case Type::DoubleTyID:
18866 return true;
18867 case Type::FP128TyID:
18868 return Subtarget.hasP9Vector();
18869 default:
18870 return false;
18871 }
18872}
18873
18874// FIXME: add more patterns which are not profitable to hoist.
// isProfitableToHoist: return false for instruction patterns whose hoisting
// would break a profitable combine (FMA formation, float load/store
// forwarding). NOTE(review): the signature line (18875), the TargetOptions
// lookup (18889), and part of the FMA legality conjunction (18897) were
// hyperlinked lines dropped by this extraction.
18876 if (!I->hasOneUse())
18877 return true;
18878
18879 Instruction *User = I->user_back();
18880 assert(User && "A single use instruction with no uses.");
18881
18882 switch (I->getOpcode()) {
18883 case Instruction::FMul: {
18884 // Don't break FMA, PowerPC prefers FMA.
18885 if (User->getOpcode() != Instruction::FSub &&
18886 User->getOpcode() != Instruction::FAdd)
18887 return true;
18888
18890 const Function *F = I->getFunction();
18891 const DataLayout &DL = F->getDataLayout();
18892 Type *Ty = User->getOperand(0)->getType();
// Contraction must be allowed on both the multiply and the add/sub for the
// combiner to fuse them.
18893 bool AllowContract = I->getFastMathFlags().allowContract() &&
18894 User->getFastMathFlags().allowContract();
18895
18896 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
18898 (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
18899 }
18900 case Instruction::Load: {
18901 // Don't break "store (load float*)" pattern, this pattern will be combined
18902 // to "store (load int32)" in later InstCombine pass. See function
18903 // combineLoadToOperationType. On PowerPC, loading a float point takes more
18904 // cycles than loading a 32 bit integer.
18905 LoadInst *LI = cast<LoadInst>(I);
18906 // For the loads that combineLoadToOperationType does nothing, like
18907 // ordered load, it should be profitable to hoist them.
18908 // For swifterror load, it can only be used for pointer to pointer type, so
18909 // later type check should get rid of this case.
18910 if (!LI->isUnordered())
18911 return true;
18912
18913 if (User->getOpcode() != Instruction::Store)
18914 return true;
18915
18916 if (I->getType()->getTypeID() != Type::FloatTyID)
18917 return true;
18918
18919 return false;
18920 }
18921 default:
18922 return true;
18923 }
18924 return true;
18925}
18926
18927const MCPhysReg *
// getScratchRegisters: null-terminated list of registers that stackmaps/
// patchpoints may clobber. NOTE(review): the signature line (18928) was a
// hyperlinked line dropped by this extraction.
18929 // LR is a callee-save register, but we must treat it as clobbered by any call
18930 // site. Hence we include LR in the scratch registers, which are in turn added
18931 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
18932 // to CTR, which is used by any indirect call.
18933 static const MCPhysReg ScratchRegs[] = {
18934 PPC::X12, PPC::LR8, PPC::CTR8, 0
18935 };
18936
18937 return ScratchRegs;
18938}
18939
// getExceptionPointerRegister: the EH landing-pad exception-object pointer
// arrives in r3/x3 per the PPC ABIs. NOTE(review): the signature line
// (18940) was a hyperlinked line dropped by this extraction.
18941 const Constant *PersonalityFn) const {
18942 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
18943}
18944
// getExceptionSelectorRegister: the EH selector value arrives in r4/x4.
// NOTE(review): the signature line (18945) was a hyperlinked line dropped by
// this extraction.
18946 const Constant *PersonalityFn) const {
18947 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
18948}
18949
18950bool
// shouldExpandBuildVectorWithShuffles: prefer shuffle-based BUILD_VECTOR
// expansion when it avoids stack traffic. NOTE(review): the signature line
// (18951) and the base-class fallback return (18959) were hyperlinked lines
// dropped by this extraction.
18952 EVT VT , unsigned DefinedValues) const {
18953 if (VT == MVT::v2i64)
18954 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
18955
18956 if (Subtarget.hasVSX())
18957 return true;
18958
18960}
18961
// getSchedulingPreference: use ILP-oriented list scheduling unless disabled
// by flag or superseded by the MachineScheduler. NOTE(review): the signature
// line (18962) and the early-return in the guarded branch (18964) were
// hyperlinked lines dropped by this extraction.
18963 if (DisableILPPref || Subtarget.enableMachineScheduler())
18965
18966 return Sched::ILP;
18967}
18968
18969// Create a fast isel object.
// Thin factory delegating to the PPC FastISel implementation. NOTE(review):
// the signature line (18971) was a hyperlinked line dropped by this
// extraction.
18970FastISel *
18972 const TargetLibraryInfo *LibInfo) const {
18973 return PPC::createFastISel(FuncInfo, LibInfo);
18974}
18975
18976// 'Inverted' means the FMA opcode after negating one multiplicand.
18977// For example, (fma -a b c) = (fnmsub a b c)
18978static unsigned invertFMAOpcode(unsigned Opc) {
18979 switch (Opc) {
18980 default:
18981 llvm_unreachable("Invalid FMA opcode for PowerPC!");
18982 case ISD::FMA:
18983 return PPCISD::FNMSUB;
18984 case PPCISD::FNMSUB:
18985 return ISD::FMA;
18986 }
18987}
18988
// getNegatedExpression: produce a cheaper form of (fneg Op) for PPC's
// FNMSUB node when possible. NOTE(review): the signature line (18989), the
// depth-limit guard (18993), and the NegatibleCost declarations (19005,
// 19011, 19024, 19028) were hyperlinked lines dropped by this extraction.
18990 bool LegalOps, bool OptForSize,
18992 unsigned Depth) const {
18994 return SDValue();
18995
18996 unsigned Opc = Op.getOpcode();
18997 EVT VT = Op.getValueType();
18998 SDNodeFlags Flags = Op.getNode()->getFlags();
18999
19000 switch (Opc) {
19001 case PPCISD::FNMSUB:
19002 if (!Op.hasOneUse() || !isTypeLegal(VT))
19003 break;
19004
19006 SDValue N0 = Op.getOperand(0);
19007 SDValue N1 = Op.getOperand(1);
19008 SDValue N2 = Op.getOperand(2);
19009 SDLoc Loc(Op);
19010
// The addend must be negatable for either rewrite below.
19012 SDValue NegN2 =
19013 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
19014
19015 if (!NegN2)
19016 return SDValue();
19017
19018 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
19019 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
19020 // These transformations may change sign of zeroes. For example,
19021 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
19022 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
19023 // Try and choose the cheaper one to negate.
19025 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
19026 N0Cost, Depth + 1);
19027
19029 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
19030 N1Cost, Depth + 1);
19031
// Negate whichever multiplicand is cheaper; overall cost is bounded by the
// cheaper of the chosen operand and the addend.
19032 if (NegN0 && N0Cost <= N1Cost) {
19033 Cost = std::min(N0Cost, N2Cost);
19034 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
19035 } else if (NegN1) {
19036 Cost = std::min(N1Cost, N2Cost);
19037 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
19038 }
19039 }
19040
19041 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
19042 if (isOperationLegal(ISD::FMA, VT)) {
19043 Cost = N2Cost;
19044 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
19045 }
19046
19047 break;
19048 }
19049
19050 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
19051 Cost, Depth);
19052}
19053
19054// Override to enable LOAD_STACK_GUARD lowering on Linux.
19056 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19057 return true;
19059}
19060
19062 bool ForCodeSize) const {
19063 if (!VT.isSimple() || !Subtarget.hasVSX())
19064 return false;
19065
19066 switch(VT.getSimpleVT().SimpleTy) {
19067 default:
19068 // For FP types that are currently not supported by PPC backend, return
19069 // false. Examples: f16, f80.
19070 return false;
19071 case MVT::f32:
19072 case MVT::f64: {
19073 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19074 // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
19075 return true;
19076 }
19077 bool IsExact;
19078 APSInt IntResult(16, false);
19079 // The rounding mode doesn't really matter because we only care about floats
19080 // that can be converted to integers exactly.
19081 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
19082 // For exact values in the range [-16, 15] we can materialize the float.
19083 if (IsExact && IntResult <= 15 && IntResult >= -16)
19084 return true;
19085 return Imm.isZero();
19086 }
19087 case MVT::ppcf128:
19088 return Imm.isPosZero();
19089 }
19090}
19091
19092// For vector shift operation op, fold
19093// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
19095 SelectionDAG &DAG) {
19096 SDValue N0 = N->getOperand(0);
19097 SDValue N1 = N->getOperand(1);
19098 EVT VT = N0.getValueType();
19099 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19100 unsigned Opcode = N->getOpcode();
19101 unsigned TargetOpcode;
19102
19103 switch (Opcode) {
19104 default:
19105 llvm_unreachable("Unexpected shift operation");
19106 case ISD::SHL:
19107 TargetOpcode = PPCISD::SHL;
19108 break;
19109 case ISD::SRL:
19110 TargetOpcode = PPCISD::SRL;
19111 break;
19112 case ISD::SRA:
19113 TargetOpcode = PPCISD::SRA;
19114 break;
19115 }
19116
19117 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19118 N1->getOpcode() == ISD::AND)
19119 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19120 if (Mask->getZExtValue() == OpSizeInBits - 1)
19121 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19122
19123 return SDValue();
19124}
19125
19126SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
19127 DAGCombinerInfo &DCI) const {
19128 EVT VT = N->getValueType(0);
19129 assert(VT.isVector() && "Vector type expected.");
19130
19131 unsigned Opc = N->getOpcode();
19132 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
19133 "Unexpected opcode.");
19134
19135 if (!isOperationLegal(Opc, VT))
19136 return SDValue();
19137
19138 EVT EltTy = VT.getScalarType();
19139 unsigned EltBits = EltTy.getSizeInBits();
19140 if (EltTy != MVT::i64 && EltTy != MVT::i32)
19141 return SDValue();
19142
19143 SDValue N1 = N->getOperand(1);
19144 uint64_t SplatBits = 0;
19145 bool AddSplatCase = false;
19146 unsigned OpcN1 = N1.getOpcode();
19147 if (OpcN1 == PPCISD::VADD_SPLAT &&
19149 AddSplatCase = true;
19150 SplatBits = N1.getConstantOperandVal(0);
19151 }
19152
19153 if (!AddSplatCase) {
19154 if (OpcN1 != ISD::BUILD_VECTOR)
19155 return SDValue();
19156
19157 unsigned SplatBitSize;
19158 bool HasAnyUndefs;
19159 APInt APSplatBits, APSplatUndef;
19160 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
19161 bool BVNIsConstantSplat =
19162 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
19163 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
19164 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
19165 return SDValue();
19166 SplatBits = APSplatBits.getZExtValue();
19167 }
19168
19169 SDLoc DL(N);
19170 SDValue N0 = N->getOperand(0);
19171 // PPC vector shifts by word/double look at only the low 5/6 bits of the
19172 // shift vector, which means the max value is 31/63. A shift vector of all
19173 // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
19174 // -16 to 15 range.
19175 if (SplatBits == (EltBits - 1)) {
19176 unsigned NewOpc;
19177 switch (Opc) {
19178 case ISD::SHL:
19179 NewOpc = PPCISD::SHL;
19180 break;
19181 case ISD::SRL:
19182 NewOpc = PPCISD::SRL;
19183 break;
19184 case ISD::SRA:
19185 NewOpc = PPCISD::SRA;
19186 break;
19187 }
19188 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
19189 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
19190 }
19191
19192 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
19193 return SDValue();
19194
19195 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
19196 // before the BUILD_VECTOR is replaced by a load.
19197 if (EltTy != MVT::i64 || SplatBits != 1)
19198 return SDValue();
19199
19200 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
19201}
19202
19203SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19204 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19205 return Value;
19206
19207 if (N->getValueType(0).isVector())
19208 return combineVectorShift(N, DCI);
19209
19210 SDValue N0 = N->getOperand(0);
19211 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19212 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19213 N0.getOpcode() != ISD::SIGN_EXTEND ||
19214 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19215 N->getValueType(0) != MVT::i64)
19216 return SDValue();
19217
19218 // We can't save an operation here if the value is already extended, and
19219 // the existing shift is easier to combine.
19220 SDValue ExtsSrc = N0.getOperand(0);
19221 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19222 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19223 return SDValue();
19224
19225 SDLoc DL(N0);
19226 SDValue ShiftBy = SDValue(CN1, 0);
19227 // We want the shift amount to be i32 on the extswli, but the shift could
19228 // have an i64.
19229 if (ShiftBy.getValueType() == MVT::i64)
19230 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19231
19232 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19233 ShiftBy);
19234}
19235
19236SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19237 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19238 return Value;
19239
19240 if (N->getValueType(0).isVector())
19241 return combineVectorShift(N, DCI);
19242
19243 return SDValue();
19244}
19245
19246SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19247 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19248 return Value;
19249
19250 if (N->getValueType(0).isVector())
19251 return combineVectorShift(N, DCI);
19252
19253 return SDValue();
19254}
19255
19256// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19257// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19258// When C is zero, the equation (addi Z, -C) can be simplified to Z
19259// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
19261 const PPCSubtarget &Subtarget) {
19262 if (!Subtarget.isPPC64())
19263 return SDValue();
19264
19265 SDValue LHS = N->getOperand(0);
19266 SDValue RHS = N->getOperand(1);
19267
19268 auto isZextOfCompareWithConstant = [](SDValue Op) {
19269 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19270 Op.getValueType() != MVT::i64)
19271 return false;
19272
19273 SDValue Cmp = Op.getOperand(0);
19274 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19275 Cmp.getOperand(0).getValueType() != MVT::i64)
19276 return false;
19277
19278 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19279 int64_t NegConstant = 0 - Constant->getSExtValue();
19280 // Due to the limitations of the addi instruction,
19281 // -C is required to be [-32768, 32767].
19282 return isInt<16>(NegConstant);
19283 }
19284
19285 return false;
19286 };
19287
19288 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19289 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19290
19291 // If there is a pattern, canonicalize a zext operand to the RHS.
19292 if (LHSHasPattern && !RHSHasPattern)
19293 std::swap(LHS, RHS);
19294 else if (!LHSHasPattern && !RHSHasPattern)
19295 return SDValue();
19296
19297 SDLoc DL(N);
19298 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19299 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19300 SDValue Cmp = RHS.getOperand(0);
19301 SDValue Z = Cmp.getOperand(0);
19302 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19303 int64_t NegConstant = 0 - Constant->getSExtValue();
19304
19305 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19306 default: break;
19307 case ISD::SETNE: {
19308 // when C == 0
19309 // --> addze X, (addic Z, -1).carry
19310 // /
19311 // add X, (zext(setne Z, C))--
19312 // \ when -32768 <= -C <= 32767 && C != 0
19313 // --> addze X, (addic (addi Z, -C), -1).carry
19314 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19315 DAG.getConstant(NegConstant, DL, MVT::i64));
19316 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19317 SDValue Addc =
19318 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19319 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19320 DAG.getConstant(0, DL, CarryType));
19321 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19322 DAG.getConstant(0, DL, MVT::i64),
19323 SDValue(Addc.getNode(), 1));
19324 }
19325 case ISD::SETEQ: {
19326 // when C == 0
19327 // --> addze X, (subfic Z, 0).carry
19328 // /
19329 // add X, (zext(sete Z, C))--
19330 // \ when -32768 <= -C <= 32767 && C != 0
19331 // --> addze X, (subfic (addi Z, -C), 0).carry
19332 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19333 DAG.getConstant(NegConstant, DL, MVT::i64));
19334 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19335 SDValue Subc =
19336 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19337 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19338 DAG.getConstant(0, DL, CarryType));
19339 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19340 DAG.getConstant(1UL, DL, CarryType));
19341 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19342 DAG.getConstant(0, DL, MVT::i64), Invert);
19343 }
19344 }
19345
19346 return SDValue();
19347}
19348
19349// Transform
19350// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19351// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19352// In this case both C1 and C2 must be known constants.
19353// C1+C2 must fit into a 34 bit signed integer.
19355 const PPCSubtarget &Subtarget) {
19356 if (!Subtarget.isUsingPCRelativeCalls())
19357 return SDValue();
19358
19359 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19360 // If we find that node try to cast the Global Address and the Constant.
19361 SDValue LHS = N->getOperand(0);
19362 SDValue RHS = N->getOperand(1);
19363
19364 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19365 std::swap(LHS, RHS);
19366
19367 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19368 return SDValue();
19369
19370 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19373
19374 // Check that both casts succeeded.
19375 if (!GSDN || !ConstNode)
19376 return SDValue();
19377
19378 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19379 SDLoc DL(GSDN);
19380
19381 // The signed int offset needs to fit in 34 bits.
19382 if (!isInt<34>(NewOffset))
19383 return SDValue();
19384
19385 // The new global address is a copy of the old global address except
19386 // that it has the updated Offset.
19387 SDValue GA =
19388 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19389 NewOffset, GSDN->getTargetFlags());
19390 SDValue MatPCRel =
19391 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19392 return MatPCRel;
19393}
19394
19395// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19396// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19397// Mathematical identity: X + 1 = X - (-1)
19398// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19399// Requirement: VSX feature for efficient xxleqv generation
19401 const PPCSubtarget &Subtarget) {
19402
19403 EVT VT = N->getValueType(0);
19404 if (!Subtarget.hasVSX())
19405 return SDValue();
19406
19407 // Handle v2i64, v4i32, v8i16 and v16i8 types
19408 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
19409 VT == MVT::v2i64))
19410 return SDValue();
19411
19412 SDValue LHS = N->getOperand(0);
19413 SDValue RHS = N->getOperand(1);
19414
19415 // Check if RHS is BUILD_VECTOR
19416 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19417 return SDValue();
19418
19419 // Check if all the elements are 1
19420 unsigned NumOfEles = RHS.getNumOperands();
19421 for (unsigned i = 0; i < NumOfEles; ++i) {
19422 auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
19423 if (!CN || CN->getSExtValue() != 1)
19424 return SDValue();
19425 }
19426 SDLoc DL(N);
19427
19428 SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
19429 SmallVector<SDValue, 4> Ops(4, MinusOne);
19430 SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
19431
19432 // Bitcast to the target vector type
19433 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
19434
19435 return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
19436}
19437
19438SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19439 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19440 return Value;
19441
19442 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19443 return Value;
19444
19445 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
19446 return Value;
19447 return SDValue();
19448}
19449
19450// Detect TRUNCATE operations on bitcasts of float128 values.
19451// What we are looking for here is the situtation where we extract a subset
19452// of bits from a 128 bit float.
19453// This can be of two forms:
19454// 1) BITCAST of f128 feeding TRUNCATE
19455// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19456// The reason this is required is because we do not have a legal i128 type
19457// and so we want to prevent having to store the f128 and then reload part
19458// of it.
19459SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19460 DAGCombinerInfo &DCI) const {
19461 // If we are using CRBits then try that first.
19462 if (Subtarget.useCRBits()) {
19463 // Check if CRBits did anything and return that if it did.
19464 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19465 return CRTruncValue;
19466 }
19467
19468 SDLoc dl(N);
19469 SDValue Op0 = N->getOperand(0);
19470
19471 // Looking for a truncate of i128 to i64.
19472 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19473 return SDValue();
19474
19475 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19476
19477 // SRL feeding TRUNCATE.
19478 if (Op0.getOpcode() == ISD::SRL) {
19479 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19480 // The right shift has to be by 64 bits.
19481 if (!ConstNode || ConstNode->getZExtValue() != 64)
19482 return SDValue();
19483
19484 // Switch the element number to extract.
19485 EltToExtract = EltToExtract ? 0 : 1;
19486 // Update Op0 past the SRL.
19487 Op0 = Op0.getOperand(0);
19488 }
19489
19490 // BITCAST feeding a TRUNCATE possibly via SRL.
19491 if (Op0.getOpcode() == ISD::BITCAST &&
19492 Op0.getValueType() == MVT::i128 &&
19493 Op0.getOperand(0).getValueType() == MVT::f128) {
19494 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19495 return DCI.DAG.getNode(
19496 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19497 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19498 }
19499 return SDValue();
19500}
19501
19502SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
19503 SelectionDAG &DAG = DCI.DAG;
19504
19505 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
19506 if (!ConstOpOrElement)
19507 return SDValue();
19508
19509 // An imul is usually smaller than the alternative sequence for legal type.
19511 isOperationLegal(ISD::MUL, N->getValueType(0)))
19512 return SDValue();
19513
19514 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
19515 switch (this->Subtarget.getCPUDirective()) {
19516 default:
19517 // TODO: enhance the condition for subtarget before pwr8
19518 return false;
19519 case PPC::DIR_PWR8:
19520 // type mul add shl
19521 // scalar 4 1 1
19522 // vector 7 2 2
19523 return true;
19524 case PPC::DIR_PWR9:
19525 case PPC::DIR_PWR10:
19526 case PPC::DIR_PWR11:
19528 // type mul add shl
19529 // scalar 5 2 2
19530 // vector 7 2 2
19531
19532 // The cycle RATIO of related operations are showed as a table above.
19533 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
19534 // scalar and vector type. For 2 instrs patterns, add/sub + shl
19535 // are 4, it is always profitable; but for 3 instrs patterns
19536 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
19537 // So we should only do it for vector type.
19538 return IsAddOne && IsNeg ? VT.isVector() : true;
19539 }
19540 };
19541
19542 EVT VT = N->getValueType(0);
19543 SDLoc DL(N);
19544
19545 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19546 bool IsNeg = MulAmt.isNegative();
19547 APInt MulAmtAbs = MulAmt.abs();
19548
19549 if ((MulAmtAbs - 1).isPowerOf2()) {
19550 // (mul x, 2^N + 1) => (add (shl x, N), x)
19551 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
19552
19553 if (!IsProfitable(IsNeg, true, VT))
19554 return SDValue();
19555
19556 SDValue Op0 = N->getOperand(0);
19557 SDValue Op1 =
19558 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19559 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
19560 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
19561
19562 if (!IsNeg)
19563 return Res;
19564
19565 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
19566 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19567 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19568 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19569
19570 if (!IsProfitable(IsNeg, false, VT))
19571 return SDValue();
19572
19573 SDValue Op0 = N->getOperand(0);
19574 SDValue Op1 =
19575 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19576 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
19577
19578 if (!IsNeg)
19579 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
19580 else
19581 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
19582
19583 } else {
19584 return SDValue();
19585 }
19586}
19587
19588// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
19589// in combiner since we need to check SD flags and other subtarget features.
19590SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19591 DAGCombinerInfo &DCI) const {
19592 SDValue N0 = N->getOperand(0);
19593 SDValue N1 = N->getOperand(1);
19594 SDValue N2 = N->getOperand(2);
19595 SDNodeFlags Flags = N->getFlags();
19596 EVT VT = N->getValueType(0);
19597 SelectionDAG &DAG = DCI.DAG;
19598 const TargetOptions &Options = getTargetMachine().Options;
19599 unsigned Opc = N->getOpcode();
19601 bool LegalOps = !DCI.isBeforeLegalizeOps();
19602 SDLoc Loc(N);
19603
19604 if (!isOperationLegal(ISD::FMA, VT))
19605 return SDValue();
19606
19607 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19608 // since (fnmsub a b c)=-0 while c-ab=+0.
19609 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19610 return SDValue();
19611
19612 // (fma (fneg a) b c) => (fnmsub a b c)
19613 // (fnmsub (fneg a) b c) => (fma a b c)
19614 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
19615 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
19616
19617 // (fma a (fneg b) c) => (fnmsub a b c)
19618 // (fnmsub a (fneg b) c) => (fma a b c)
19619 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
19620 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
19621
19622 return SDValue();
19623}
19624
19625bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19626 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
19627 if (!Subtarget.is64BitELFABI())
19628 return false;
19629
19630 // If not a tail call then no need to proceed.
19631 if (!CI->isTailCall())
19632 return false;
19633
19634 // If sibling calls have been disabled and tail-calls aren't guaranteed
19635 // there is no reason to duplicate.
19636 auto &TM = getTargetMachine();
19637 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19638 return false;
19639
19640 // Can't tail call a function called indirectly, or if it has variadic args.
19641 const Function *Callee = CI->getCalledFunction();
19642 if (!Callee || Callee->isVarArg())
19643 return false;
19644
19645 // Make sure the callee and caller calling conventions are eligible for tco.
19646 const Function *Caller = CI->getParent()->getParent();
19647 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19648 CI->getCallingConv()))
19649 return false;
19650
19651 // If the function is local then we have a good chance at tail-calling it
19652 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19653}
19654
19655bool PPCTargetLowering::
19656isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19657 const Value *Mask = AndI.getOperand(1);
19658 // If the mask is suitable for andi. or andis. we should sink the and.
19659 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19660 // Can't handle constants wider than 64-bits.
19661 if (CI->getBitWidth() > 64)
19662 return false;
19663 int64_t ConstVal = CI->getZExtValue();
19664 return isUInt<16>(ConstVal) ||
19665 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
19666 }
19667
19668 // For non-constant masks, we can always use the record-form and.
19669 return true;
19670}
19671
19672/// getAddrModeForFlags - Based on the set of address flags, select the most
19673/// optimal instruction format to match by.
19674PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19675 // This is not a node we should be handling here.
19676 if (Flags == PPC::MOF_None)
19677 return PPC::AM_None;
19678 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19679 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
19680 if ((Flags & FlagSet) == FlagSet)
19681 return PPC::AM_DForm;
19682 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
19683 if ((Flags & FlagSet) == FlagSet)
19684 return PPC::AM_DSForm;
19685 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
19686 if ((Flags & FlagSet) == FlagSet)
19687 return PPC::AM_DQForm;
19688 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
19689 if ((Flags & FlagSet) == FlagSet)
19690 return PPC::AM_PrefixDForm;
19691 // If no other forms are selected, return an X-Form as it is the most
19692 // general addressing mode.
19693 return PPC::AM_XForm;
19694}
19695
19696/// Set alignment flags based on whether or not the Frame Index is aligned.
19697/// Utilized when computing flags for address computation when selecting
19698/// load and store instructions.
19699static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19700 SelectionDAG &DAG) {
19701 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19702 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
19703 if (!FI)
19704 return;
19706 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
19707 // If this is (add $FI, $S16Imm), the alignment flags are already set
19708 // based on the immediate. We just need to clear the alignment flags
19709 // if the FI alignment is weaker.
19710 if ((FrameIndexAlign % 4) != 0)
19711 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19712 if ((FrameIndexAlign % 16) != 0)
19713 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19714 // If the address is a plain FrameIndex, set alignment flags based on
19715 // FI alignment.
19716 if (!IsAdd) {
19717 if ((FrameIndexAlign % 4) == 0)
19718 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19719 if ((FrameIndexAlign % 16) == 0)
19720 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19721 }
19722}
19723
19724/// Given a node, compute flags that are used for address computation when
19725/// selecting load and store instructions. The flags computed are stored in
19726/// FlagSet. This function takes into account whether the node is a constant,
19727/// an ADD, OR, or a constant, and computes the address flags accordingly.
19728static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19729 SelectionDAG &DAG) {
19730 // Set the alignment flags for the node depending on if the node is
19731 // 4-byte or 16-byte aligned.
19732 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19733 if ((Imm & 0x3) == 0)
19734 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19735 if ((Imm & 0xf) == 0)
19736 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19737 };
19738
19740 // All 32-bit constants can be computed as LIS + Disp.
19741 const APInt &ConstImm = CN->getAPIntValue();
19742 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
19743 FlagSet |= PPC::MOF_AddrIsSImm32;
19744 SetAlignFlagsForImm(ConstImm.getZExtValue());
19745 setAlignFlagsForFI(N, FlagSet, DAG);
19746 }
19747 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
19748 FlagSet |= PPC::MOF_RPlusSImm34;
19749 else // Let constant materialization handle large constants.
19750 FlagSet |= PPC::MOF_NotAddNorCst;
19751 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19752 // This address can be represented as an addition of:
19753 // - Register + Imm16 (possibly a multiple of 4/16)
19754 // - Register + Imm34
19755 // - Register + PPCISD::Lo
19756 // - Register + Register
19757 // In any case, we won't have to match this as Base + Zero.
19758 SDValue RHS = N.getOperand(1);
19760 const APInt &ConstImm = CN->getAPIntValue();
19761 if (ConstImm.isSignedIntN(16)) {
19762 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19763 SetAlignFlagsForImm(ConstImm.getZExtValue());
19764 setAlignFlagsForFI(N, FlagSet, DAG);
19765 }
19766 if (ConstImm.isSignedIntN(34))
19767 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19768 else
19769 FlagSet |= PPC::MOF_RPlusR; // Register.
19770 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
19771 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19772 else
19773 FlagSet |= PPC::MOF_RPlusR;
19774 } else { // The address computation is not a constant or an addition.
19775 setAlignFlagsForFI(N, FlagSet, DAG);
19776 FlagSet |= PPC::MOF_NotAddNorCst;
19777 }
19778}
19779
19780static bool isPCRelNode(SDValue N) {
19781 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
19786}
19787
19788/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
19789/// the address flags of the load/store instruction that is to be matched.
19790unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19791 SelectionDAG &DAG) const {
19792 unsigned FlagSet = PPC::MOF_None;
19793
19794 // Compute subtarget flags.
19795 if (!Subtarget.hasP9Vector())
19796 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19797 else
19798 FlagSet |= PPC::MOF_SubtargetP9;
19799
19800 if (Subtarget.hasPrefixInstrs())
19801 FlagSet |= PPC::MOF_SubtargetP10;
19802
19803 if (Subtarget.hasSPE())
19804 FlagSet |= PPC::MOF_SubtargetSPE;
19805
19806 // Check if we have a PCRel node and return early.
19807 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19808 return FlagSet;
19809
19810 // If the node is the paired load/store intrinsics, compute flags for
19811 // address computation and return early.
19812 unsigned ParentOp = Parent->getOpcode();
19813 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19814 (ParentOp == ISD::INTRINSIC_VOID))) {
19815 unsigned ID = Parent->getConstantOperandVal(1);
19816 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19817 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19818 ? Parent->getOperand(2)
19819 : Parent->getOperand(3);
19820 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
19821 FlagSet |= PPC::MOF_Vector;
19822 return FlagSet;
19823 }
19824 }
19825
19826 // Mark this as something we don't want to handle here if it is atomic
19827 // or pre-increment instruction.
19828 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
19829 if (LSB->isIndexed())
19830 return PPC::MOF_None;
19831
19832 // Compute in-memory type flags. This is based on if there are scalars,
19833 // floats or vectors.
19834 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
19835 assert(MN && "Parent should be a MemSDNode!");
19836 EVT MemVT = MN->getMemoryVT();
19837 unsigned Size = MemVT.getSizeInBits();
19838 if (MemVT.isScalarInteger()) {
19839 assert(Size <= 128 &&
19840 "Not expecting scalar integers larger than 16 bytes!");
19841 if (Size < 32)
19842 FlagSet |= PPC::MOF_SubWordInt;
19843 else if (Size == 32)
19844 FlagSet |= PPC::MOF_WordInt;
19845 else
19846 FlagSet |= PPC::MOF_DoubleWordInt;
19847 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19848 if (Size == 128)
19849 FlagSet |= PPC::MOF_Vector;
19850 else if (Size == 256) {
19851 assert(Subtarget.pairedVectorMemops() &&
19852 "256-bit vectors are only available when paired vector memops is "
19853 "enabled!");
19854 FlagSet |= PPC::MOF_Vector;
19855 } else
19856 llvm_unreachable("Not expecting illegal vectors!");
19857 } else { // Floating point type: can be scalar, f128 or vector types.
19858 if (Size == 32 || Size == 64)
19859 FlagSet |= PPC::MOF_ScalarFloat;
19860 else if (MemVT == MVT::f128 || MemVT.isVector())
19861 FlagSet |= PPC::MOF_Vector;
19862 else
19863 llvm_unreachable("Not expecting illegal scalar floats!");
19864 }
19865
19866 // Compute flags for address computation.
19867 computeFlagsForAddressComputation(N, FlagSet, DAG);
19868
19869 // Compute type extension flags.
19870 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
19871 switch (LN->getExtensionType()) {
19872 case ISD::SEXTLOAD:
19873 FlagSet |= PPC::MOF_SExt;
19874 break;
19875 case ISD::EXTLOAD:
19876 case ISD::ZEXTLOAD:
19877 FlagSet |= PPC::MOF_ZExt;
19878 break;
19879 case ISD::NON_EXTLOAD:
19880 FlagSet |= PPC::MOF_NoExt;
19881 break;
19882 }
19883 } else
19884 FlagSet |= PPC::MOF_NoExt;
19885
19886 // For integers, no extension is the same as zero extension.
19887 // We set the extension mode to zero extension so we don't have
19888 // to add separate entries in AddrModesMap for loads and stores.
19889 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19890 FlagSet |= PPC::MOF_ZExt;
19891 FlagSet &= ~PPC::MOF_NoExt;
19892 }
19893
19894 // If we don't have prefixed instructions, 34-bit constants should be
19895 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19896 bool IsNonP1034BitConst =
19898 FlagSet) == PPC::MOF_RPlusSImm34;
19899 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19900 IsNonP1034BitConst)
19901 FlagSet |= PPC::MOF_NotAddNorCst;
19902
19903 return FlagSet;
19904}
19905
19906/// SelectForceXFormMode - Given the specified address, force it to be
19907/// represented as an indexed [r+r] operation (an XForm instruction).
19909 SDValue &Base,
19910 SelectionDAG &DAG) const {
19911
19913 int16_t ForceXFormImm = 0;
19914 if (provablyDisjointOr(DAG, N) &&
19915 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
19916 Disp = N.getOperand(0);
19917 Base = N.getOperand(1);
19918 return Mode;
19919 }
19920
19921 // If the address is the result of an add, we will utilize the fact that the
19922 // address calculation includes an implicit add. However, we can reduce
19923 // register pressure if we do not materialize a constant just for use as the
19924 // index register. We only get rid of the add if it is not an add of a
19925 // value and a 16-bit signed constant and both have a single use.
19926 if (N.getOpcode() == ISD::ADD &&
19927 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
19928 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
19929 Disp = N.getOperand(0);
19930 Base = N.getOperand(1);
19931 return Mode;
19932 }
19933
19934 // Otherwise, use R0 as the base register.
19935 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19936 N.getValueType());
19937 Base = N;
19938
19939 return Mode;
19940}
19941
19943 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19944 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19945 EVT ValVT = Val.getValueType();
19946 // If we are splitting a scalar integer into f64 parts (i.e. so they
19947 // can be placed into VFRC registers), we need to zero extend and
19948 // bitcast the values. This will ensure the value is placed into a
19949 // VSR using direct moves or stack operations as needed.
19950 if (PartVT == MVT::f64 &&
19951 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
19952 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
19953 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
19954 Parts[0] = Val;
19955 return true;
19956 }
19957 return false;
19958}
19959
19960SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
19961 SelectionDAG &DAG) const {
19962 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19964 EVT RetVT = Op.getValueType();
19965 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
19966 SDValue Callee =
19967 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
19968 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
19970 for (const SDValue &N : Op->op_values()) {
19971 EVT ArgVT = N.getValueType();
19972 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
19973 TargetLowering::ArgListEntry Entry(N, ArgTy);
19974 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
19975 Entry.IsZExt = !Entry.IsSExt;
19976 Args.push_back(Entry);
19977 }
19978
19979 SDValue InChain = DAG.getEntryNode();
19980 SDValue TCChain = InChain;
19981 const Function &F = DAG.getMachineFunction().getFunction();
19982 bool isTailCall =
19983 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
19984 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
19985 if (isTailCall)
19986 InChain = TCChain;
19987 CLI.setDebugLoc(SDLoc(Op))
19988 .setChain(InChain)
19989 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
19990 .setTailCall(isTailCall)
19991 .setSExtResult(SignExtend)
19992 .setZExtResult(!SignExtend)
19994 return TLI.LowerCallTo(CLI).first;
19995}
19996
19997SDValue PPCTargetLowering::lowerLibCallBasedOnType(
19998 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
19999 SelectionDAG &DAG) const {
20000 if (Op.getValueType() == MVT::f32)
20001 return lowerToLibCall(LibCallFloatName, Op, DAG);
20002
20003 if (Op.getValueType() == MVT::f64)
20004 return lowerToLibCall(LibCallDoubleName, Op, DAG);
20005
20006 return SDValue();
20007}
20008
20009bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20010 SDNodeFlags Flags = Op.getNode()->getFlags();
20011 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20012 Flags.hasNoNaNs() && Flags.hasNoInfs();
20013}
20014
20015bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20016 return Op.getNode()->getFlags().hasApproximateFuncs();
20017}
20018
20019bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20021}
20022
20023SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20024 const char *LibCallFloatName,
20025 const char *LibCallDoubleNameFinite,
20026 const char *LibCallFloatNameFinite,
20027 SDValue Op,
20028 SelectionDAG &DAG) const {
20029 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20030 return SDValue();
20031
20032 if (!isLowringToMASSFiniteSafe(Op))
20033 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20034 DAG);
20035
20036 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
20037 LibCallDoubleNameFinite, Op, DAG);
20038}
20039
20040SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20041 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
20042 "__xl_powf_finite", Op, DAG);
20043}
20044
20045SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20046 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
20047 "__xl_sinf_finite", Op, DAG);
20048}
20049
20050SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20051 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
20052 "__xl_cosf_finite", Op, DAG);
20053}
20054
20055SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20056 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
20057 "__xl_logf_finite", Op, DAG);
20058}
20059
20060SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20061 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
20062 "__xl_log10f_finite", Op, DAG);
20063}
20064
20065SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20066 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
20067 "__xl_expf_finite", Op, DAG);
20068}
20069
20070// If we happen to match to an aligned D-Form, check if the Frame Index is
20071// adequately aligned. If it is not, reset the mode to match to X-Form.
20072static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20075 return;
20076 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
20079}
20080
20081/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
20082/// compute the address flags of the node, get the optimal address mode based
20083/// on the flags, and set the Base and Disp based on the address mode.
20085 SDValue N, SDValue &Disp,
20086 SDValue &Base,
20087 SelectionDAG &DAG,
20088 MaybeAlign Align) const {
20089 SDLoc DL(Parent);
20090
20091 // Compute the address flags.
20092 unsigned Flags = computeMOFlags(Parent, N, DAG);
20093
20094 // Get the optimal address mode based on the Flags.
20095 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
20096
20097 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
20098 // Select an X-Form load if it is not.
20099 setXFormForUnalignedFI(N, Flags, Mode);
20100
20101 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
20102 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
20103 assert(Subtarget.isUsingPCRelativeCalls() &&
20104 "Must be using PC-Relative calls when a valid PC-Relative node is "
20105 "present!");
20106 Mode = PPC::AM_PCRel;
20107 }
20108
20109 // Set Base and Disp accordingly depending on the address mode.
20110 switch (Mode) {
20111 case PPC::AM_DForm:
20112 case PPC::AM_DSForm:
20113 case PPC::AM_DQForm: {
20114 // This is a register plus a 16-bit immediate. The base will be the
20115 // register and the displacement will be the immediate unless it
20116 // isn't sufficiently aligned.
20117 if (Flags & PPC::MOF_RPlusSImm16) {
20118 SDValue Op0 = N.getOperand(0);
20119 SDValue Op1 = N.getOperand(1);
20120 int16_t Imm = Op1->getAsZExtVal();
20121 if (!Align || isAligned(*Align, Imm)) {
20122 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
20123 Base = Op0;
20125 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20126 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20127 }
20128 break;
20129 }
20130 }
20131 // This is a register plus the @lo relocation. The base is the register
20132 // and the displacement is the global address.
20133 else if (Flags & PPC::MOF_RPlusLo) {
20134 Disp = N.getOperand(1).getOperand(0); // The global address.
20139 Base = N.getOperand(0);
20140 break;
20141 }
20142 // This is a constant address at most 32 bits. The base will be
20143 // zero or load-immediate-shifted and the displacement will be
20144 // the low 16 bits of the address.
20145 else if (Flags & PPC::MOF_AddrIsSImm32) {
20146 auto *CN = cast<ConstantSDNode>(N);
20147 EVT CNType = CN->getValueType(0);
20148 uint64_t CNImm = CN->getZExtValue();
20149 // If this address fits entirely in a 16-bit sext immediate field, codegen
20150 // this as "d, 0".
20151 int16_t Imm;
20152 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20153 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20154 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20155 CNType);
20156 break;
20157 }
20158 // Handle 32-bit sext immediate with LIS + Addr mode.
20159 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20160 (!Align || isAligned(*Align, CNImm))) {
20161 int32_t Addr = (int32_t)CNImm;
20162 // Otherwise, break this down into LIS + Disp.
20163 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20164 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20165 MVT::i32);
20166 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20167 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
20168 break;
20169 }
20170 }
20171 // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
20172 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
20174 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20175 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20176 } else
20177 Base = N;
20178 break;
20179 }
20180 case PPC::AM_PrefixDForm: {
20181 int64_t Imm34 = 0;
20182 unsigned Opcode = N.getOpcode();
20183 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20184 (isIntS34Immediate(N.getOperand(1), Imm34))) {
20185 // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
20186 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20187 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20188 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20189 else
20190 Base = N.getOperand(0);
20191 } else if (isIntS34Immediate(N, Imm34)) {
20192 // The address is a 34-bit signed immediate.
20193 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20194 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20195 }
20196 break;
20197 }
20198 case PPC::AM_PCRel: {
20199 // When selecting PC-Relative instructions, "Base" is not utilized as
20200 // we select the address as [PC+imm].
20201 Disp = N;
20202 break;
20203 }
20204 case PPC::AM_None:
20205 break;
20206 default: { // By default, X-Form is always available to be selected.
20207 // When a frame index is not aligned, we also match by XForm.
20209 Base = FI ? N : N.getOperand(1);
20210 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20211 N.getValueType())
20212 : N.getOperand(0);
20213 break;
20214 }
20215 }
20216 return Mode;
20217}
20218
20220 bool Return,
20221 bool IsVarArg) const {
20222 switch (CC) {
20223 case CallingConv::Cold:
20224 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20225 default:
20226 return CC_PPC64_ELF;
20227 }
20228}
20229
20231 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20232}
20233
20236 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20237 if (shouldInlineQuadwordAtomics() && Size == 128)
20239
20240 switch (AI->getOperation()) {
20246 default:
20248 }
20249
20250 llvm_unreachable("unreachable atomicrmw operation");
20251}
20252
20260
20261static Intrinsic::ID
20263 switch (BinOp) {
20264 default:
20265 llvm_unreachable("Unexpected AtomicRMW BinOp");
20267 return Intrinsic::ppc_atomicrmw_xchg_i128;
20268 case AtomicRMWInst::Add:
20269 return Intrinsic::ppc_atomicrmw_add_i128;
20270 case AtomicRMWInst::Sub:
20271 return Intrinsic::ppc_atomicrmw_sub_i128;
20272 case AtomicRMWInst::And:
20273 return Intrinsic::ppc_atomicrmw_and_i128;
20274 case AtomicRMWInst::Or:
20275 return Intrinsic::ppc_atomicrmw_or_i128;
20276 case AtomicRMWInst::Xor:
20277 return Intrinsic::ppc_atomicrmw_xor_i128;
20279 return Intrinsic::ppc_atomicrmw_nand_i128;
20280 }
20281}
20282
20284 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20285 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20286 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20287 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20288 Type *ValTy = Incr->getType();
20289 assert(ValTy->getPrimitiveSizeInBits() == 128);
20290 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20291 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20292 Value *IncrHi =
20293 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20294 Value *LoHi = Builder.CreateIntrinsic(
20296 {AlignedAddr, IncrLo, IncrHi});
20297 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20298 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20299 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20300 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20301 return Builder.CreateOr(
20302 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20303}
20304
20306 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20307 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20308 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20309 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20310 Type *ValTy = CmpVal->getType();
20311 assert(ValTy->getPrimitiveSizeInBits() == 128);
20312 Function *IntCmpXchg =
20313 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20314 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20315 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20316 Value *CmpHi =
20317 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20318 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20319 Value *NewHi =
20320 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20321 emitLeadingFence(Builder, CI, Ord);
20322 Value *LoHi =
20323 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20324 emitTrailingFence(Builder, CI, Ord);
20325 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20326 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20327 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20328 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20329 return Builder.CreateOr(
20330 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20331}
20332
20334 return Subtarget.useCRBits();
20335}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS)
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool canConvertSETCCToXori(SDNode *N)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
bool isDenormal() const
Definition APFloat.h:1432
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
APInt abs() const
Get the absolute value.
Definition APInt.h:1796
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1397
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1723
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:207
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:363
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:777
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
arg_iterator arg_begin()
Definition Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
size_t arg_size() const
Definition Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:191
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:132
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1078
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of largest of number of comparisons to generate BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:180
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:258
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ TargetConstantPool
Definition ISDOpcodes.h:184
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ TargetExternalSymbol
Definition ISDOpcodes.h:185
@ TargetJumpTable
Definition ISDOpcodes.h:183
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:180
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:181
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison