// LLVM 20.0.0git — PPCFastISel.cpp (documentation-site page header; not part of the source).
1//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PowerPC-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// PPCGenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCISelLowering.h"
20#include "PPCSubtarget.h"
29#include "llvm/IR/CallingConv.h"
32#include "llvm/IR/Operator.h"
34
35//===----------------------------------------------------------------------===//
36//
37// TBD:
38// fastLowerArguments: Handle simple cases.
39// PPCMaterializeGV: Handle TLS.
40// SelectCall: Handle function pointers.
41// SelectCall: Handle multi-register return values.
42// SelectCall: Optimize away nops for local calls.
43// processCallArgs: Handle bit-converted arguments.
44// finishCall: Handle multi-register return values.
45// PPCComputeAddress: Handle parameter references as FrameIndex's.
46// PPCEmitCmp: Handle immediate as operand 1.
47// SelectCall: Handle small byval arguments.
48// SelectIntrinsicCall: Implement.
49// SelectSelect: Implement.
50// Consider factoring isTypeLegal into the base class.
51// Implement switches and jump tables.
52//
53//===----------------------------------------------------------------------===//
54using namespace llvm;
55
56#define DEBUG_TYPE "ppcfastisel"
57
58namespace {
59
struct Address {
  // Discriminates which member of Base below is active.
  enum {
    RegBase,
    FrameIndexBase
  } BaseType;

  union {
    unsigned Reg; // Base register when BaseType == RegBase.
    int FI;       // Stack frame index when BaseType == FrameIndexBase.
  } Base;

  // Constant displacement added to the base.
  int64_t Offset;

  // Start out as a register base with no register and a zero offset.
  Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; }
};
79
80class PPCFastISel final : public FastISel {
81
82 const TargetMachine &TM;
83 const PPCSubtarget *Subtarget;
84 PPCFunctionInfo *PPCFuncInfo;
85 const TargetInstrInfo &TII;
86 const TargetLowering &TLI;
87 LLVMContext *Context;
88
89 public:
90 explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
91 const TargetLibraryInfo *LibInfo)
92 : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
93 Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
94 PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
95 TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
96 Context(&FuncInfo.Fn->getContext()) {}
97
98 // Backend specific FastISel code.
99 private:
100 bool fastSelectInstruction(const Instruction *I) override;
101 unsigned fastMaterializeConstant(const Constant *C) override;
102 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
103 bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
104 const LoadInst *LI) override;
105 bool fastLowerArguments() override;
106 unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
107 unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
108 const TargetRegisterClass *RC,
109 unsigned Op0, uint64_t Imm);
110 unsigned fastEmitInst_r(unsigned MachineInstOpcode,
111 const TargetRegisterClass *RC, unsigned Op0);
112 unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
113 const TargetRegisterClass *RC,
114 unsigned Op0, unsigned Op1);
115
116 bool fastLowerCall(CallLoweringInfo &CLI) override;
117
118 // Instruction selection routines.
119 private:
120 bool SelectLoad(const Instruction *I);
121 bool SelectStore(const Instruction *I);
122 bool SelectBranch(const Instruction *I);
123 bool SelectIndirectBr(const Instruction *I);
124 bool SelectFPExt(const Instruction *I);
125 bool SelectFPTrunc(const Instruction *I);
126 bool SelectIToFP(const Instruction *I, bool IsSigned);
127 bool SelectFPToI(const Instruction *I, bool IsSigned);
128 bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
129 bool SelectRet(const Instruction *I);
130 bool SelectTrunc(const Instruction *I);
131 bool SelectIntExt(const Instruction *I);
132
133 // Utility routines.
134 private:
135 bool isTypeLegal(Type *Ty, MVT &VT);
136 bool isLoadTypeLegal(Type *Ty, MVT &VT);
137 bool isValueAvailable(const Value *V) const;
138 bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
139 return RC->getID() == PPC::VSFRCRegClassID;
140 }
141 bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
142 return RC->getID() == PPC::VSSRCRegClassID;
143 }
144 unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
145 unsigned SrcReg, unsigned Flag = 0,
146 unsigned SubReg = 0) {
147 Register TmpReg = createResultReg(ToRC);
148 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
149 TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
150 return TmpReg;
151 }
152 bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
153 bool isZExt, unsigned DestReg,
154 const PPC::Predicate Pred);
155 bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
156 const TargetRegisterClass *RC, bool IsZExt = true,
157 unsigned FP64LoadOpc = PPC::LFD);
158 bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
159 bool PPCComputeAddress(const Value *Obj, Address &Addr);
160 void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
161 unsigned &IndexReg);
162 bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
163 unsigned DestReg, bool IsZExt);
164 unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
165 unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
166 unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
167 bool UseSExt = true);
168 unsigned PPCMaterialize32BitInt(int64_t Imm,
169 const TargetRegisterClass *RC);
170 unsigned PPCMaterialize64BitInt(int64_t Imm,
171 const TargetRegisterClass *RC);
172 unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
173 unsigned SrcReg, bool IsSigned);
174 unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
175
176 // Call handling routines.
177 private:
178 bool processCallArgs(SmallVectorImpl<Value*> &Args,
180 SmallVectorImpl<MVT> &ArgVTs,
184 unsigned &NumBytes,
185 bool IsVarArg);
186 bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
187
188 private:
189 #include "PPCGenFastISel.inc"
190
191};
192
193} // end anonymous namespace
194
195static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
196 switch (Pred) {
197 // These are not representable with any single compare.
200 // Major concern about the following 6 cases is NaN result. The comparison
201 // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
202 // only one of which will be set. The result is generated by fcmpu
203 // instruction. However, bc instruction only inspects one of the first 3
204 // bits, so when un is set, bc instruction may jump to an undesired
205 // place.
206 //
207 // More specifically, if we expect an unordered comparison and un is set, we
208 // expect to always go to true branch; in such case UEQ, UGT and ULT still
209 // give false, which are undesired; but UNE, UGE, ULE happen to give true,
210 // since they are tested by inspecting !eq, !lt, !gt, respectively.
211 //
212 // Similarly, for ordered comparison, when un is set, we always expect the
213 // result to be false. In such case OGT, OLT and OEQ is good, since they are
214 // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
215 // and ONE are tested through !lt, !gt and !eq, and these are true.
222 default:
223 return std::nullopt;
224
226 case CmpInst::ICMP_EQ:
227 return PPC::PRED_EQ;
228
232 return PPC::PRED_GT;
233
237 return PPC::PRED_GE;
238
242 return PPC::PRED_LT;
243
247 return PPC::PRED_LE;
248
250 case CmpInst::ICMP_NE:
251 return PPC::PRED_NE;
252
254 return PPC::PRED_NU;
255
257 return PPC::PRED_UN;
258 }
259}
260
261// Determine whether the type Ty is simple enough to be handled by
262// fast-isel, and return its equivalent machine type in VT.
263// FIXME: Copied directly from ARM -- factor into base class?
264bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
265 EVT Evt = TLI.getValueType(DL, Ty, true);
266
267 // Only handle simple types.
268 if (Evt == MVT::Other || !Evt.isSimple()) return false;
269 VT = Evt.getSimpleVT();
270
271 // Handle all legal types, i.e. a register that will directly hold this
272 // value.
273 return TLI.isTypeLegal(VT);
274}
275
276// Determine whether the type Ty is simple enough to be handled by
277// fast-isel as a load target, and return its equivalent machine type in VT.
278bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
279 if (isTypeLegal(Ty, VT)) return true;
280
281 // If this is a type than can be sign or zero-extended to a basic operation
282 // go ahead and accept it now.
283 if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
284 return true;
285 }
286
287 return false;
288}
289
290bool PPCFastISel::isValueAvailable(const Value *V) const {
291 if (!isa<Instruction>(V))
292 return true;
293
294 const auto *I = cast<Instruction>(V);
295 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
296}
297
298// Given a value Obj, create an Address object Addr that represents its
299// address. Return false if we can't handle it.
300bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
301 const User *U = nullptr;
302 unsigned Opcode = Instruction::UserOp1;
303 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
304 // Don't walk into other basic blocks unless the object is an alloca from
305 // another block, otherwise it may not have a virtual register assigned.
306 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
307 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
308 Opcode = I->getOpcode();
309 U = I;
310 }
311 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
312 Opcode = C->getOpcode();
313 U = C;
314 }
315
316 switch (Opcode) {
317 default:
318 break;
319 case Instruction::BitCast:
320 // Look through bitcasts.
321 return PPCComputeAddress(U->getOperand(0), Addr);
322 case Instruction::IntToPtr:
323 // Look past no-op inttoptrs.
324 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
325 TLI.getPointerTy(DL))
326 return PPCComputeAddress(U->getOperand(0), Addr);
327 break;
328 case Instruction::PtrToInt:
329 // Look past no-op ptrtoints.
330 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
331 return PPCComputeAddress(U->getOperand(0), Addr);
332 break;
333 case Instruction::GetElementPtr: {
334 Address SavedAddr = Addr;
335 int64_t TmpOffset = Addr.Offset;
336
337 // Iterate through the GEP folding the constants into offsets where
338 // we can.
340 for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
341 II != IE; ++II, ++GTI) {
342 const Value *Op = *II;
343 if (StructType *STy = GTI.getStructTypeOrNull()) {
344 const StructLayout *SL = DL.getStructLayout(STy);
345 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
346 TmpOffset += SL->getElementOffset(Idx);
347 } else {
349 for (;;) {
350 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
351 // Constant-offset addressing.
352 TmpOffset += CI->getSExtValue() * S;
353 break;
354 }
355 if (canFoldAddIntoGEP(U, Op)) {
356 // A compatible add with a constant operand. Fold the constant.
357 ConstantInt *CI =
358 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
359 TmpOffset += CI->getSExtValue() * S;
360 // Iterate on the other operand.
361 Op = cast<AddOperator>(Op)->getOperand(0);
362 continue;
363 }
364 // Unsupported
365 goto unsupported_gep;
366 }
367 }
368 }
369
370 // Try to grab the base operand now.
371 Addr.Offset = TmpOffset;
372 if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
373
374 // We failed, restore everything and try the other options.
375 Addr = SavedAddr;
376
377 unsupported_gep:
378 break;
379 }
380 case Instruction::Alloca: {
381 const AllocaInst *AI = cast<AllocaInst>(Obj);
383 FuncInfo.StaticAllocaMap.find(AI);
384 if (SI != FuncInfo.StaticAllocaMap.end()) {
385 Addr.BaseType = Address::FrameIndexBase;
386 Addr.Base.FI = SI->second;
387 return true;
388 }
389 break;
390 }
391 }
392
393 // FIXME: References to parameters fall through to the behavior
394 // below. They should be able to reference a frame index since
395 // they are stored to the stack, so we can get "ld rx, offset(r1)"
396 // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
397 // just contain the parameter. Try to handle this with a FI.
398
399 // Try to get this in a register if nothing else has worked.
400 if (Addr.Base.Reg == 0)
401 Addr.Base.Reg = getRegForValue(Obj);
402
403 // Prevent assignment of base register to X0, which is inappropriate
404 // for loads and stores alike.
405 if (Addr.Base.Reg != 0)
406 MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
407
408 return Addr.Base.Reg != 0;
409}
410
411// Fix up some addresses that can't be used directly. For example, if
412// an offset won't fit in an instruction field, we may need to move it
413// into an index register.
414void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
415 unsigned &IndexReg) {
416
417 // Check whether the offset fits in the instruction field.
418 if (!isInt<16>(Addr.Offset))
419 UseOffset = false;
420
421 // If this is a stack pointer and the offset needs to be simplified then
422 // put the alloca address into a register, set the base type back to
423 // register and continue. This should almost never happen.
424 if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
425 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
426 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
427 ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
428 Addr.Base.Reg = ResultReg;
429 Addr.BaseType = Address::RegBase;
430 }
431
432 if (!UseOffset) {
433 IntegerType *OffsetTy = Type::getInt64Ty(*Context);
434 const ConstantInt *Offset = ConstantInt::getSigned(OffsetTy, Addr.Offset);
435 IndexReg = PPCMaterializeInt(Offset, MVT::i64);
436 assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
437 }
438}
439
440// Emit a load instruction if possible, returning true if we succeeded,
441// otherwise false. See commentary below for how the register class of
442// the load is determined.
443bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
444 const TargetRegisterClass *RC,
445 bool IsZExt, unsigned FP64LoadOpc) {
446 unsigned Opc;
447 bool UseOffset = true;
448 bool HasSPE = Subtarget->hasSPE();
449
450 // If ResultReg is given, it determines the register class of the load.
451 // Otherwise, RC is the register class to use. If the result of the
452 // load isn't anticipated in this block, both may be zero, in which
453 // case we must make a conservative guess. In particular, don't assign
454 // R0 or X0 to the result register, as the result may be used in a load,
455 // store, add-immediate, or isel that won't permit this. (Though
456 // perhaps the spill and reload of live-exit values would handle this?)
457 const TargetRegisterClass *UseRC =
458 (ResultReg ? MRI.getRegClass(ResultReg) :
459 (RC ? RC :
460 (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
461 (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
462 (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
463 &PPC::GPRC_and_GPRC_NOR0RegClass)))));
464
465 bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
466
467 switch (VT.SimpleTy) {
468 default: // e.g., vector types not handled
469 return false;
470 case MVT::i8:
471 Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
472 break;
473 case MVT::i16:
474 Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
475 : (Is32BitInt ? PPC::LHA : PPC::LHA8));
476 break;
477 case MVT::i32:
478 Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
479 : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
480 if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
481 UseOffset = false;
482 break;
483 case MVT::i64:
484 Opc = PPC::LD;
485 assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
486 "64-bit load with 32-bit target??");
487 UseOffset = ((Addr.Offset & 3) == 0);
488 break;
489 case MVT::f32:
490 Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
491 break;
492 case MVT::f64:
493 Opc = FP64LoadOpc;
494 break;
495 }
496
497 // If necessary, materialize the offset into a register and use
498 // the indexed form. Also handle stack pointers with special needs.
499 unsigned IndexReg = 0;
500 PPCSimplifyAddress(Addr, UseOffset, IndexReg);
501
502 // If this is a potential VSX load with an offset of 0, a VSX indexed load can
503 // be used.
504 bool IsVSSRC = isVSSRCRegClass(UseRC);
505 bool IsVSFRC = isVSFRCRegClass(UseRC);
506 bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
507 bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
508 if ((Is32VSXLoad || Is64VSXLoad) &&
509 (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
510 (Addr.Offset == 0)) {
511 UseOffset = false;
512 }
513
514 if (ResultReg == 0)
515 ResultReg = createResultReg(UseRC);
516
517 // Note: If we still have a frame index here, we know the offset is
518 // in range, as otherwise PPCSimplifyAddress would have converted it
519 // into a RegBase.
520 if (Addr.BaseType == Address::FrameIndexBase) {
521 // VSX only provides an indexed load.
522 if (Is32VSXLoad || Is64VSXLoad) return false;
523
524 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
525 MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
526 Addr.Offset),
527 MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
528 MFI.getObjectAlign(Addr.Base.FI));
529
530 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
531 .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
532
533 // Base reg with offset in range.
534 } else if (UseOffset) {
535 // VSX only provides an indexed load.
536 if (Is32VSXLoad || Is64VSXLoad) return false;
537
538 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
539 .addImm(Addr.Offset).addReg(Addr.Base.Reg);
540
541 // Indexed form.
542 } else {
543 // Get the RR opcode corresponding to the RI one. FIXME: It would be
544 // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
545 // is hard to get at.
546 switch (Opc) {
547 default: llvm_unreachable("Unexpected opcode!");
548 case PPC::LBZ: Opc = PPC::LBZX; break;
549 case PPC::LBZ8: Opc = PPC::LBZX8; break;
550 case PPC::LHZ: Opc = PPC::LHZX; break;
551 case PPC::LHZ8: Opc = PPC::LHZX8; break;
552 case PPC::LHA: Opc = PPC::LHAX; break;
553 case PPC::LHA8: Opc = PPC::LHAX8; break;
554 case PPC::LWZ: Opc = PPC::LWZX; break;
555 case PPC::LWZ8: Opc = PPC::LWZX8; break;
556 case PPC::LWA: Opc = PPC::LWAX; break;
557 case PPC::LWA_32: Opc = PPC::LWAX_32; break;
558 case PPC::LD: Opc = PPC::LDX; break;
559 case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
560 case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
561 case PPC::EVLDD: Opc = PPC::EVLDDX; break;
562 case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
563 }
564
565 auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
566 ResultReg);
567
568 // If we have an index register defined we use it in the store inst,
569 // otherwise we use X0 as base as it makes the vector instructions to
570 // use zero in the computation of the effective address regardless the
571 // content of the register.
572 if (IndexReg)
573 MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
574 else
575 MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
576 }
577
578 return true;
579}
580
581// Attempt to fast-select a load instruction.
582bool PPCFastISel::SelectLoad(const Instruction *I) {
583 // FIXME: No atomic loads are supported.
584 if (cast<LoadInst>(I)->isAtomic())
585 return false;
586
587 // Verify we have a legal type before going any further.
588 MVT VT;
589 if (!isLoadTypeLegal(I->getType(), VT))
590 return false;
591
592 // See if we can handle this address.
594 if (!PPCComputeAddress(I->getOperand(0), Addr))
595 return false;
596
597 // Look at the currently assigned register for this instruction
598 // to determine the required register class. This is necessary
599 // to constrain RA from using R0/X0 when this is not legal.
600 Register AssignedReg = FuncInfo.ValueMap[I];
601 const TargetRegisterClass *RC =
602 AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
603
604 Register ResultReg = 0;
605 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
606 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
607 return false;
608 updateValueMap(I, ResultReg);
609 return true;
610}
611
612// Emit a store instruction to store SrcReg at Addr.
// Emit a store instruction to store SrcReg at Addr.  Returns false if the
// value type or addressing mode cannot be handled here.
bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // The source register's class distinguishes 32-bit integer values from
  // 64-bit ones, which selects between the 32- and 64-bit store opcodes.
  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      // STD's displacement must be a multiple of 4 (DS-form encoding).
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
      break;
    case MVT::f64:
      Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::STB:  Opc = PPC::STBX;  break;
      case PPC::STH : Opc = PPC::STHX;  break;
      case PPC::STW : Opc = PPC::STWX;  break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD:  Opc = PPC::STDX;  break;
      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
      case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
      case PPC::SPESTW: Opc = PPC::SPESTWX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
726
727// Attempt to fast-select a store instruction.
728bool PPCFastISel::SelectStore(const Instruction *I) {
729 Value *Op0 = I->getOperand(0);
730 unsigned SrcReg = 0;
731
732 // FIXME: No atomics loads are supported.
733 if (cast<StoreInst>(I)->isAtomic())
734 return false;
735
736 // Verify we have a legal type before going any further.
737 MVT VT;
738 if (!isLoadTypeLegal(Op0->getType(), VT))
739 return false;
740
741 // Get the value to be stored into a register.
742 SrcReg = getRegForValue(Op0);
743 if (SrcReg == 0)
744 return false;
745
746 // See if we can handle this address.
748 if (!PPCComputeAddress(I->getOperand(1), Addr))
749 return false;
750
751 if (!PPCEmitStore(VT, SrcReg, Addr))
752 return false;
753
754 return true;
755}
756
757// Attempt to fast-select a branch instruction.
758bool PPCFastISel::SelectBranch(const Instruction *I) {
759 const BranchInst *BI = cast<BranchInst>(I);
760 MachineBasicBlock *BrBB = FuncInfo.MBB;
761 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
762 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
763
764 // For now, just try the simplest case where it's fed by a compare.
765 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
766 if (isValueAvailable(CI)) {
767 std::optional<PPC::Predicate> OptPPCPred =
768 getComparePred(CI->getPredicate());
769 if (!OptPPCPred)
770 return false;
771
772 PPC::Predicate PPCPred = *OptPPCPred;
773
774 // Take advantage of fall-through opportunities.
775 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
776 std::swap(TBB, FBB);
777 PPCPred = PPC::InvertPredicate(PPCPred);
778 }
779
780 Register CondReg = createResultReg(&PPC::CRRCRegClass);
781
782 if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
783 CondReg, PPCPred))
784 return false;
785
786 BuildMI(*BrBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCC))
787 .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
788 .addReg(CondReg)
789 .addMBB(TBB);
790 finishCondBranch(BI->getParent(), TBB, FBB);
791 return true;
792 }
793 } else if (const ConstantInt *CI =
794 dyn_cast<ConstantInt>(BI->getCondition())) {
795 uint64_t Imm = CI->getZExtValue();
796 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
797 fastEmitBranch(Target, MIMD.getDL());
798 return true;
799 }
800
801 // FIXME: ARM looks for a case where the block containing the compare
802 // has been split from the block containing the branch. If this happens,
803 // there is a vreg available containing the result of the compare. I'm
804 // not sure we can do much, as we've lost the predicate information with
805 // the compare instruction -- we have a 4-bit CR but don't know which bit
806 // to test here.
807 return false;
808}
809
810// Attempt to emit a compare of the two source values. Signed and unsigned
811// comparisons are supported. Return false if we can't handle it.
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
// DestReg receives the CR field result; Pred is only consulted for SPE,
// which has distinct compare instructions per predicate.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, unsigned DestReg,
                             const PPC::Predicate Pred) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // i1 values held in CR bits are not handled here.
  if (SrcVT == MVT::i1 && Subtarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  int64_t Imm = 0;
  bool UseImm = false;
  const bool HasSPE = Subtarget->hasSPE();

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
                       (int64_t)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  Register SrcReg1 = getRegForValue(SrcValue1);
  if (SrcReg1 == 0)
    return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (SrcReg2 == 0)
      return false;
  }

  unsigned CmpOpc;
  // Sub-word integer compares are widened to 32 bits first.
  bool NeedsExt = false;

  auto RC1 = MRI.getRegClass(SrcReg1);
  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;

  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      if (HasSPE) {
        // SPE encodes the predicate in the compare opcode itself.
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFSCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFSCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFSCMPGT;
            break;
        }
      } else {
        CmpOpc = PPC::FCMPUS;
        // FCMPUS wants F4RC operands; copy out of VSX register classes.
        if (isVSSRCRegClass(RC1))
          SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
        if (RC2 && isVSSRCRegClass(RC2))
          SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
      }
      break;
    case MVT::f64:
      if (HasSPE) {
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFDCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFDCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFDCMPGT;
            break;
        }
      } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
        CmpOpc = PPC::XSCMPUDP;
      } else {
        CmpOpc = PPC::FCMPUD;
      }
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      NeedsExt = true;
      [[fallthrough]];
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  // Widen sub-word operands to i32 before comparing.
  if (NeedsExt) {
    Register ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      Register ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addImm(Imm);

  return true;
}
948
949// Attempt to fast-select a floating-point extend instruction.
950bool PPCFastISel::SelectFPExt(const Instruction *I) {
951 Value *Src = I->getOperand(0);
952 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
953 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
954
955 if (SrcVT != MVT::f32 || DestVT != MVT::f64)
956 return false;
957
958 Register SrcReg = getRegForValue(Src);
959 if (!SrcReg)
960 return false;
961
962 // No code is generated for a FP extend.
963 updateValueMap(I, SrcReg);
964 return true;
965}
966
// Attempt to fast-select a floating-point truncate instruction.
// Only handles f64 -> f32; all other truncates fall back to SelectionDAG.
bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Round the result to single precision, choosing the instruction by
  // subtarget and by the source's current register class.
  unsigned DestReg;
  auto RC = MRI.getRegClass(SrcReg);
  if (Subtarget->hasSPE()) {
    // SPE keeps FP values in GPRs; EFSCFD converts double to single.
    DestReg = createResultReg(&PPC::GPRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::EFSCFD),
        DestReg)
      .addReg(SrcReg);
  } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
    // VSX source on P8 and later: round in a VSX register with XSRSP.
    DestReg = createResultReg(&PPC::VSSRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::XSRSP),
        DestReg)
      .addReg(SrcReg);
  } else {
    // Classic FPR path: normalize the source into F8RC, then round
    // with FRSP into a single-precision register.
    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
    DestReg = createResultReg(&PPC::F4RCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(PPC::FRSP), DestReg)
      .addReg(SrcReg);
  }

  updateValueMap(I, DestReg);
  return true;
}
1004
// Move an i32 or i64 value in a GPR to an f64 value in an FPR, by storing
// the GPR to a stack slot and reloading it with a floating-point load.
// Returns the FPR, or zero on failure.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit so a full doubleword can be
  // stored. Note that SrcVT deliberately stays MVT::i32 so the load-opcode
  // selection below still sees the original width.
  if (SrcVT == MVT::i32) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return 0;
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return 0;

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions. Default is a full 8-byte LFD.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      // Unsigned i32: 4-byte FP load. On big-endian the word lives in the
      // high half of the 8-byte slot, hence the +4 offset.
      LoadOpc = PPC::LFIWZX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    } else if (Subtarget->hasLFIWAX()) {
      // Signed i32: algebraic (sign-preserving) 4-byte FP load, when the
      // subtarget provides it.
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register ResultReg = 0;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return 0;

  return ResultReg;
}
1053
// Attempt to fast-select an integer-to-floating-point conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only scalar f32/f64 destinations are handled.
  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  // Only i8/i16/i32/i64 sources are handled.
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
  if (Subtarget->hasSPE()) {
    unsigned Opc;
    if (DstVT == MVT::f32)
      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
    else
      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;

    Register DestReg = createResultReg(&PPC::SPERCRegClass);
    // Generate the convert.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
      .addReg(SrcReg);
    updateValueMap(I, DestReg);
    return true;
  }

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !Subtarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
    return false;

  // Extend the input if necessary. The extension kind (zero vs. sign)
  // matches the signedness of the conversion.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR via a stack slot.
  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (FPReg == 0)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}
1141
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// The move goes through a stack slot: FP store followed by an integer
// load that zero- or sign-extends according to IsSigned.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      unsigned SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return 0;

  // Reload it into a GPR. If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If no register has been
  // assigned yet, let PPCEmitLoad pick one (nullptr RC).
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  Register ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return 0;

  return ResultReg;
}
1178
// Attempt to fast-select a floating-point-to-integer conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only i32/i64 results are handled.
  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
      !Subtarget->hasSPE())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
  // meaningless copy to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass)
    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
  else if (InRC == &PPC::VSSRCRegClass)
    SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs or VSRs.
  unsigned DestReg;
  unsigned Opc;
  auto RC = MRI.getRegClass(SrcReg);

  if (Subtarget->hasSPE()) {
    // SPE: truncating converts operate on GPR-resident FP values, so the
    // result is already an integer in a GPR.
    DestReg = createResultReg(&PPC::GPRCRegClass);
    if (IsSigned)
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
    else
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
  } else if (isVSFRCRegClass(RC)) {
    // VSX converts, rounding toward zero.
    DestReg = createResultReg(&PPC::VSFRCRegClass);
    if (DstVT == MVT::i32)
      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
    else
      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
  } else {
    // Classic FPR converts, rounding toward zero.
    DestReg = createResultReg(&PPC::F8RCRegClass);
    if (DstVT == MVT::i32)
      if (IsSigned)
        Opc = PPC::FCTIWZ;
      else
        // Without FCTIWUZ (pre-FPCVT), convert to a signed 64-bit value
        // and use its low word.
        Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
    else
      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
  }

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register
  // (SPE already produced the result in a GPR, so no move is needed there).
  unsigned IntReg = Subtarget->hasSPE()
                        ? DestReg
                        : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);

  if (IntReg == 0)
    return false;

  updateValueMap(I, IntReg);
  return true;
}
1260
1261// Attempt to fast-select a binary integer operation that isn't already
1262// handled automatically.
1263bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
1264 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1265
1266 // We can get here in the case when we have a binary operation on a non-legal
1267 // type and the target independent selector doesn't know how to handle it.
1268 if (DestVT != MVT::i16 && DestVT != MVT::i8)
1269 return false;
1270
1271 // Look at the currently assigned register for this instruction
1272 // to determine the required register class. If there is no register,
1273 // make a conservative choice (don't assign R0).
1274 Register AssignedReg = FuncInfo.ValueMap[I];
1275 const TargetRegisterClass *RC =
1276 (AssignedReg ? MRI.getRegClass(AssignedReg) :
1277 &PPC::GPRC_and_GPRC_NOR0RegClass);
1278 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
1279
1280 unsigned Opc;
1281 switch (ISDOpcode) {
1282 default: return false;
1283 case ISD::ADD:
1284 Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
1285 break;
1286 case ISD::OR:
1287 Opc = IsGPRC ? PPC::OR : PPC::OR8;
1288 break;
1289 case ISD::SUB:
1290 Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
1291 break;
1292 }
1293
1294 Register ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
1295 Register SrcReg1 = getRegForValue(I->getOperand(0));
1296 if (SrcReg1 == 0) return false;
1297
1298 // Handle case of small immediate operand.
1299 if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
1300 const APInt &CIVal = ConstInt->getValue();
1301 int Imm = (int)CIVal.getSExtValue();
1302 bool UseImm = true;
1303 if (isInt<16>(Imm)) {
1304 switch (Opc) {
1305 default:
1306 llvm_unreachable("Missing case!");
1307 case PPC::ADD4:
1308 Opc = PPC::ADDI;
1309 MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
1310 break;
1311 case PPC::ADD8:
1312 Opc = PPC::ADDI8;
1313 MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
1314 break;
1315 case PPC::OR:
1316 Opc = PPC::ORI;
1317 break;
1318 case PPC::OR8:
1319 Opc = PPC::ORI8;
1320 break;
1321 case PPC::SUBF:
1322 if (Imm == -32768)
1323 UseImm = false;
1324 else {
1325 Opc = PPC::ADDI;
1326 MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
1327 Imm = -Imm;
1328 }
1329 break;
1330 case PPC::SUBF8:
1331 if (Imm == -32768)
1332 UseImm = false;
1333 else {
1334 Opc = PPC::ADDI8;
1335 MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
1336 Imm = -Imm;
1337 }
1338 break;
1339 }
1340
1341 if (UseImm) {
1342 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
1343 ResultReg)
1344 .addReg(SrcReg1)
1345 .addImm(Imm);
1346 updateValueMap(I, ResultReg);
1347 return true;
1348 }
1349 }
1350 }
1351
1352 // Reg-reg case.
1353 Register SrcReg2 = getRegForValue(I->getOperand(1));
1354 if (SrcReg2 == 0) return false;
1355
1356 // Reverse operands for subtract-from.
1357 if (ISDOpcode == ISD::SUB)
1358 std::swap(SrcReg1, SrcReg2);
1359
1360 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
1361 .addReg(SrcReg1).addReg(SrcReg2);
1362 updateValueMap(I, ResultReg);
1363 return true;
1364}
1365
// Handle arguments to a call that we're attempting to fast-select.
// Return false if the arguments are too complex for us at the moment.
// NOTE(review): several parameter declarations (ArgRegs, ArgFlags, RegArgs,
// CC) and the ArgLocs local are not visible in this rendering of the file —
// confirm against the full source.
bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  unsigned &NumBytes,
                                  bool IsVarArg) {
  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, Align(8));

  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);

  // Bail out if we can't handle any of the arguments.
  for (const CCValAssign &VA : ArgLocs) {
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Skip vector arguments for now, as well as long double and
    // uint128_t, and anything that isn't passed in a register.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
        !VA.isRegLoc() || VA.needsCustom())
      return false;

    // Skip bit-converted arguments for now.
    if (VA.getLocInfo() == CCValAssign::BCvt)
      return false;
  }

  // Get a count of how many bytes are to be pushed onto the stack.
  NumBytes = CCInfo.getStackSize();

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed. As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(NumBytes, LinkageSize + 64);

  // Issue CALLSEQ_START.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(TII.getCallFrameSetupOpcode()))
    .addImm(NumBytes).addImm(0);

  // Prepare to assign register arguments. Every argument uses up a
  // GPR protocol register even if it's passed in a floating-point
  // register (unless we're using the fast calling convention).
  unsigned NextGPR = PPC::X3;
  unsigned NextFPR = PPC::F1;

  // Process arguments.
  for (const CCValAssign &VA : ArgLocs) {
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Handle argument promotion and bitcasts.
    switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt: {
        // Sign-extend the argument to its location type in a fresh register.
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        Register TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
          llvm_unreachable("Failed to emit a sext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::AExt:
      case CCValAssign::ZExt: {
        // Zero-extend (any-extend treated the same) to the location type.
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        Register TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
          llvm_unreachable("Failed to emit a zext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::BCvt: {
        // FIXME: Not yet handled.
        llvm_unreachable("Should have bailed before getting here!");
        break;
      }
    }

    // Copy this argument to the appropriate register. FP args consume a
    // GPR slot too (unless using the fast calling convention).
    unsigned ArgReg;
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
      ArgReg = NextFPR++;
      if (CC != CallingConv::Fast)
        ++NextGPR;
    } else
      ArgReg = NextGPR++;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
    RegArgs.push_back(ArgReg);
  }

  return true;
}
1479
// For a call that we've determined we can fast-select, finish the
// call sequence and generate a copy to obtain the return value (if any).
bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;

  // Issue CallSEQ_END.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(TII.getCallFrameDestroyOpcode()))
    .addImm(NumBytes).addImm(0);

  // Next, generate a copy to obtain the return value.
  // FIXME: No multi-register return values yet, though I don't foresee
  // any real difficulties there.
  if (RetVT != MVT::isVoid) {
    // NOTE(review): the RVLocs declaration is not visible in this rendering —
    // confirm against the full source.
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
    // NOTE(review): RVLocs[0] is read before the size assertion below fires;
    // an empty RVLocs would be UB here before the assert catches it.
    CCValAssign &VA = RVLocs[0];
    assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
    assert(VA.isRegLoc() && "Can only return in registers!");

    MVT DestVT = VA.getValVT();
    MVT CopyVT = DestVT;

    // Ints smaller than a register still arrive in a full 64-bit
    // register, so make sure we recognize this.
    if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
      CopyVT = MVT::i64;

    unsigned SourcePhysReg = VA.getLocReg();
    unsigned ResultReg = 0;

    if (RetVT == CopyVT) {
      // Straight copy into a virtual register of the matching class.
      const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
      ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);

      // If necessary, round the floating result to single precision.
    } else if (CopyVT == MVT::f64) {
      ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::FRSP),
              ResultReg).addReg(SourcePhysReg);

      // If only the low half of a general register is needed, generate
      // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
      // used along the fast-isel path (not lowered), and downstream logic
      // also doesn't like a direct subreg copy on a physical reg.)
    } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
      // Convert physical register from G8RC to GPRC by register-number
      // arithmetic (Xn and Rn are laid out in parallel).
      SourcePhysReg -= PPC::X0 - PPC::R0;
      ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
    }

    assert(ResultReg && "ResultReg unset!");
    CLI.InRegs.push_back(SourcePhysReg);
    CLI.ResultReg = ResultReg;
    CLI.NumResultRegs = 1;
  }

  return true;
}
1540
// Attempt to fast-select a call. Bails out (returning false so SelectionDAG
// handles it) for tail calls, long calls, varargs, PC-relative callees,
// unsupported return types, and anything with more than 8 register args.
// NOTE(review): several SmallVector local declarations (RVLocs, Args,
// ArgRegs, ArgFlags, RegArgs, MIB) are not visible in this rendering —
// confirm against the full source.
bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  const MCSymbol *Symbol = CLI.Symbol;

  // A call must target either a value or an external symbol.
  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle tail calls and long calls.
  if (IsTailCall || Subtarget->useLongCalls())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // If this is a PC-Rel function, let SDISel handle the call.
  if (Subtarget->isUsingPCRelativeCalls())
    return false;

  // Handle simple calls for now, with legal return types and
  // those that can be extended.
  Type *RetTy = CLI.RetTy;
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8)
    return false;
  else if (RetVT == MVT::i1 && Subtarget->useCRBits())
    // We can't handle boolean returns when CR bits are in use.
    return false;

  // FIXME: No multi-register return values yet. For return types outside
  // the simple scalar set, verify the result fits in a single location.
  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
      RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
      RetVT != MVT::f64) {
    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
    if (RVLocs.size() > 1)
      return false;
  }

  // Bail early if more than 8 arguments, as we only currently
  // handle arguments passed in registers.
  unsigned NumArgs = CLI.OutVals.size();
  if (NumArgs > 8)
    return false;

  // Set up the argument vectors.
  SmallVector<MVT, 8> ArgVTs;

  Args.reserve(NumArgs);
  ArgRegs.reserve(NumArgs);
  ArgVTs.reserve(NumArgs);
  ArgFlags.reserve(NumArgs);

  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
    // Only handle easy calls for now. It would be reasonably easy
    // to handle <= 8-byte structures passed ByVal in registers, but we
    // have to ensure they are right-justified in the register.
    ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
    if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
      return false;

    Value *ArgValue = CLI.OutVals[i];
    Type *ArgTy = ArgValue->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
      return false;

    // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
    // types, which is passed through vector register. Skip these types and
    // fallback to default SelectionDAG based selection.
    if (ArgVT.isVector() || ArgVT == MVT::f128)
      return false;

    Register Arg = getRegForValue(ArgValue);
    if (Arg == 0)
      return false;

    Args.push_back(ArgValue);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Process the arguments: emit CALLSEQ_START and copy each argument into
  // its protocol register.
  unsigned NumBytes;

  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, IsVarArg))
    return false;

  // FIXME: No handling for function pointers yet. This requires
  // implementing the function descriptor (OPD) setup.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV) {
    // patchpoints are a special case; they always dispatch to a pointer value.
    // However, we don't actually want to generate the indirect call sequence
    // here (that will be generated, as necessary, during asm printing), and
    // the call we generate here will be erased by FastISel::selectPatchpoint,
    // so don't try very hard...
    if (CLI.IsPatchPoint)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::NOP));
    else
      return false;
  } else {
    // Build direct call with NOP for TOC restore.
    // FIXME: We can and should optimize away the NOP for local calls.
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                  TII.get(PPC::BL8_NOP));
    // Add callee.
    MIB.addGlobalAddress(GV);
  }

  // Add implicit physical register uses to the call.
  for (unsigned Reg : RegArgs)
    MIB.addReg(Reg, RegState::Implicit);

  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  PPCFuncInfo->setUsesTOCBasePtr();
  MIB.addReg(PPC::X2, RegState::Implicit);

  // Add a register mask with the call-preserved registers. Proper
  // defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(RetVT, CLI, NumBytes);
}
1683
// Attempt to fast-select a return instruction.
// NOTE(review): the RetRegs, Outs and ValLocs local declarations are not
// visible in this rendering — confirm against the full source.
bool PPCFastISel::SelectRet(const Instruction *I) {

  if (!FuncInfo.CanLowerReturn)
    return false;

  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  // Build a list of return value registers.
  CallingConv::ID CC = F.getCallingConv();

  if (Ret->getNumOperands() > 0) {
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
    CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
    const Value *RV = Ret->getOperand(0);

    // FIXME: Only one output register for now.
    if (ValLocs.size() > 1)
      return false;

    // Special case for returning a constant integer of any size - materialize
    // the constant as an i64 and copy it to the return register.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
      CCValAssign &VA = ValLocs[0];

      Register RetReg = VA.getLocReg();
      // We still need to worry about properly extending the sign. For example,
      // we could have only a single bit or a constant that needs zero
      // extension rather than sign extension. Make sure we pass the return
      // value extension property to integer materialization.
      unsigned SrcReg =
          PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);

      RetRegs.push_back(RetReg);

    } else {
      Register Reg = getRegForValue(RV);

      if (Reg == 0)
        return false;

      // Copy the result values into the output registers.
      for (unsigned i = 0; i < ValLocs.size(); ++i) {

        CCValAssign &VA = ValLocs[i];
        assert(VA.isRegLoc() && "Can only return in registers!");
        RetRegs.push_back(VA.getLocReg());
        // Multi-value results occupy consecutive virtual registers.
        unsigned SrcReg = Reg + VA.getValNo();

        EVT RVEVT = TLI.getValueType(DL, RV->getType());
        if (!RVEVT.isSimple())
          return false;
        MVT RVVT = RVEVT.getSimpleVT();
        MVT DestVT = VA.getLocVT();

        // Only small-integer extensions are handled for mismatched types.
        if (RVVT != DestVT && RVVT != MVT::i8 &&
            RVVT != MVT::i16 && RVVT != MVT::i32)
          return false;

        if (RVVT != DestVT) {
          switch (VA.getLocInfo()) {
            default:
              llvm_unreachable("Unknown loc info!");
            case CCValAssign::Full:
              llvm_unreachable("Full value assign but types don't match?");
            case CCValAssign::AExt:
            case CCValAssign::ZExt: {
              // Zero-extend into a fresh register of the location type.
              const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
              Register TmpReg = createResultReg(RC);
              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
                return false;
              SrcReg = TmpReg;
              break;
            }
            case CCValAssign::SExt: {
              // Sign-extend into a fresh register of the location type.
              const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
              Register TmpReg = createResultReg(RC);
              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
                return false;
              SrcReg = TmpReg;
              break;
            }
          }
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(TargetOpcode::COPY), RetRegs[i])
            .addReg(SrcReg);
      }
    }
  }

  // Emit the return and mark the return-value registers as implicitly used
  // so they stay live up to the return.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(PPC::BLR8));

  for (unsigned Reg : RetRegs)
    MIB.addReg(Reg, RegState::Implicit);

  return true;
}
1796
1797// Attempt to emit an integer extend of SrcReg into DestReg. Both
1798// signed and zero extensions are supported. Return false if we
1799// can't handle it.
1800bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
1801 unsigned DestReg, bool IsZExt) {
1802 if (DestVT != MVT::i32 && DestVT != MVT::i64)
1803 return false;
1804 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1805 return false;
1806
1807 // Signed extensions use EXTSB, EXTSH, EXTSW.
1808 if (!IsZExt) {
1809 unsigned Opc;
1810 if (SrcVT == MVT::i8)
1811 Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1812 else if (SrcVT == MVT::i16)
1813 Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1814 else {
1815 assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1816 Opc = PPC::EXTSW_32_64;
1817 }
1818 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1819 .addReg(SrcReg);
1820
1821 // Unsigned 32-bit extensions use RLWINM.
1822 } else if (DestVT == MVT::i32) {
1823 unsigned MB;
1824 if (SrcVT == MVT::i8)
1825 MB = 24;
1826 else {
1827 assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1828 MB = 16;
1829 }
1830 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLWINM),
1831 DestReg)
1832 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
1833
1834 // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1835 } else {
1836 unsigned MB;
1837 if (SrcVT == MVT::i8)
1838 MB = 56;
1839 else if (SrcVT == MVT::i16)
1840 MB = 48;
1841 else
1842 MB = 32;
1843 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1844 TII.get(PPC::RLDICL_32_64), DestReg)
1845 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
1846 }
1847
1848 return true;
1849}
1850
1851// Attempt to fast-select an indirect branch instruction.
1852bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1853 Register AddrReg = getRegForValue(I->getOperand(0));
1854 if (AddrReg == 0)
1855 return false;
1856
1857 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::MTCTR8))
1858 .addReg(AddrReg);
1859 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCTR8));
1860
1861 const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1862 for (const BasicBlock *SuccBB : IB->successors())
1863 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(SuccBB));
1864
1865 return true;
1866}
1867
1868// Attempt to fast-select an integer truncate instruction.
1869bool PPCFastISel::SelectTrunc(const Instruction *I) {
1870 Value *Src = I->getOperand(0);
1871 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1872 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1873
1874 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1875 return false;
1876
1877 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1878 return false;
1879
1880 Register SrcReg = getRegForValue(Src);
1881 if (!SrcReg)
1882 return false;
1883
1884 // The only interesting case is when we need to switch register classes.
1885 if (SrcVT == MVT::i64)
1886 SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
1887
1888 updateValueMap(I, SrcReg);
1889 return true;
1890}
1891
1892// Attempt to fast-select an integer extend instruction.
1893bool PPCFastISel::SelectIntExt(const Instruction *I) {
1894 Type *DestTy = I->getType();
1895 Value *Src = I->getOperand(0);
1896 Type *SrcTy = Src->getType();
1897
1898 bool IsZExt = isa<ZExtInst>(I);
1899 Register SrcReg = getRegForValue(Src);
1900 if (!SrcReg) return false;
1901
1902 EVT SrcEVT, DestEVT;
1903 SrcEVT = TLI.getValueType(DL, SrcTy, true);
1904 DestEVT = TLI.getValueType(DL, DestTy, true);
1905 if (!SrcEVT.isSimple())
1906 return false;
1907 if (!DestEVT.isSimple())
1908 return false;
1909
1910 MVT SrcVT = SrcEVT.getSimpleVT();
1911 MVT DestVT = DestEVT.getSimpleVT();
1912
1913 // If we know the register class needed for the result of this
1914 // instruction, use it. Otherwise pick the register class of the
1915 // correct size that does not contain X0/R0, since we don't know
1916 // whether downstream uses permit that assignment.
1917 Register AssignedReg = FuncInfo.ValueMap[I];
1918 const TargetRegisterClass *RC =
1919 (AssignedReg ? MRI.getRegClass(AssignedReg) :
1920 (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1921 &PPC::GPRC_and_GPRC_NOR0RegClass));
1922 Register ResultReg = createResultReg(RC);
1923
1924 if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1925 return false;
1926
1927 updateValueMap(I, ResultReg);
1928 return true;
1929}
1930
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery. Dispatches on the IR opcode to the
// dedicated Select* helpers; returns false to fall back to SDAG
// selection for anything not covered here.
bool PPCFastISel::fastSelectInstruction(const Instruction *I) {

  switch (I->getOpcode()) {
  case Instruction::Load:
    return SelectLoad(I);
  case Instruction::Store:
    return SelectStore(I);
  case Instruction::Br:
    return SelectBranch(I);
  case Instruction::IndirectBr:
    return SelectIndirectBr(I);
  case Instruction::FPExt:
    return SelectFPExt(I);
  case Instruction::FPTrunc:
    return SelectFPTrunc(I);
  case Instruction::SIToFP:
    return SelectIToFP(I, /*IsSigned*/ true);
  case Instruction::UIToFP:
    return SelectIToFP(I, /*IsSigned*/ false);
  case Instruction::FPToSI:
    return SelectFPToI(I, /*IsSigned*/ true);
  case Instruction::FPToUI:
    return SelectFPToI(I, /*IsSigned*/ false);
  // Only ADD/OR/SUB of the binary integer ops are handled; others fall
  // through to the default below.
  case Instruction::Add:
    return SelectBinaryIntOp(I, ISD::ADD);
  case Instruction::Or:
    return SelectBinaryIntOp(I, ISD::OR);
  case Instruction::Sub:
    return SelectBinaryIntOp(I, ISD::SUB);
  case Instruction::Ret:
    return SelectRet(I);
  case Instruction::Trunc:
    return SelectTrunc(I);
  case Instruction::ZExt:
  case Instruction::SExt:
    return SelectIntExt(I);
  // Here add other flavors of Instruction::XXX that automated
  // cases don't catch. For example, switches are terminators
  // that aren't yet handled.
  default:
    break;
  }
  return false;
}
1977
1978// Materialize a floating-point constant into a register, and return
1979// the register number (or zero if we failed to handle it).
1980unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1981 // If this is a PC-Rel function, let SDISel handle constant pool.
1982 if (Subtarget->isUsingPCRelativeCalls())
1983 return false;
1984
1985 // No plans to handle long double here.
1986 if (VT != MVT::f32 && VT != MVT::f64)
1987 return 0;
1988
1989 // All FP constants are loaded from the constant pool.
1990 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
1991 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
1992 const bool HasSPE = Subtarget->hasSPE();
1993 const TargetRegisterClass *RC;
1994 if (HasSPE)
1995 RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
1996 else
1997 RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
1998
1999 Register DestReg = createResultReg(RC);
2000 CodeModel::Model CModel = TM.getCodeModel();
2001
2002 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2004 MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);
2005
2006 unsigned Opc;
2007
2008 if (HasSPE)
2009 Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
2010 else
2011 Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
2012
2013 Register TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2014
2015 PPCFuncInfo->setUsesTOCBasePtr();
2016 // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
2017 if (CModel == CodeModel::Small) {
2018 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocCPT),
2019 TmpReg)
2020 .addConstantPoolIndex(Idx).addReg(PPC::X2);
2021 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2022 .addImm(0).addReg(TmpReg).addMemOperand(MMO);
2023 } else {
2024 // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
2025 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2026 TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
2027 // But for large code model, we must generate a LDtocL followed
2028 // by the LF[SD].
2029 if (CModel == CodeModel::Large) {
2030 Register TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2031 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2032 TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
2033 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2034 .addImm(0)
2035 .addReg(TmpReg2);
2036 } else
2037 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2039 .addReg(TmpReg)
2040 .addMemOperand(MMO);
2041 }
2042
2043 return DestReg;
2044}
2045
2046// Materialize the address of a global value into a register, and return
2047// the register number (or zero if we failed to handle it).
2048unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
2049 // If this is a PC-Rel function, let SDISel handle GV materialization.
2050 if (Subtarget->isUsingPCRelativeCalls())
2051 return false;
2052
2053 assert(VT == MVT::i64 && "Non-address!");
2054 const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
2055 Register DestReg = createResultReg(RC);
2056
2057 // Global values may be plain old object addresses, TLS object
2058 // addresses, constant pool entries, or jump tables. How we generate
2059 // code for these may depend on small, medium, or large code model.
2060 CodeModel::Model CModel = TM.getCodeModel();
2061
2062 // FIXME: Jump tables are not yet required because fast-isel doesn't
2063 // handle switches; if that changes, we need them as well. For now,
2064 // what follows assumes everything's a generic (or TLS) global address.
2065
2066 // FIXME: We don't yet handle the complexity of TLS.
2067 if (GV->isThreadLocal())
2068 return 0;
2069
2070 PPCFuncInfo->setUsesTOCBasePtr();
2071 bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
2072 isa<GlobalVariable>(GV) &&
2073 cast<GlobalVariable>(GV)->hasAttribute("toc-data");
2074
2075 // For small code model, generate a simple TOC load.
2076 if (CModel == CodeModel::Small) {
2077 auto MIB = BuildMI(
2078 *FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2079 IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc), DestReg);
2080 if (IsAIXTocData)
2081 MIB.addReg(PPC::X2).addGlobalAddress(GV);
2082 else
2083 MIB.addGlobalAddress(GV).addReg(PPC::X2);
2084 } else {
2085 // If the address is an externally defined symbol, a symbol with common
2086 // or externally available linkage, a non-local function address, or a
2087 // jump table address (not yet needed), or if we are generating code
2088 // for large code model, we generate:
2089 // LDtocL(GV, ADDIStocHA8(%x2, GV))
2090 // Otherwise we generate:
2091 // ADDItocL8(ADDIStocHA8(%x2, GV), GV)
2092 // Either way, start with the ADDIStocHA8:
2093 Register HighPartReg = createResultReg(RC);
2094 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2095 HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
2096
2097 if (Subtarget->isGVIndirectSymbol(GV)) {
2098 assert(!IsAIXTocData && "TOC data should always be direct.");
2099 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2100 DestReg).addGlobalAddress(GV).addReg(HighPartReg);
2101 } else {
2102 // Otherwise generate the ADDItocL8.
2103 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8),
2104 DestReg)
2105 .addReg(HighPartReg)
2106 .addGlobalAddress(GV);
2107 }
2108 }
2109
2110 return DestReg;
2111}
2112
2113// Materialize a 32-bit integer constant into a register, and return
2114// the register number (or zero if we failed to handle it).
2115unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2116 const TargetRegisterClass *RC) {
2117 unsigned Lo = Imm & 0xFFFF;
2118 unsigned Hi = (Imm >> 16) & 0xFFFF;
2119
2120 Register ResultReg = createResultReg(RC);
2121 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2122
2123 if (isInt<16>(Imm))
2124 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2125 TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2126 .addImm(Imm);
2127 else if (Lo) {
2128 // Both Lo and Hi have nonzero bits.
2129 Register TmpReg = createResultReg(RC);
2130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2131 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2132 .addImm(Hi);
2133 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2134 TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2135 .addReg(TmpReg).addImm(Lo);
2136 } else
2137 // Just Hi bits.
2138 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2139 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2140 .addImm(Hi);
2141
2142 return ResultReg;
2143}
2144
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
//
// Strategy (at most five instructions):
//   1. Materialize a 32-bit-representable piece with
//      PPCMaterialize32BitInt (LI/LIS/ORI).
//   2. If that piece was the high part, shift it into place with RLDICR.
//   3. OR in any remaining low halfwords with ORIS8/ORI8.
unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Remainder = 0;  // Low 32 bits still to be OR'd in after shifting.
  unsigned Shift = 0;      // Left-shift needed to place the built bits.

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    // Shifting out trailing zeros may leave a 32-bit-representable value.
    Shift = llvm::countr_zero<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      Imm = ImmSh;
    else {
      // No luck: build the high 32 bits, shift left by 32, and OR in
      // the low 32 bits (saved in Remainder) afterwards.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.
  unsigned TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    // RLDICR: rotate left by Shift, clear bits below position 63-Shift.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in the high and low halfwords of Remainder, skipping any
  // halfword that is zero.
  unsigned TmpReg3, Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  if ((Lo = Remainder & 0xFFFF)) {
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}
2200
2201// Materialize an integer constant into a register, and return
2202// the register number (or zero if we failed to handle it).
2203unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2204 bool UseSExt) {
2205 // If we're using CR bit registers for i1 values, handle that as a special
2206 // case first.
2207 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2208 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2209 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2210 TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2211 return ImmReg;
2212 }
2213
2214 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2215 VT != MVT::i1)
2216 return 0;
2217
2218 const TargetRegisterClass *RC =
2219 ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2220 int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2221
2222 // If the constant is in range, use a load-immediate.
2223 // Since LI will sign extend the constant we need to make sure that for
2224 // our zeroext constants that the sign extended constant fits into 16-bits -
2225 // a range of 0..0x7fff.
2226 if (isInt<16>(Imm)) {
2227 unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2228 Register ImmReg = createResultReg(RC);
2229 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ImmReg)
2230 .addImm(Imm);
2231 return ImmReg;
2232 }
2233
2234 // Construct the constant piecewise.
2235 if (VT == MVT::i64)
2236 return PPCMaterialize64BitInt(Imm, RC);
2237 else if (VT == MVT::i32)
2238 return PPCMaterialize32BitInt(Imm, RC);
2239
2240 return 0;
2241}
2242
2243// Materialize a constant into a register, and return the register
2244// number (or zero if we failed to handle it).
2245unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
2246 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2247
2248 // Only handle simple types.
2249 if (!CEVT.isSimple()) return 0;
2250 MVT VT = CEVT.getSimpleVT();
2251
2252 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2253 return PPCMaterializeFP(CFP, VT);
2254 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2255 return PPCMaterializeGV(GV, VT);
2256 else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2257 // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2258 // assumes that constant PHI operands will be zero extended, and failure to
2259 // match that assumption will cause problems if we sign extend here but
2260 // some user of a PHI is in a block for which we fall back to full SDAG
2261 // instruction selection.
2262 return PPCMaterializeInt(CI, VT, false);
2263
2264 return 0;
2265}
2266
2267// Materialize the address created by an alloca into a register, and
2268// return the register number (or zero if we failed to handle it).
2269unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2270 // Don't handle dynamic allocas.
2271 if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
2272
2273 MVT VT;
2274 if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
2275
2277 FuncInfo.StaticAllocaMap.find(AI);
2278
2279 if (SI != FuncInfo.StaticAllocaMap.end()) {
2280 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2281 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
2282 ResultReg).addFrameIndex(SI->second).addImm(0);
2283 return ResultReg;
2284 }
2285
2286 return 0;
2287}
2288
2289// Fold loads into extends when possible.
2290// FIXME: We can have multiple redundant extend/trunc instructions
2291// following a load. The folding only picks up one. Extend this
2292// to check subsequent instructions for the same pattern and remove
2293// them. Thus ResultReg should be the def reg for the last redundant
2294// instruction in a chain, and all intervening instructions can be
2295// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
2296// to add ELF64-NOT: rldicl to the appropriate tests when this works.
2297bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2298 const LoadInst *LI) {
2299 // Verify we have a legal type before going any further.
2300 MVT VT;
2301 if (!isLoadTypeLegal(LI->getType(), VT))
2302 return false;
2303
2304 // Combine load followed by zero- or sign-extend.
2305 bool IsZExt = false;
2306 switch(MI->getOpcode()) {
2307 default:
2308 return false;
2309
2310 case PPC::RLDICL:
2311 case PPC::RLDICL_32_64: {
2312 IsZExt = true;
2313 unsigned MB = MI->getOperand(3).getImm();
2314 if ((VT == MVT::i8 && MB <= 56) ||
2315 (VT == MVT::i16 && MB <= 48) ||
2316 (VT == MVT::i32 && MB <= 32))
2317 break;
2318 return false;
2319 }
2320
2321 case PPC::RLWINM:
2322 case PPC::RLWINM8: {
2323 IsZExt = true;
2324 unsigned MB = MI->getOperand(3).getImm();
2325 if ((VT == MVT::i8 && MB <= 24) ||
2326 (VT == MVT::i16 && MB <= 16))
2327 break;
2328 return false;
2329 }
2330
2331 case PPC::EXTSB:
2332 case PPC::EXTSB8:
2333 case PPC::EXTSB8_32_64:
2334 /* There is no sign-extending load-byte instruction. */
2335 return false;
2336
2337 case PPC::EXTSH:
2338 case PPC::EXTSH8:
2339 case PPC::EXTSH8_32_64: {
2340 if (VT != MVT::i16 && VT != MVT::i8)
2341 return false;
2342 break;
2343 }
2344
2345 case PPC::EXTSW:
2346 case PPC::EXTSW_32:
2347 case PPC::EXTSW_32_64: {
2348 if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2349 return false;
2350 break;
2351 }
2352 }
2353
2354 // See if we can handle this address.
2355 Address Addr;
2356 if (!PPCComputeAddress(LI->getOperand(0), Addr))
2357 return false;
2358
2359 Register ResultReg = MI->getOperand(0).getReg();
2360
2361 if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
2362 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
2363 return false;
2364
2366 removeDeadCode(I, std::next(I));
2367 return true;
2368}
2369
2370// Attempt to lower call arguments in a faster way than done by
2371// the selection DAG code.
2372bool PPCFastISel::fastLowerArguments() {
2373 // Defer to normal argument lowering for now. It's reasonably
2374 // efficient. Consider doing something like ARM to handle the
2375 // case where all args fit in registers, no varargs, no float
2376 // or vector args.
2377 return false;
2378}
2379
2380// Handle materializing integer constants into a register. This is not
2381// automatically generated for PowerPC, so must be explicitly created here.
2382unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2383
2384 if (Opc != ISD::Constant)
2385 return 0;
2386
2387 // If we're using CR bit registers for i1 values, handle that as a special
2388 // case first.
2389 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2390 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2391 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2392 TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2393 return ImmReg;
2394 }
2395
2396 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2397 VT != MVT::i1)
2398 return 0;
2399
2400 const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2401 &PPC::GPRCRegClass);
2402 if (VT == MVT::i64)
2403 return PPCMaterialize64BitInt(Imm, RC);
2404 else
2405 return PPCMaterialize32BitInt(Imm, RC);
2406}
2407
2408// Override for ADDI and ADDI8 to set the correct register class
2409// on RHS operand 0. The automatic infrastructure naively assumes
2410// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2411// for these cases. At the moment, none of the other automatically
2412// generated RI instructions require special treatment. However, once
2413// SelectSelect is implemented, "isel" requires similar handling.
2414//
2415// Also be conservative about the output register class. Avoid
2416// assigning R0 or X0 to the output register for GPRC and G8RC
2417// register classes, as any such result could be used in ADDI, etc.,
2418// where those regs have another meaning.
2419unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2420 const TargetRegisterClass *RC,
2421 unsigned Op0,
2422 uint64_t Imm) {
2423 if (MachineInstOpcode == PPC::ADDI)
2424 MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2425 else if (MachineInstOpcode == PPC::ADDI8)
2426 MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2427
2428 const TargetRegisterClass *UseRC =
2429 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2430 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2431
2432 return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Imm);
2433}
2434
2435// Override for instructions with one register operand to avoid use of
2436// R0/X0. The automatic infrastructure isn't aware of the context so
2437// we must be conservative.
2438unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2439 const TargetRegisterClass* RC,
2440 unsigned Op0) {
2441 const TargetRegisterClass *UseRC =
2442 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2443 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2444
2445 return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0);
2446}
2447
2448// Override for instructions with two register operands to avoid use
2449// of R0/X0. The automatic infrastructure isn't aware of the context
2450// so we must be conservative.
2451unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2452 const TargetRegisterClass* RC,
2453 unsigned Op0, unsigned Op1) {
2454 const TargetRegisterClass *UseRC =
2455 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2456 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2457
2458 return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op1);
2459}
2460
2461namespace llvm {
2462 // Create the fast instruction selector for PowerPC64 ELF.
2464 const TargetLibraryInfo *LibInfo) {
2465 // Only available on 64-bit for now.
2466 const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2467 if (Subtarget.isPPC64())
2468 return new PPCFastISel(FuncInfo, LibInfo);
2469 return nullptr;
2470 }
2471}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
This file defines the FastISel class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
static std::optional< PPC::Predicate > getComparePred(CmpInst::Predicate Pred)
static constexpr Register FPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
support::ulittle16_t & Lo
Definition: aarch32.cpp:204
support::ulittle16_t & Hi
Definition: aarch32.cpp:203
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
an instruction to allocate memory on the stack
Definition: Instructions.h:63
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:99
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
unsigned getValNo() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:690
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:679
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:688
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:677
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:678
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:697
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:687
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:681
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:684
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:685
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:680
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:682
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:689
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:699
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:686
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:675
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:683
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:163
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Register fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0)
Emit a MachineInstr with one register operand and a result register in the given register class.
Definition: FastISel.cpp:2046
virtual bool tryToFoldLoadIntoMI(MachineInstr *, unsigned, const LoadInst *)
The specified machine instr operand is a vreg, and that vreg is being provided by the specified load ...
Definition: FastISel.h:300
virtual unsigned fastMaterializeConstant(const Constant *C)
Emit a constant in a register using target-specific logic, such as constant pool loads.
Definition: FastISel.h:473
virtual bool fastLowerCall(CallLoweringInfo &CLI)
This method is called by target-independent code to do target- specific call lowering.
Definition: FastISel.cpp:1946
Register fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, unsigned Op1)
Emit a MachineInstr with two register operands and a result register in the given register class.
Definition: FastISel.cpp:2067
Register createResultReg(const TargetRegisterClass *RC)
Definition: FastISel.cpp:2016
virtual bool fastLowerArguments()
This method is called by target-independent code to do target- specific argument lowering.
Definition: FastISel.cpp:1944
const TargetInstrInfo & TII
Definition: FastISel.h:211
virtual bool fastSelectInstruction(const Instruction *I)=0
This method is called by target-independent code when the normal FastISel process fails to select an ...
const TargetLowering & TLI
Definition: FastISel.h:212
const TargetMachine & TM
Definition: FastISel.h:209
Register fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, uint64_t Imm)
Emit a MachineInstr with a register operand, an immediate, and a result register in the given registe...
Definition: FastISel.cpp:2118
virtual unsigned fastMaterializeAlloca(const AllocaInst *C)
Emit an alloca address in a register using target-specific logic.
Definition: FastISel.h:476
virtual unsigned fastEmit_i(MVT VT, MVT RetVT, unsigned Opcode, uint64_t Imm)
This method is called by target-independent code to request that an instruction with the given type,...
Definition: FastISel.cpp:1963
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
MachineBasicBlock::iterator InsertPt
MBB - The current insert position inside the current block.
MachineBasicBlock * MBB
MBB - The current block.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
Indirect Branch Instruction.
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:151
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:150
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Return a value (possibly void), from a function.
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void reserve(size_type N)
Definition: SmallVector.h:663
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:567
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:596
Class to represent struct types.
Definition: DerivedTypes.h:218
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
unsigned getID() const
Return the register class ID number.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt64Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition: ilist_node.h:32
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ MO_TOC_LO
Definition: PPC.h:183
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
gep_type_iterator gep_type_begin(const User *GEP)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.