LLVM 23.0.0git
PPCFastISel.cpp
Go to the documentation of this file.
1//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PowerPC-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// PPCGenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
#include "PPC.h"
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
35
36//===----------------------------------------------------------------------===//
37//
38// TBD:
39// fastLowerArguments: Handle simple cases.
40// PPCMaterializeGV: Handle TLS.
41// SelectCall: Handle function pointers.
42// SelectCall: Handle multi-register return values.
43// SelectCall: Optimize away nops for local calls.
44// processCallArgs: Handle bit-converted arguments.
45// finishCall: Handle multi-register return values.
46// PPCComputeAddress: Handle parameter references as FrameIndex's.
47// PPCEmitCmp: Handle immediate as operand 1.
48// SelectCall: Handle small byval arguments.
49// SelectIntrinsicCall: Implement.
50// SelectSelect: Implement.
51// Consider factoring isTypeLegal into the base class.
52// Implement switches and jump tables.
53//
54//===----------------------------------------------------------------------===//
55using namespace llvm;
56
57#define DEBUG_TYPE "ppcfastisel"
58
59namespace {
60
// A lowered memory address: either a base register or a frame index,
// plus a constant byte displacement.
struct Address {
  // Selects which member of the Base union is meaningful.
  enum {
    RegBase,
    FrameIndexBase
  } BaseType = RegBase;

  union {
    unsigned Reg;
    int FI;
  } Base;

  // Constant displacement from the base, in bytes.
  int64_t Offset = 0;

  // Innocuous defaults: register base, register 0, offset 0.
  Address() { Base.Reg = 0; }
};
80
81class PPCFastISel final : public FastISel {
82
83 const TargetMachine &TM;
84 const PPCSubtarget *Subtarget;
85 PPCFunctionInfo *PPCFuncInfo;
86 const TargetInstrInfo &TII;
87 const TargetLowering &TLI;
88 LLVMContext *Context;
89
90 public:
91 explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
92 const TargetLibraryInfo *LibInfo,
93 const LibcallLoweringInfo *LibcallLowering)
94 : FastISel(FuncInfo, LibInfo, LibcallLowering),
95 TM(FuncInfo.MF->getTarget()),
96 Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
97 PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
98 TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
99 Context(&FuncInfo.Fn->getContext()) {}
100
101 // Backend specific FastISel code.
102 private:
103 bool fastSelectInstruction(const Instruction *I) override;
104 Register fastMaterializeConstant(const Constant *C) override;
105 Register fastMaterializeAlloca(const AllocaInst *AI) override;
106 bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
107 const LoadInst *LI) override;
108 bool fastLowerArguments() override;
109 Register fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
110 Register fastEmitInst_ri(unsigned MachineInstOpcode,
111 const TargetRegisterClass *RC, Register Op0,
112 uint64_t Imm);
113 Register fastEmitInst_r(unsigned MachineInstOpcode,
114 const TargetRegisterClass *RC, Register Op0);
115 Register fastEmitInst_rr(unsigned MachineInstOpcode,
116 const TargetRegisterClass *RC, Register Op0,
117 Register Op1);
118
119 bool fastLowerCall(CallLoweringInfo &CLI) override;
120
121 // Instruction selection routines.
122 private:
123 bool SelectLoad(const Instruction *I);
124 bool SelectStore(const Instruction *I);
125 bool SelectBranch(const Instruction *I);
126 bool SelectIndirectBr(const Instruction *I);
127 bool SelectFPExt(const Instruction *I);
128 bool SelectFPTrunc(const Instruction *I);
129 bool SelectIToFP(const Instruction *I, bool IsSigned);
130 bool SelectFPToI(const Instruction *I, bool IsSigned);
131 bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
132 bool SelectRet(const Instruction *I);
133 bool SelectTrunc(const Instruction *I);
134 bool SelectIntExt(const Instruction *I);
135
136 // Utility routines.
137 private:
138 bool isTypeLegal(Type *Ty, MVT &VT);
139 bool isLoadTypeLegal(Type *Ty, MVT &VT);
140 bool isValueAvailable(const Value *V) const;
141 bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
142 return RC->getID() == PPC::VSFRCRegClassID;
143 }
144 bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
145 return RC->getID() == PPC::VSSRCRegClassID;
146 }
147 Register copyRegToRegClass(const TargetRegisterClass *ToRC, Register SrcReg,
148 unsigned Flag = 0, unsigned SubReg = 0) {
149 Register TmpReg = createResultReg(ToRC);
150 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
151 TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
152 return TmpReg;
153 }
154 bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt,
155 Register DestReg, const PPC::Predicate Pred);
156 bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
157 const TargetRegisterClass *RC, bool IsZExt = true,
158 unsigned FP64LoadOpc = PPC::LFD);
159 bool PPCEmitStore(MVT VT, Register SrcReg, Address &Addr);
160 bool PPCComputeAddress(const Value *Obj, Address &Addr);
161 void PPCSimplifyAddress(Address &Addr, bool &UseOffset, Register &IndexReg);
162 bool PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, Register DestReg,
163 bool IsZExt);
164 Register PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
165 Register PPCMaterializeGV(const GlobalValue *GV, MVT VT);
166 Register PPCMaterializeInt(const ConstantInt *CI, MVT VT,
167 bool UseSExt = true);
168 Register PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
169 Register PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC);
170 Register PPCMoveToIntReg(const Instruction *I, MVT VT, Register SrcReg,
171 bool IsSigned);
172 Register PPCMoveToFPReg(MVT VT, Register SrcReg, bool IsSigned);
173
174 // Call handling routines.
175 private:
176 bool processCallArgs(SmallVectorImpl<Value *> &Args,
178 SmallVectorImpl<MVT> &ArgVTs,
181 unsigned &NumBytes, bool IsVarArg);
182 bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
183
184 private:
185 #include "PPCGenFastISel.inc"
186
187};
188
189} // end anonymous namespace
190
191static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
192 switch (Pred) {
193 // These are not representable with any single compare.
196 // Major concern about the following 6 cases is NaN result. The comparison
197 // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
198 // only one of which will be set. The result is generated by fcmpu
199 // instruction. However, bc instruction only inspects one of the first 3
200 // bits, so when un is set, bc instruction may jump to an undesired
201 // place.
202 //
203 // More specifically, if we expect an unordered comparison and un is set, we
204 // expect to always go to true branch; in such case UEQ, UGT and ULT still
205 // give false, which are undesired; but UNE, UGE, ULE happen to give true,
206 // since they are tested by inspecting !eq, !lt, !gt, respectively.
207 //
208 // Similarly, for ordered comparison, when un is set, we always expect the
209 // result to be false. In such case OGT, OLT and OEQ is good, since they are
210 // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
211 // and ONE are tested through !lt, !gt and !eq, and these are true.
218 default:
219 return std::nullopt;
220
222 case CmpInst::ICMP_EQ:
223 return PPC::PRED_EQ;
224
228 return PPC::PRED_GT;
229
233 return PPC::PRED_GE;
234
238 return PPC::PRED_LT;
239
243 return PPC::PRED_LE;
244
246 case CmpInst::ICMP_NE:
247 return PPC::PRED_NE;
248
250 return PPC::PRED_NU;
251
253 return PPC::PRED_UN;
254 }
255}
256
257// Determine whether the type Ty is simple enough to be handled by
258// fast-isel, and return its equivalent machine type in VT.
259// FIXME: Copied directly from ARM -- factor into base class?
260bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
261 EVT Evt = TLI.getValueType(DL, Ty, true);
262
263 // Only handle simple types.
264 if (Evt == MVT::Other || !Evt.isSimple()) return false;
265 VT = Evt.getSimpleVT();
266
267 // Handle all legal types, i.e. a register that will directly hold this
268 // value.
269 return TLI.isTypeLegal(VT);
270}
271
272// Determine whether the type Ty is simple enough to be handled by
273// fast-isel as a load target, and return its equivalent machine type in VT.
274bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
275 if (isTypeLegal(Ty, VT)) return true;
276
277 // If this is a type than can be sign or zero-extended to a basic operation
278 // go ahead and accept it now.
279 if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
280 return true;
281 }
282
283 return false;
284}
285
286bool PPCFastISel::isValueAvailable(const Value *V) const {
287 if (!isa<Instruction>(V))
288 return true;
289
290 const auto *I = cast<Instruction>(V);
291 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
292}
293
294// Given a value Obj, create an Address object Addr that represents its
295// address. Return false if we can't handle it.
296bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
297 const User *U = nullptr;
298 unsigned Opcode = Instruction::UserOp1;
299 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
300 // Don't walk into other basic blocks unless the object is an alloca from
301 // another block, otherwise it may not have a virtual register assigned.
302 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
303 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
304 Opcode = I->getOpcode();
305 U = I;
306 }
307 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
308 Opcode = C->getOpcode();
309 U = C;
310 }
311
312 switch (Opcode) {
313 default:
314 break;
315 case Instruction::BitCast:
316 // Look through bitcasts.
317 return PPCComputeAddress(U->getOperand(0), Addr);
318 case Instruction::IntToPtr:
319 // Look past no-op inttoptrs.
320 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
321 TLI.getPointerTy(DL))
322 return PPCComputeAddress(U->getOperand(0), Addr);
323 break;
324 case Instruction::PtrToInt:
325 // Look past no-op ptrtoints.
326 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
327 return PPCComputeAddress(U->getOperand(0), Addr);
328 break;
329 case Instruction::GetElementPtr: {
330 Address SavedAddr = Addr;
331 int64_t TmpOffset = Addr.Offset;
332
333 // Iterate through the GEP folding the constants into offsets where
334 // we can.
336 for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
337 II != IE; ++II, ++GTI) {
338 const Value *Op = *II;
339 if (StructType *STy = GTI.getStructTypeOrNull()) {
340 const StructLayout *SL = DL.getStructLayout(STy);
341 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
342 TmpOffset += SL->getElementOffset(Idx);
343 } else {
344 uint64_t S = GTI.getSequentialElementStride(DL);
345 for (;;) {
346 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
347 // Constant-offset addressing.
348 TmpOffset += CI->getSExtValue() * S;
349 break;
350 }
351 if (canFoldAddIntoGEP(U, Op)) {
352 // A compatible add with a constant operand. Fold the constant.
353 ConstantInt *CI =
354 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
355 TmpOffset += CI->getSExtValue() * S;
356 // Iterate on the other operand.
357 Op = cast<AddOperator>(Op)->getOperand(0);
358 continue;
359 }
360 // Unsupported
361 goto unsupported_gep;
362 }
363 }
364 }
365
366 // Try to grab the base operand now.
367 Addr.Offset = TmpOffset;
368 if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
369
370 // We failed, restore everything and try the other options.
371 Addr = SavedAddr;
372
373 unsupported_gep:
374 break;
375 }
376 case Instruction::Alloca: {
377 const AllocaInst *AI = cast<AllocaInst>(Obj);
378 DenseMap<const AllocaInst*, int>::iterator SI =
379 FuncInfo.StaticAllocaMap.find(AI);
380 if (SI != FuncInfo.StaticAllocaMap.end()) {
381 Addr.BaseType = Address::FrameIndexBase;
382 Addr.Base.FI = SI->second;
383 return true;
384 }
385 break;
386 }
387 }
388
389 // FIXME: References to parameters fall through to the behavior
390 // below. They should be able to reference a frame index since
391 // they are stored to the stack, so we can get "ld rx, offset(r1)"
392 // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
393 // just contain the parameter. Try to handle this with a FI.
394
395 // Try to get this in a register if nothing else has worked.
396 if (Addr.Base.Reg == 0)
397 Addr.Base.Reg = getRegForValue(Obj);
398
399 // Prevent assignment of base register to X0, which is inappropriate
400 // for loads and stores alike.
401 if (Addr.Base.Reg != 0)
402 MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
403
404 return Addr.Base.Reg != 0;
405}
406
407// Fix up some addresses that can't be used directly. For example, if
408// an offset won't fit in an instruction field, we may need to move it
409// into an index register.
410void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
411 Register &IndexReg) {
412
413 // Check whether the offset fits in the instruction field.
414 if (!isInt<16>(Addr.Offset))
415 UseOffset = false;
416
417 // If this is a stack pointer and the offset needs to be simplified then
418 // put the alloca address into a register, set the base type back to
419 // register and continue. This should almost never happen.
420 if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
421 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
423 ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
424 Addr.Base.Reg = ResultReg;
425 Addr.BaseType = Address::RegBase;
426 }
427
428 if (!UseOffset) {
429 IntegerType *OffsetTy = Type::getInt64Ty(*Context);
430 const ConstantInt *Offset = ConstantInt::getSigned(OffsetTy, Addr.Offset);
431 IndexReg = PPCMaterializeInt(Offset, MVT::i64);
432 assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
433 }
434}
435
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined.
bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              const TargetRegisterClass *RC,
                              bool IsZExt, unsigned FP64LoadOpc) {
  unsigned Opc;
  bool UseOffset = true;
  bool HasSPE = Subtarget->hasSPE();

  // If ResultReg is given, it determines the register class of the load.
  // Otherwise, RC is the register class to use. If the result of the
  // load isn't anticipated in this block, both may be zero, in which
  // case we must make a conservative guess. In particular, don't assign
  // R0 or X0 to the result register, as the result may be used in a load,
  // store, add-immediate, or isel that won't permit this. (Though
  // perhaps the spill and reload of live-exit values would handle this?)
  const TargetRegisterClass *UseRC =
    (ResultReg ? MRI.getRegClass(ResultReg) :
     (RC ? RC :
      (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
       (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
         &PPC::GPRC_and_GPRC_NOR0RegClass)))));

  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the displacement-form (D-form) opcode for the value type; an
  // indexed (X-form) equivalent is substituted further down if needed.
  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
      break;
    case MVT::i16:
      Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
                    : (Is32BitInt ? PPC::LHA : PPC::LHA8));
      break;
    case MVT::i32:
      Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
                    : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
      // LWA is a DS-form instruction: its displacement must be a multiple
      // of 4, so fall back to the indexed form otherwise.
      if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
        UseOffset = false;
      break;
    case MVT::i64:
      Opc = PPC::LD;
      assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
             "64-bit load with 32-bit target??");
      // LD is also DS-form; the displacement must be a multiple of 4.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
      break;
    case MVT::f64:
      // Caller picks the f64 opcode (LFD by default, EVLDD for SPE).
      Opc = FP64LoadOpc;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
  // be used.
  bool IsVSSRC = isVSSRCRegClass(UseRC);
  bool IsVSFRC = isVSFRCRegClass(UseRC);
  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
  bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
  if ((Is32VSXLoad || Is64VSXLoad) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  if (!ResultReg)
    ResultReg = createResultReg(UseRC);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::LBZ: Opc = PPC::LBZX; break;
      case PPC::LBZ8: Opc = PPC::LBZX8; break;
      case PPC::LHZ: Opc = PPC::LHZX; break;
      case PPC::LHZ8: Opc = PPC::LHZX8; break;
      case PPC::LHA: Opc = PPC::LHAX; break;
      case PPC::LHA8: Opc = PPC::LHAX8; break;
      case PPC::LWZ: Opc = PPC::LWZX; break;
      case PPC::LWZ8: Opc = PPC::LWZX8; break;
      case PPC::LWA: Opc = PPC::LWAX; break;
      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
      case PPC::LD: Opc = PPC::LDX; break;
      case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
      case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
      case PPC::EVLDD: Opc = PPC::EVLDDX; break;
      case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                       ResultReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
576
577// Attempt to fast-select a load instruction.
578bool PPCFastISel::SelectLoad(const Instruction *I) {
579 // FIXME: No atomic loads are supported.
580 if (cast<LoadInst>(I)->isAtomic())
581 return false;
582
583 // Verify we have a legal type before going any further.
584 MVT VT;
585 if (!isLoadTypeLegal(I->getType(), VT))
586 return false;
587
588 // See if we can handle this address.
589 Address Addr;
590 if (!PPCComputeAddress(I->getOperand(0), Addr))
591 return false;
592
593 // Look at the currently assigned register for this instruction
594 // to determine the required register class. This is necessary
595 // to constrain RA from using R0/X0 when this is not legal.
596 Register AssignedReg = FuncInfo.ValueMap[I];
597 const TargetRegisterClass *RC =
598 AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
599
600 Register ResultReg = 0;
601 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
602 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
603 return false;
604 updateValueMap(I, ResultReg);
605 return true;
606}
607
// Emit a store instruction to store SrcReg at Addr.
bool PPCFastISel::PPCEmitStore(MVT VT, Register SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // The source register's class decides between 32- and 64-bit opcodes.
  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the displacement-form (D-form) opcode for the value type; an
  // indexed (X-form) equivalent is substituted further down if needed.
  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      // STD is DS-form: its displacement must be a multiple of 4.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
      break;
    case MVT::f64:
      Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::STB: Opc = PPC::STBX; break;
      case PPC::STH : Opc = PPC::STHX; break;
      case PPC::STW : Opc = PPC::STWX; break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD: Opc = PPC::STDX; break;
      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
      case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
      case PPC::SPESTW: Opc = PPC::SPESTWX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
722
723// Attempt to fast-select a store instruction.
724bool PPCFastISel::SelectStore(const Instruction *I) {
725 Value *Op0 = I->getOperand(0);
726 Register SrcReg;
727
728 // FIXME: No atomics loads are supported.
729 if (cast<StoreInst>(I)->isAtomic())
730 return false;
731
732 // Verify we have a legal type before going any further.
733 MVT VT;
734 if (!isLoadTypeLegal(Op0->getType(), VT))
735 return false;
736
737 // Get the value to be stored into a register.
738 SrcReg = getRegForValue(Op0);
739 if (!SrcReg)
740 return false;
741
742 // See if we can handle this address.
743 Address Addr;
744 if (!PPCComputeAddress(I->getOperand(1), Addr))
745 return false;
746
747 if (!PPCEmitStore(VT, SrcReg, Addr))
748 return false;
749
750 return true;
751}
752
753// Attempt to fast-select a branch instruction.
754bool PPCFastISel::SelectBranch(const Instruction *I) {
755 const BranchInst *BI = cast<BranchInst>(I);
756 MachineBasicBlock *BrBB = FuncInfo.MBB;
757 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
758 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
759
760 // For now, just try the simplest case where it's fed by a compare.
761 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
762 if (isValueAvailable(CI)) {
763 std::optional<PPC::Predicate> OptPPCPred =
764 getComparePred(CI->getPredicate());
765 if (!OptPPCPred)
766 return false;
767
768 PPC::Predicate PPCPred = *OptPPCPred;
769
770 // Take advantage of fall-through opportunities.
771 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
772 std::swap(TBB, FBB);
773 PPCPred = PPC::InvertPredicate(PPCPred);
774 }
775
776 Register CondReg = createResultReg(&PPC::CRRCRegClass);
777
778 if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
779 CondReg, PPCPred))
780 return false;
781
782 BuildMI(*BrBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCC))
783 .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
784 .addReg(CondReg)
785 .addMBB(TBB);
786 finishCondBranch(BI->getParent(), TBB, FBB);
787 return true;
788 }
789 } else if (const ConstantInt *CI =
791 uint64_t Imm = CI->getZExtValue();
792 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
793 fastEmitBranch(Target, MIMD.getDL());
794 return true;
795 }
796
797 // FIXME: ARM looks for a case where the block containing the compare
798 // has been split from the block containing the branch. If this happens,
799 // there is a vreg available containing the result of the compare. I'm
800 // not sure we can do much, as we've lost the predicate information with
801 // the compare instruction -- we have a 4-bit CR but don't know which bit
802 // to test here.
803 return false;
804}
805
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, Register DestReg,
                             const PPC::Predicate Pred) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // i1 lives in a CR bit when CR bits are in use; punt on that here.
  if (SrcVT == MVT::i1 && Subtarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  int64_t Imm = 0;
  bool UseImm = false;
  const bool HasSPE = Subtarget->hasSPE();

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
                       (int64_t)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  Register SrcReg1 = getRegForValue(SrcValue1);
  if (!SrcReg1)
    return false;

  // Operand 2 only needs a register when it is not folded as an immediate.
  Register SrcReg2;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (!SrcReg2)
      return false;
  }

  unsigned CmpOpc;
  bool NeedsExt = false;

  auto RC1 = MRI.getRegClass(SrcReg1);
  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;

  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      if (HasSPE) {
        // SPE only has predicate-specific FP compares (EQ/LT/GT).
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFSCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFSCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFSCMPGT;
            break;
        }
      } else {
        // FCMPUS operands must be in F4RC; copy out of VSX single class.
        CmpOpc = PPC::FCMPUS;
        if (isVSSRCRegClass(RC1))
          SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
        if (RC2 && isVSSRCRegClass(RC2))
          SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
      }
      break;
    case MVT::f64:
      if (HasSPE) {
        // SPE only has predicate-specific FP compares (EQ/LT/GT).
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFDCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFDCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFDCMPGT;
            break;
        }
      } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
        // Either operand in a VSX register: use the VSX compare.
        CmpOpc = PPC::XSCMPUDP;
      } else {
        CmpOpc = PPC::FCMPUD;
      }
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      // Sub-word operands are widened to i32 before comparing.
      NeedsExt = true;
      [[fallthrough]];
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  if (NeedsExt) {
    Register ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      Register ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
        .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
        .addReg(SrcReg1).addImm(Imm);

  return true;
}
944
945// Attempt to fast-select a floating-point extend instruction.
946bool PPCFastISel::SelectFPExt(const Instruction *I) {
947 Value *Src = I->getOperand(0);
948 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
949 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
950
951 if (SrcVT != MVT::f32 || DestVT != MVT::f64)
952 return false;
953
954 Register SrcReg = getRegForValue(Src);
955 if (!SrcReg)
956 return false;
957
958 // No code is generated for a FP extend.
959 updateValueMap(I, SrcReg);
960 return true;
961}
962
963// Attempt to fast-select a floating-point truncate instruction.
964bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
965 Value *Src = I->getOperand(0);
966 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
967 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
968
969 if (SrcVT != MVT::f64 || DestVT != MVT::f32)
970 return false;
971
972 Register SrcReg = getRegForValue(Src);
973 if (!SrcReg)
974 return false;
975
976 // Round the result to single precision.
977 Register DestReg;
978 auto RC = MRI.getRegClass(SrcReg);
979 if (Subtarget->hasSPE()) {
980 DestReg = createResultReg(&PPC::GPRCRegClass);
981 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::EFSCFD),
982 DestReg)
983 .addReg(SrcReg);
984 } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
985 DestReg = createResultReg(&PPC::VSSRCRegClass);
986 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::XSRSP),
987 DestReg)
988 .addReg(SrcReg);
989 } else {
990 SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
991 DestReg = createResultReg(&PPC::F4RCRegClass);
992 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
993 TII.get(PPC::FRSP), DestReg)
994 .addReg(SrcReg);
995 }
996
997 updateValueMap(I, DestReg);
998 return true;
999}
1000
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// Returns the FPR holding the bits, or an invalid Register on failure.
// The move is done with a store to a stack slot followed by an FP load.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
Register PPCFastISel::PPCMoveToFPReg(MVT SrcVT, Register SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit (sign- or zero-extended
  // according to IsSigned) so the doubleword store below is correct.
  if (SrcVT == MVT::i32) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return Register();
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return Register();

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      // On big-endian targets the 32-bit value lives in the high half of
      // the 8-byte slot, so offset the word load by 4.
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    } else if (Subtarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    }
    // Otherwise (signed i32 without LFIWAX) fall through to LFD: the value
    // was already sign-extended to 64 bits above, so a doubleword load works.
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register ResultReg;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return Register();

  return ResultReg;
}
1049
// Attempt to fast-select an integer-to-floating-point conversion.
// Handles i8/i16/i32/i64 sources converted to f32/f64, either via SPE
// instructions (all in GPRs) or via a GPR->FPR move plus FCFID*.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only scalar f32/f64 destinations are handled.
  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  // Only i8/i16/i32/i64 sources are handled.
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
  if (Subtarget->hasSPE()) {
    unsigned Opc;
    if (DstVT == MVT::f32)
      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
    else
      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;

    Register DestReg = createResultReg(&PPC::SPERCRegClass);
    // Generate the convert.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
      .addReg(SrcReg);
    updateValueMap(I, DestReg);
    return true;
  }

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !Subtarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
    return false;

  // Extend the input if necessary. Sub-word sources are widened to i64
  // (zero- or sign-extended to match IsSigned) before the FPR move.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR.
  Register FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (!FPReg)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}
1137
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// The move is done with an FP store to a stack slot plus an integer reload.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
Register PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      Register SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return Register();

  // Reload it into a GPR. If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. A null class lets the
  // load emitter pick a default.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  Register ResultReg;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return Register();

  return ResultReg;
}
1174
// Attempt to fast-select a floating-point-to-integer conversion.
// Converts f32/f64 to i32/i64 via SPE, VSX, or classic FPR conversion
// instructions, then moves the result into a GPR where needed.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
      !Subtarget->hasSPE())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
  // meaningless copy to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass)
    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
  else if (InRC == &PPC::VSSRCRegClass)
    SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs or VSRs. Note: RC reflects the (possibly
  // re-classed) source; InRC is the original class and distinguishes
  // single- vs double-precision on SPE (f32 lives in GPRC there).
  Register DestReg;
  unsigned Opc;
  auto RC = MRI.getRegClass(SrcReg);

  if (Subtarget->hasSPE()) {
    DestReg = createResultReg(&PPC::GPRCRegClass);
    if (IsSigned)
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
    else
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
  } else if (isVSFRCRegClass(RC)) {
    DestReg = createResultReg(&PPC::VSFRCRegClass);
    if (DstVT == MVT::i32)
      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
    else
      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
  } else {
    DestReg = createResultReg(&PPC::F8RCRegClass);
    if (DstVT == MVT::i32)
      if (IsSigned)
        Opc = PPC::FCTIWZ;
      else
        // Without FPCVT there is no FCTIWUZ; FCTIDZ is used instead,
        // presumably relying on the i32 result fitting in the doubleword.
        Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
    else
      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
  }

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  // SPE already produced the result in a GPR, so no move is needed there.
  Register IntReg = Subtarget->hasSPE()
                        ? DestReg
                        : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);

  if (!IntReg)
    return false;

  updateValueMap(I, IntReg);
  return true;
}
1256
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically. Supports ADD/OR/SUB on the non-legal i8/i16
// types, using an immediate form of the opcode when the second operand
// is a small constant.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the 32- or 64-bit opcode variant to match the register class.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  // RC is always non-null at this point; the fallback is defensive only.
  Register ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  Register SrcReg1 = getRegForValue(I->getOperand(0));
  if (!SrcReg1)
    return false;

  // Handle case of small immediate operand: switch to the immediate form
  // of the opcode when the constant fits in a signed 16-bit field.
  // ADDI/ADDI8 treat R0/X0 as the literal zero, so the source register
  // class is constrained away from those.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // x - imm is rewritten as addi(x, -imm), except that -(-32768)
          // does not fit in 16 bits, so use the reg-reg form for that value.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  Register SrcReg2 = getRegForValue(I->getOperand(1));
  if (!SrcReg2)
    return false;

  // Reverse operands for subtract-from: SUBF computes RB - RA.
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
1363
1364// Handle arguments to a call that we're attempting to fast-select.
1365// Return false if the arguments are too complex for us at the moment.
1366bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args,
1367 SmallVectorImpl<Register> &ArgRegs,
1368 SmallVectorImpl<MVT> &ArgVTs,
1369 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1370 SmallVectorImpl<unsigned> &RegArgs,
1371 CallingConv::ID CC, unsigned &NumBytes,
1372 bool IsVarArg) {
1374 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1375
1376 // Reserve space for the linkage area on the stack.
1377 unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
1378 CCInfo.AllocateStack(LinkageSize, Align(8));
1379
1381 for (Value *Arg : Args)
1382 ArgTys.push_back(Arg->getType());
1383 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, ArgTys, CC_PPC64_ELF_FIS);
1384
1385 // Bail out if we can't handle any of the arguments.
1386 for (const CCValAssign &VA : ArgLocs) {
1387 MVT ArgVT = ArgVTs[VA.getValNo()];
1388
1389 // Skip vector arguments for now, as well as long double and
1390 // uint128_t, and anything that isn't passed in a register.
1391 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1392 !VA.isRegLoc() || VA.needsCustom())
1393 return false;
1394
1395 // Skip bit-converted arguments for now.
1396 if (VA.getLocInfo() == CCValAssign::BCvt)
1397 return false;
1398 }
1399
1400 // Get a count of how many bytes are to be pushed onto the stack.
1401 NumBytes = CCInfo.getStackSize();
1402
1403 // The prolog code of the callee may store up to 8 GPR argument registers to
1404 // the stack, allowing va_start to index over them in memory if its varargs.
1405 // Because we cannot tell if this is needed on the caller side, we have to
1406 // conservatively assume that it is needed. As such, make sure we have at
1407 // least enough stack space for the caller to store the 8 GPRs.
1408 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1409 NumBytes = std::max(NumBytes, LinkageSize + 64);
1410
1411 // Issue CALLSEQ_START.
1412 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1413 TII.get(TII.getCallFrameSetupOpcode()))
1414 .addImm(NumBytes).addImm(0);
1415
1416 // Prepare to assign register arguments. Every argument uses up a
1417 // GPR protocol register even if it's passed in a floating-point
1418 // register (unless we're using the fast calling convention).
1419 unsigned NextGPR = PPC::X3;
1420 unsigned NextFPR = PPC::F1;
1421
1422 // Process arguments.
1423 for (const CCValAssign &VA : ArgLocs) {
1424 Register Arg = ArgRegs[VA.getValNo()];
1425 MVT ArgVT = ArgVTs[VA.getValNo()];
1426
1427 // Handle argument promotion and bitcasts.
1428 switch (VA.getLocInfo()) {
1429 default:
1430 llvm_unreachable("Unknown loc info!");
1431 case CCValAssign::Full:
1432 break;
1433 case CCValAssign::SExt: {
1434 MVT DestVT = VA.getLocVT();
1435 const TargetRegisterClass *RC =
1436 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1437 Register TmpReg = createResultReg(RC);
1438 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1439 llvm_unreachable("Failed to emit a sext!");
1440 ArgVT = DestVT;
1441 Arg = TmpReg;
1442 break;
1443 }
1444 case CCValAssign::AExt:
1445 case CCValAssign::ZExt: {
1446 MVT DestVT = VA.getLocVT();
1447 const TargetRegisterClass *RC =
1448 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1449 Register TmpReg = createResultReg(RC);
1450 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1451 llvm_unreachable("Failed to emit a zext!");
1452 ArgVT = DestVT;
1453 Arg = TmpReg;
1454 break;
1455 }
1456 case CCValAssign::BCvt: {
1457 // FIXME: Not yet handled.
1458 llvm_unreachable("Should have bailed before getting here!");
1459 break;
1460 }
1461 }
1462
1463 // Copy this argument to the appropriate register.
1464 unsigned ArgReg;
1465 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1466 ArgReg = NextFPR++;
1467 if (CC != CallingConv::Fast)
1468 ++NextGPR;
1469 } else
1470 ArgReg = NextGPR++;
1471
1472 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1473 TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1474 RegArgs.push_back(ArgReg);
1475 }
1476
1477 return true;
1478}
1479
1480// For a call that we've determined we can fast-select, finish the
1481// call sequence and generate a copy to obtain the return value (if any).
1482bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1483 CallingConv::ID CC = CLI.CallConv;
1484
1485 // Issue CallSEQ_END.
1486 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1487 TII.get(TII.getCallFrameDestroyOpcode()))
1488 .addImm(NumBytes).addImm(0);
1489
1490 // Next, generate a copy to obtain the return value.
1491 // FIXME: No multi-register return values yet, though I don't foresee
1492 // any real difficulties there.
1493 if (RetVT != MVT::isVoid) {
1495 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1496 CCInfo.AnalyzeCallResult(RetVT, CLI.RetTy, RetCC_PPC64_ELF_FIS);
1497 CCValAssign &VA = RVLocs[0];
1498 assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1499 assert(VA.isRegLoc() && "Can only return in registers!");
1500
1501 MVT DestVT = VA.getValVT();
1502 MVT CopyVT = DestVT;
1503
1504 // Ints smaller than a register still arrive in a full 64-bit
1505 // register, so make sure we recognize this.
1506 if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1507 CopyVT = MVT::i64;
1508
1509 Register SourcePhysReg = VA.getLocReg();
1510 Register ResultReg;
1511
1512 if (RetVT == CopyVT) {
1513 const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1514 ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
1515
1516 // If necessary, round the floating result to single precision.
1517 } else if (CopyVT == MVT::f64) {
1518 ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1519 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::FRSP),
1520 ResultReg).addReg(SourcePhysReg);
1521
1522 // If only the low half of a general register is needed, generate
1523 // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1524 // used along the fast-isel path (not lowered), and downstream logic
1525 // also doesn't like a direct subreg copy on a physical reg.)
1526 } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1527 // Convert physical register from G8RC to GPRC.
1528 SourcePhysReg = (SourcePhysReg - PPC::X0) + PPC::R0;
1529 ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
1530 }
1531
1532 assert(ResultReg && "ResultReg unset!");
1533 CLI.InRegs.push_back(SourcePhysReg);
1534 CLI.ResultReg = ResultReg;
1535 CLI.NumResultRegs = 1;
1536 }
1537
1538 return true;
1539}
1540
1541bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1542 CallingConv::ID CC = CLI.CallConv;
1543 bool IsTailCall = CLI.IsTailCall;
1544 bool IsVarArg = CLI.IsVarArg;
1545 const Value *Callee = CLI.Callee;
1546 const MCSymbol *Symbol = CLI.Symbol;
1547
1548 if (!Callee && !Symbol)
1549 return false;
1550
1551 // Allow SelectionDAG isel to handle tail calls and long calls.
1552 if (IsTailCall || Subtarget->useLongCalls())
1553 return false;
1554
1555 // Let SDISel handle vararg functions.
1556 if (IsVarArg)
1557 return false;
1558
1559 // If this is a PC-Rel function, let SDISel handle the call.
1560 if (Subtarget->isUsingPCRelativeCalls())
1561 return false;
1562
1563 // Handle simple calls for now, with legal return types and
1564 // those that can be extended.
1565 Type *RetTy = CLI.RetTy;
1566 MVT RetVT;
1567 if (RetTy->isVoidTy())
1568 RetVT = MVT::isVoid;
1569 else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1570 RetVT != MVT::i8)
1571 return false;
1572 else if (RetVT == MVT::i1 && Subtarget->useCRBits())
1573 // We can't handle boolean returns when CR bits are in use.
1574 return false;
1575
1576 // FIXME: No multi-register return values yet.
1577 if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1578 RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1579 RetVT != MVT::f64) {
1581 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1582 CCInfo.AnalyzeCallResult(RetVT, RetTy, RetCC_PPC64_ELF_FIS);
1583 if (RVLocs.size() > 1)
1584 return false;
1585 }
1586
1587 // Bail early if more than 8 arguments, as we only currently
1588 // handle arguments passed in registers.
1589 unsigned NumArgs = CLI.OutVals.size();
1590 if (NumArgs > 8)
1591 return false;
1592
1593 // Set up the argument vectors.
1594 SmallVector<Value*, 8> Args;
1596 SmallVector<MVT, 8> ArgVTs;
1598
1599 Args.reserve(NumArgs);
1600 ArgRegs.reserve(NumArgs);
1601 ArgVTs.reserve(NumArgs);
1602 ArgFlags.reserve(NumArgs);
1603
1604 for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1605 // Only handle easy calls for now. It would be reasonably easy
1606 // to handle <= 8-byte structures passed ByVal in registers, but we
1607 // have to ensure they are right-justified in the register.
1608 ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1609 if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1610 return false;
1611
1612 Value *ArgValue = CLI.OutVals[i];
1613 Type *ArgTy = ArgValue->getType();
1614 MVT ArgVT;
1615 if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1616 return false;
1617
1618 // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
1619 // types, which is passed through vector register. Skip these types and
1620 // fallback to default SelectionDAG based selection.
1621 if (ArgVT.isVector() || ArgVT == MVT::f128)
1622 return false;
1623
1624 Register Arg = getRegForValue(ArgValue);
1625 if (!Arg)
1626 return false;
1627
1628 Args.push_back(ArgValue);
1629 ArgRegs.push_back(Arg);
1630 ArgVTs.push_back(ArgVT);
1631 ArgFlags.push_back(Flags);
1632 }
1633
1634 // Process the arguments.
1635 SmallVector<unsigned, 8> RegArgs;
1636 unsigned NumBytes;
1637
1638 if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1639 RegArgs, CC, NumBytes, IsVarArg))
1640 return false;
1641
1642 MachineInstrBuilder MIB;
1643 // FIXME: No handling for function pointers yet. This requires
1644 // implementing the function descriptor (OPD) setup.
1645 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1646 if (!GV) {
1647 // patchpoints are a special case; they always dispatch to a pointer value.
1648 // However, we don't actually want to generate the indirect call sequence
1649 // here (that will be generated, as necessary, during asm printing), and
1650 // the call we generate here will be erased by FastISel::selectPatchpoint,
1651 // so don't try very hard...
1652 if (CLI.IsPatchPoint)
1653 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::NOP));
1654 else
1655 return false;
1656 } else {
1657 // Build direct call with NOP for TOC restore.
1658 // FIXME: We can and should optimize away the NOP for local calls.
1659 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1660 TII.get(PPC::BL8_NOP));
1661 // Add callee.
1662 MIB.addGlobalAddress(GV);
1663 }
1664
1665 // Add implicit physical register uses to the call.
1666 for (unsigned Reg : RegArgs)
1668
1669 // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1670 // into the call.
1671 PPCFuncInfo->setUsesTOCBasePtr();
1672 MIB.addReg(PPC::X2, RegState::Implicit);
1673
1674 // Add a register mask with the call-preserved registers. Proper
1675 // defs for return values will be added by setPhysRegsDeadExcept().
1676 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1677
1678 CLI.Call = MIB;
1679
1680 // Finish off the call including any return values.
1681 return finishCall(RetVT, CLI, NumBytes);
1682}
1683
1684// Attempt to fast-select a return instruction.
1685bool PPCFastISel::SelectRet(const Instruction *I) {
1686
1687 if (!FuncInfo.CanLowerReturn)
1688 return false;
1689
1690 const ReturnInst *Ret = cast<ReturnInst>(I);
1691 const Function &F = *I->getParent()->getParent();
1692
1693 // Build a list of return value registers.
1695 CallingConv::ID CC = F.getCallingConv();
1696
1697 if (Ret->getNumOperands() > 0) {
1699 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1700
1701 // Analyze operands of the call, assigning locations to each operand.
1703 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1704 CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1705 const Value *RV = Ret->getOperand(0);
1706
1707 // FIXME: Only one output register for now.
1708 if (ValLocs.size() > 1)
1709 return false;
1710
1711 // Special case for returning a constant integer of any size - materialize
1712 // the constant as an i64 and copy it to the return register.
1713 if (isa<ConstantInt>(RV) && RV->getType()->isIntegerTy()) {
1714 const ConstantInt *CI = cast<ConstantInt>(RV);
1715 CCValAssign &VA = ValLocs[0];
1716
1717 Register RetReg = VA.getLocReg();
1718 // We still need to worry about properly extending the sign. For example,
1719 // we could have only a single bit or a constant that needs zero
1720 // extension rather than sign extension. Make sure we pass the return
1721 // value extension property to integer materialization.
1722 Register SrcReg =
1723 PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1724
1725 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1726 TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1727
1728 RetRegs.push_back(RetReg);
1729
1730 } else {
1731 Register Reg = getRegForValue(RV);
1732
1733 if (!Reg)
1734 return false;
1735
1736 // Copy the result values into the output registers.
1737 for (unsigned i = 0; i < ValLocs.size(); ++i) {
1738
1739 CCValAssign &VA = ValLocs[i];
1740 assert(VA.isRegLoc() && "Can only return in registers!");
1741 RetRegs.push_back(VA.getLocReg());
1742 Register SrcReg = Reg + VA.getValNo();
1743
1744 EVT RVEVT = TLI.getValueType(DL, RV->getType());
1745 if (!RVEVT.isSimple())
1746 return false;
1747 MVT RVVT = RVEVT.getSimpleVT();
1748 MVT DestVT = VA.getLocVT();
1749
1750 if (RVVT != DestVT && RVVT != MVT::i8 &&
1751 RVVT != MVT::i16 && RVVT != MVT::i32)
1752 return false;
1753
1754 if (RVVT != DestVT) {
1755 switch (VA.getLocInfo()) {
1756 default:
1757 llvm_unreachable("Unknown loc info!");
1758 case CCValAssign::Full:
1759 llvm_unreachable("Full value assign but types don't match?");
1760 case CCValAssign::AExt:
1761 case CCValAssign::ZExt: {
1762 const TargetRegisterClass *RC =
1763 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1764 Register TmpReg = createResultReg(RC);
1765 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1766 return false;
1767 SrcReg = TmpReg;
1768 break;
1769 }
1770 case CCValAssign::SExt: {
1771 const TargetRegisterClass *RC =
1772 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1773 Register TmpReg = createResultReg(RC);
1774 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1775 return false;
1776 SrcReg = TmpReg;
1777 break;
1778 }
1779 }
1780 }
1781
1782 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1783 TII.get(TargetOpcode::COPY), RetRegs[i])
1784 .addReg(SrcReg);
1785 }
1786 }
1787 }
1788
1789 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1790 TII.get(PPC::BLR8));
1791
1792 for (Register Reg : RetRegs)
1794
1795 return true;
1796}
1797
1798// Attempt to emit an integer extend of SrcReg into DestReg. Both
1799// signed and zero extensions are supported. Return false if we
1800// can't handle it.
1801bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
1802 Register DestReg, bool IsZExt) {
1803 if (DestVT != MVT::i32 && DestVT != MVT::i64)
1804 return false;
1805 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1806 return false;
1807
1808 // Signed extensions use EXTSB, EXTSH, EXTSW.
1809 if (!IsZExt) {
1810 unsigned Opc;
1811 if (SrcVT == MVT::i8)
1812 Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1813 else if (SrcVT == MVT::i16)
1814 Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1815 else {
1816 assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1817 Opc = PPC::EXTSW_32_64;
1818 }
1819 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1820 .addReg(SrcReg);
1821
1822 // Unsigned 32-bit extensions use RLWINM.
1823 } else if (DestVT == MVT::i32) {
1824 unsigned MB;
1825 if (SrcVT == MVT::i8)
1826 MB = 24;
1827 else {
1828 assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1829 MB = 16;
1830 }
1831 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLWINM),
1832 DestReg)
1833 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
1834
1835 // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1836 } else {
1837 unsigned MB;
1838 if (SrcVT == MVT::i8)
1839 MB = 56;
1840 else if (SrcVT == MVT::i16)
1841 MB = 48;
1842 else
1843 MB = 32;
1844 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1845 TII.get(PPC::RLDICL_32_64), DestReg)
1846 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
1847 }
1848
1849 return true;
1850}
1851
1852// Attempt to fast-select an indirect branch instruction.
1853bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1854 Register AddrReg = getRegForValue(I->getOperand(0));
1855 if (!AddrReg)
1856 return false;
1857
1858 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::MTCTR8))
1859 .addReg(AddrReg);
1860 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCTR8));
1861
1862 const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1863 for (const BasicBlock *SuccBB : IB->successors())
1864 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(SuccBB));
1865
1866 return true;
1867}
1868
1869// Attempt to fast-select an integer truncate instruction.
1870bool PPCFastISel::SelectTrunc(const Instruction *I) {
1871 Value *Src = I->getOperand(0);
1872 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1873 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1874
1875 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1876 return false;
1877
1878 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1879 return false;
1880
1881 Register SrcReg = getRegForValue(Src);
1882 if (!SrcReg)
1883 return false;
1884
1885 // The only interesting case is when we need to switch register classes.
1886 if (SrcVT == MVT::i64)
1887 SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
1888
1889 updateValueMap(I, SrcReg);
1890 return true;
1891}
1892
1893// Attempt to fast-select an integer extend instruction.
1894bool PPCFastISel::SelectIntExt(const Instruction *I) {
1895 Type *DestTy = I->getType();
1896 Value *Src = I->getOperand(0);
1897 Type *SrcTy = Src->getType();
1898
1899 bool IsZExt = isa<ZExtInst>(I);
1900 Register SrcReg = getRegForValue(Src);
1901 if (!SrcReg) return false;
1902
1903 EVT SrcEVT, DestEVT;
1904 SrcEVT = TLI.getValueType(DL, SrcTy, true);
1905 DestEVT = TLI.getValueType(DL, DestTy, true);
1906 if (!SrcEVT.isSimple())
1907 return false;
1908 if (!DestEVT.isSimple())
1909 return false;
1910
1911 MVT SrcVT = SrcEVT.getSimpleVT();
1912 MVT DestVT = DestEVT.getSimpleVT();
1913
1914 // If we know the register class needed for the result of this
1915 // instruction, use it. Otherwise pick the register class of the
1916 // correct size that does not contain X0/R0, since we don't know
1917 // whether downstream uses permit that assignment.
1918 Register AssignedReg = FuncInfo.ValueMap[I];
1919 const TargetRegisterClass *RC =
1920 (AssignedReg ? MRI.getRegClass(AssignedReg) :
1921 (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1922 &PPC::GPRC_and_GPRC_NOR0RegClass));
1923 Register ResultReg = createResultReg(RC);
1924
1925 if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1926 return false;
1927
1928 updateValueMap(I, ResultReg);
1929 return true;
1930}
1931
1932// Attempt to fast-select an instruction that wasn't handled by
1933// the table-generated machinery.
1934bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1935
1936 switch (I->getOpcode()) {
1937 case Instruction::Load:
1938 return SelectLoad(I);
1939 case Instruction::Store:
1940 return SelectStore(I);
1941 case Instruction::Br:
1942 return SelectBranch(I);
1943 case Instruction::IndirectBr:
1944 return SelectIndirectBr(I);
1945 case Instruction::FPExt:
1946 return SelectFPExt(I);
1947 case Instruction::FPTrunc:
1948 return SelectFPTrunc(I);
1949 case Instruction::SIToFP:
1950 return SelectIToFP(I, /*IsSigned*/ true);
1951 case Instruction::UIToFP:
1952 return SelectIToFP(I, /*IsSigned*/ false);
1953 case Instruction::FPToSI:
1954 return SelectFPToI(I, /*IsSigned*/ true);
1955 case Instruction::FPToUI:
1956 return SelectFPToI(I, /*IsSigned*/ false);
1957 case Instruction::Add:
1958 return SelectBinaryIntOp(I, ISD::ADD);
1959 case Instruction::Or:
1960 return SelectBinaryIntOp(I, ISD::OR);
1961 case Instruction::Sub:
1962 return SelectBinaryIntOp(I, ISD::SUB);
1963 case Instruction::Ret:
1964 return SelectRet(I);
1965 case Instruction::Trunc:
1966 return SelectTrunc(I);
1967 case Instruction::ZExt:
1968 case Instruction::SExt:
1969 return SelectIntExt(I);
1970 // Here add other flavors of Instruction::XXX that automated
1971 // cases don't catch. For example, switches are terminators
1972 // that aren't yet handled.
1973 default:
1974 break;
1975 }
1976 return false;
1977}
1978
1979// Materialize a floating-point constant into a register, and return
1980// the register number (or zero if we failed to handle it).
1981Register PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1982 // If this is a PC-Rel function, let SDISel handle constant pool.
1983 if (Subtarget->isUsingPCRelativeCalls())
1984 return Register();
1985
1986 // No plans to handle long double here.
1987 if (VT != MVT::f32 && VT != MVT::f64)
1988 return Register();
1989
1990 // All FP constants are loaded from the constant pool.
1991 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
1992 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
1993 const bool HasSPE = Subtarget->hasSPE();
1994 const TargetRegisterClass *RC;
1995 if (HasSPE)
1996 RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
1997 else
1998 RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
1999
2000 Register DestReg = createResultReg(RC);
2001 CodeModel::Model CModel = TM.getCodeModel();
2002
2003 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2005 MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);
2006
2007 unsigned Opc;
2008
2009 if (HasSPE)
2010 Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
2011 else
2012 Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
2013
2014 Register TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2015
2016 PPCFuncInfo->setUsesTOCBasePtr();
2017 // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
2018 if (CModel == CodeModel::Small) {
2019 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocCPT),
2020 TmpReg)
2021 .addConstantPoolIndex(Idx).addReg(PPC::X2);
2022 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2023 .addImm(0).addReg(TmpReg).addMemOperand(MMO);
2024 } else {
2025 // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
2026 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2027 TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
2028 // But for large code model, we must generate a LDtocL followed
2029 // by the LF[SD].
2030 if (CModel == CodeModel::Large) {
2031 Register TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2032 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2033 TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
2034 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2035 .addImm(0)
2036 .addReg(TmpReg2);
2037 } else
2038 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2040 .addReg(TmpReg)
2041 .addMemOperand(MMO);
2042 }
2043
2044 return DestReg;
2045}
2046
2047// Materialize the address of a global value into a register, and return
2048// the register number (or zero if we failed to handle it).
2049Register PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
2050 // If this is a PC-Rel function, let SDISel handle GV materialization.
2051 if (Subtarget->isUsingPCRelativeCalls())
2052 return Register();
2053
2054 assert(VT == MVT::i64 && "Non-address!");
2055 const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
2056 Register DestReg = createResultReg(RC);
2057
2058 // Global values may be plain old object addresses, TLS object
2059 // addresses, constant pool entries, or jump tables. How we generate
2060 // code for these may depend on small, medium, or large code model.
2061 CodeModel::Model CModel = TM.getCodeModel();
2062
2063 // FIXME: Jump tables are not yet required because fast-isel doesn't
2064 // handle switches; if that changes, we need them as well. For now,
2065 // what follows assumes everything's a generic (or TLS) global address.
2066
2067 // FIXME: We don't yet handle the complexity of TLS.
2068 if (GV->isThreadLocal())
2069 return Register();
2070
2071 PPCFuncInfo->setUsesTOCBasePtr();
2072 bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
2073 isa<GlobalVariable>(GV) &&
2074 cast<GlobalVariable>(GV)->hasAttribute("toc-data");
2075
2076 // For small code model, generate a simple TOC load.
2077 if (CModel == CodeModel::Small) {
2078 auto MIB = BuildMI(
2079 *FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2080 IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc), DestReg);
2081 if (IsAIXTocData)
2082 MIB.addReg(PPC::X2).addGlobalAddress(GV);
2083 else
2084 MIB.addGlobalAddress(GV).addReg(PPC::X2);
2085 } else {
2086 // If the address is an externally defined symbol, a symbol with common
2087 // or externally available linkage, a non-local function address, or a
2088 // jump table address (not yet needed), or if we are generating code
2089 // for large code model, we generate:
2090 // LDtocL(GV, ADDIStocHA8(%x2, GV))
2091 // Otherwise we generate:
2092 // ADDItocL8(ADDIStocHA8(%x2, GV), GV)
2093 // Either way, start with the ADDIStocHA8:
2094 Register HighPartReg = createResultReg(RC);
2095 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2096 HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
2097
2098 if (Subtarget->isGVIndirectSymbol(GV)) {
2099 assert(!IsAIXTocData && "TOC data should always be direct.");
2100 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2101 DestReg).addGlobalAddress(GV).addReg(HighPartReg);
2102 } else {
2103 // Otherwise generate the ADDItocL8.
2104 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8),
2105 DestReg)
2106 .addReg(HighPartReg)
2107 .addGlobalAddress(GV);
2108 }
2109 }
2110
2111 return DestReg;
2112}
2113
2114// Materialize a 32-bit integer constant into a register, and return
2115// the register number (or zero if we failed to handle it).
2116Register PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2117 const TargetRegisterClass *RC) {
2118 unsigned Lo = Imm & 0xFFFF;
2119 unsigned Hi = (Imm >> 16) & 0xFFFF;
2120
2121 Register ResultReg = createResultReg(RC);
2122 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2123
2124 if (isInt<16>(Imm))
2125 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2126 TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2127 .addImm(Imm);
2128 else if (Lo) {
2129 // Both Lo and Hi have nonzero bits.
2130 Register TmpReg = createResultReg(RC);
2131 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2132 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2133 .addImm(Hi);
2134 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2135 TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2136 .addReg(TmpReg).addImm(Lo);
2137 } else
2138 // Just Hi bits.
2139 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2140 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2141 .addImm(Hi);
2142
2143 return ResultReg;
2144}
2145
2146// Materialize a 64-bit integer constant into a register, and return
2147// the register number (or zero if we failed to handle it).
2148Register PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
2149 const TargetRegisterClass *RC) {
2150 unsigned Remainder = 0;
2151 unsigned Shift = 0;
2152
2153 // If the value doesn't fit in 32 bits, see if we can shift it
2154 // so that it fits in 32 bits.
2155 if (!isInt<32>(Imm)) {
2156 Shift = llvm::countr_zero<uint64_t>(Imm);
2157 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
2158
2159 if (isInt<32>(ImmSh))
2160 Imm = ImmSh;
2161 else {
2162 Remainder = Imm;
2163 Shift = 32;
2164 Imm >>= 32;
2165 }
2166 }
2167
2168 // Handle the high-order 32 bits (if shifted) or the whole 32 bits
2169 // (if not shifted).
2170 Register TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
2171 if (!Shift)
2172 return TmpReg1;
2173
2174 // If upper 32 bits were not zero, we've built them and need to shift
2175 // them into place.
2176 Register TmpReg2;
2177 if (Imm) {
2178 TmpReg2 = createResultReg(RC);
2179 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLDICR),
2180 TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
2181 } else
2182 TmpReg2 = TmpReg1;
2183
2184 Register TmpReg3;
2185 unsigned Hi, Lo;
2186 if ((Hi = (Remainder >> 16) & 0xFFFF)) {
2187 TmpReg3 = createResultReg(RC);
2188 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORIS8),
2189 TmpReg3).addReg(TmpReg2).addImm(Hi);
2190 } else
2191 TmpReg3 = TmpReg2;
2192
2193 if ((Lo = Remainder & 0xFFFF)) {
2194 Register ResultReg = createResultReg(RC);
2195 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORI8),
2196 ResultReg).addReg(TmpReg3).addImm(Lo);
2197 return ResultReg;
2198 }
2199
2200 return TmpReg3;
2201}
2202
2203// Materialize an integer constant into a register, and return
2204// the register number (or zero if we failed to handle it).
2205Register PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2206 bool UseSExt) {
2207 // If we're using CR bit registers for i1 values, handle that as a special
2208 // case first.
2209 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2210 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2211 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2212 TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2213 return ImmReg;
2214 }
2215
2216 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2217 VT != MVT::i1)
2218 return Register();
2219
2220 const TargetRegisterClass *RC =
2221 ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2222 int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2223
2224 // If the constant is in range, use a load-immediate.
2225 // Since LI will sign extend the constant we need to make sure that for
2226 // our zeroext constants that the sign extended constant fits into 16-bits -
2227 // a range of 0..0x7fff.
2228 if (isInt<16>(Imm)) {
2229 unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2230 Register ImmReg = createResultReg(RC);
2231 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ImmReg)
2232 .addImm(Imm);
2233 return ImmReg;
2234 }
2235
2236 // Construct the constant piecewise.
2237 if (VT == MVT::i64)
2238 return PPCMaterialize64BitInt(Imm, RC);
2239 else if (VT == MVT::i32)
2240 return PPCMaterialize32BitInt(Imm, RC);
2241
2242 return Register();
2243}
2244
2245// Materialize a constant into a register, and return the register
2246// number (or zero if we failed to handle it).
2247Register PPCFastISel::fastMaterializeConstant(const Constant *C) {
2248 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2249
2250 // Only handle simple types.
2251 if (!CEVT.isSimple())
2252 return Register();
2253 MVT VT = CEVT.getSimpleVT();
2254
2255 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2256 return PPCMaterializeFP(CFP, VT);
2257 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2258 return PPCMaterializeGV(GV, VT);
2259 else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2260 // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2261 // assumes that constant PHI operands will be zero extended, and failure to
2262 // match that assumption will cause problems if we sign extend here but
2263 // some user of a PHI is in a block for which we fall back to full SDAG
2264 // instruction selection.
2265 return PPCMaterializeInt(CI, VT, false);
2266
2267 return Register();
2268}
2269
2270// Materialize the address created by an alloca into a register, and
2271// return the register number (or zero if we failed to handle it).
2272Register PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2273 DenseMap<const AllocaInst *, int>::iterator SI =
2274 FuncInfo.StaticAllocaMap.find(AI);
2275
2276 // Don't handle dynamic allocas.
2277 if (SI == FuncInfo.StaticAllocaMap.end())
2278 return Register();
2279
2280 MVT VT;
2281 if (!isLoadTypeLegal(AI->getType(), VT))
2282 return Register();
2283
2284 if (SI != FuncInfo.StaticAllocaMap.end()) {
2285 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2286 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
2287 ResultReg).addFrameIndex(SI->second).addImm(0);
2288 return ResultReg;
2289 }
2290
2291 return Register();
2292}
2293
2294// Fold loads into extends when possible.
2295// FIXME: We can have multiple redundant extend/trunc instructions
2296// following a load. The folding only picks up one. Extend this
2297// to check subsequent instructions for the same pattern and remove
2298// them. Thus ResultReg should be the def reg for the last redundant
2299// instruction in a chain, and all intervening instructions can be
2300// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
2301// to add ELF64-NOT: rldicl to the appropriate tests when this works.
2302bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2303 const LoadInst *LI) {
2304 // Verify we have a legal type before going any further.
2305 MVT VT;
2306 if (!isLoadTypeLegal(LI->getType(), VT))
2307 return false;
2308
2309 // Combine load followed by zero- or sign-extend.
2310 bool IsZExt = false;
2311 switch(MI->getOpcode()) {
2312 default:
2313 return false;
2314
2315 case PPC::RLDICL:
2316 case PPC::RLDICL_32_64: {
2317 IsZExt = true;
2318 unsigned MB = MI->getOperand(3).getImm();
2319 if ((VT == MVT::i8 && MB <= 56) ||
2320 (VT == MVT::i16 && MB <= 48) ||
2321 (VT == MVT::i32 && MB <= 32))
2322 break;
2323 return false;
2324 }
2325
2326 case PPC::RLWINM:
2327 case PPC::RLWINM8: {
2328 IsZExt = true;
2329 unsigned MB = MI->getOperand(3).getImm();
2330 if ((VT == MVT::i8 && MB <= 24) ||
2331 (VT == MVT::i16 && MB <= 16))
2332 break;
2333 return false;
2334 }
2335
2336 case PPC::EXTSB:
2337 case PPC::EXTSB8:
2338 case PPC::EXTSB8_32_64:
2339 /* There is no sign-extending load-byte instruction. */
2340 return false;
2341
2342 case PPC::EXTSH:
2343 case PPC::EXTSH8:
2344 case PPC::EXTSH8_32_64: {
2345 if (VT != MVT::i16 && VT != MVT::i8)
2346 return false;
2347 break;
2348 }
2349
2350 case PPC::EXTSW:
2351 case PPC::EXTSW_32:
2352 case PPC::EXTSW_32_64: {
2353 if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2354 return false;
2355 break;
2356 }
2357 }
2358
2359 // See if we can handle this address.
2360 Address Addr;
2361 if (!PPCComputeAddress(LI->getOperand(0), Addr))
2362 return false;
2363
2364 Register ResultReg = MI->getOperand(0).getReg();
2365
2366 if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
2367 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
2368 return false;
2369
2371 removeDeadCode(I, std::next(I));
2372 return true;
2373}
2374
2375// Attempt to lower call arguments in a faster way than done by
2376// the selection DAG code.
2377bool PPCFastISel::fastLowerArguments() {
2378 // Defer to normal argument lowering for now. It's reasonably
2379 // efficient. Consider doing something like ARM to handle the
2380 // case where all args fit in registers, no varargs, no float
2381 // or vector args.
2382 return false;
2383}
2384
2385// Handle materializing integer constants into a register. This is not
2386// automatically generated for PowerPC, so must be explicitly created here.
2387Register PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2388
2389 if (Opc != ISD::Constant)
2390 return Register();
2391
2392 // If we're using CR bit registers for i1 values, handle that as a special
2393 // case first.
2394 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2395 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2396 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2397 TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2398 return ImmReg;
2399 }
2400
2401 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2402 VT != MVT::i1)
2403 return Register();
2404
2405 const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2406 &PPC::GPRCRegClass);
2407 if (VT == MVT::i64)
2408 return PPCMaterialize64BitInt(Imm, RC);
2409 else
2410 return PPCMaterialize32BitInt(Imm, RC);
2411}
2412
2413// Override for ADDI and ADDI8 to set the correct register class
2414// on RHS operand 0. The automatic infrastructure naively assumes
2415// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2416// for these cases. At the moment, none of the other automatically
2417// generated RI instructions require special treatment. However, once
2418// SelectSelect is implemented, "isel" requires similar handling.
2419//
2420// Also be conservative about the output register class. Avoid
2421// assigning R0 or X0 to the output register for GPRC and G8RC
2422// register classes, as any such result could be used in ADDI, etc.,
2423// where those regs have another meaning.
2424Register PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2425 const TargetRegisterClass *RC,
2426 Register Op0, uint64_t Imm) {
2427 if (MachineInstOpcode == PPC::ADDI)
2428 MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2429 else if (MachineInstOpcode == PPC::ADDI8)
2430 MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2431
2432 const TargetRegisterClass *UseRC =
2433 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2434 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2435
2436 return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Imm);
2437}
2438
2439// Override for instructions with one register operand to avoid use of
2440// R0/X0. The automatic infrastructure isn't aware of the context so
2441// we must be conservative.
2442Register PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2443 const TargetRegisterClass *RC,
2444 Register Op0) {
2445 const TargetRegisterClass *UseRC =
2446 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2447 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2448
2449 return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0);
2450}
2451
2452// Override for instructions with two register operands to avoid use
2453// of R0/X0. The automatic infrastructure isn't aware of the context
2454// so we must be conservative.
2455Register PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2456 const TargetRegisterClass *RC,
2457 Register Op0, Register Op1) {
2458 const TargetRegisterClass *UseRC =
2459 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2460 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2461
2462 return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op1);
2463}
2464
2465namespace llvm {
2466 // Create the fast instruction selector for PowerPC64 ELF.
2468 const TargetLibraryInfo *LibInfo,
2469 const LibcallLoweringInfo *LibcallLowering) {
2470 // Only available on 64-bit for now.
2471 const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2472 if (Subtarget.isPPC64())
2473 return new PPCFastISel(FuncInfo, LibInfo, LibcallLowering);
2474 return nullptr;
2475}
2476}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file defines the FastISel class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t IntrinsicInst * II
static std::optional< PPC::Predicate > getComparePred(CmpInst::Predicate Pred)
static constexpr MCPhysReg FPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
This file describes how to lower LLVM code to machine code.
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1571
an instruction to allocate memory on the stack
PointerType * getType() const
Overload to return most specific pointer type.
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Register getLocReg() const
LocInfo getLocInfo() const
unsigned getValNo() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:282
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
This is an important base class in LLVM.
Definition Constant.h:43
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
Register fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0, uint64_t Imm)
Emit a MachineInstr with a register operand, an immediate, and a result register in the given registe...
Register fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0, Register Op1)
Emit a MachineInstr with two register operands and a result register in the given register class.
Register fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0)
Emit a MachineInstr with one register operand and a result register in the given register class.
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
MachineBasicBlock::iterator InsertPt
MBB - The current insert position inside the current block.
MachineBasicBlock * MBB
MBB - The current block.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
Machine Value Type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MachineInstrBundleIterator< MachineInstr > iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
const PPCTargetLowering * getTargetLowering() const override
const PPCInstrInfo * getInstrInfo() const override
bool isLittleEndian() const
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:754
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned getID() const
Return the register class ID number.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
bool isOSAIX() const
Tests whether the OS is AIX.
Definition Triple.h:782
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
const Use * const_op_iterator
Definition User.h:255
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ MO_TOC_LO
Definition PPC.h:185
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference SmallVector in its return type.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.