//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PowerPC-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// PPCGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
14
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCISelLowering.h"
20#include "PPCSubtarget.h"
29#include "llvm/IR/CallingConv.h"
32#include "llvm/IR/Operator.h"
34
//===----------------------------------------------------------------------===//
//
// TBD:
//   fastLowerArguments: Handle simple cases.
//   PPCMaterializeGV: Handle TLS.
//   SelectCall: Handle function pointers.
//   SelectCall: Handle multi-register return values.
//   SelectCall: Optimize away nops for local calls.
//   processCallArgs: Handle bit-converted arguments.
//   finishCall: Handle multi-register return values.
//   PPCComputeAddress: Handle parameter references as FrameIndex's.
//   PPCEmitCmp: Handle immediate as operand 1.
//   SelectCall: Handle small byval arguments.
//   SelectIntrinsicCall: Implement.
//   SelectSelect: Implement.
//   Consider factoring isTypeLegal into the base class.
//   Implement switches and jump tables.
//
//===----------------------------------------------------------------------===//
54using namespace llvm;
55
56#define DEBUG_TYPE "ppcfastisel"
57
58namespace {
59
60struct Address {
61 enum {
62 RegBase,
63 FrameIndexBase
64 } BaseType;
65
66 union {
67 unsigned Reg;
68 int FI;
69 } Base;
70
71 int64_t Offset;
72
73 // Innocuous defaults for our address.
74 Address()
75 : BaseType(RegBase), Offset(0) {
76 Base.Reg = 0;
77 }
78};
79
80class PPCFastISel final : public FastISel {
81
82 const TargetMachine &TM;
83 const PPCSubtarget *Subtarget;
84 PPCFunctionInfo *PPCFuncInfo;
85 const TargetInstrInfo &TII;
86 const TargetLowering &TLI;
87 LLVMContext *Context;
88
89 public:
90 explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
91 const TargetLibraryInfo *LibInfo)
92 : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
93 Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
94 PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
95 TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
96 Context(&FuncInfo.Fn->getContext()) {}
97
98 // Backend specific FastISel code.
99 private:
100 bool fastSelectInstruction(const Instruction *I) override;
101 Register fastMaterializeConstant(const Constant *C) override;
102 Register fastMaterializeAlloca(const AllocaInst *AI) override;
103 bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
104 const LoadInst *LI) override;
105 bool fastLowerArguments() override;
106 Register fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
107 Register fastEmitInst_ri(unsigned MachineInstOpcode,
108 const TargetRegisterClass *RC, Register Op0,
109 uint64_t Imm);
110 Register fastEmitInst_r(unsigned MachineInstOpcode,
111 const TargetRegisterClass *RC, Register Op0);
112 Register fastEmitInst_rr(unsigned MachineInstOpcode,
113 const TargetRegisterClass *RC, Register Op0,
114 Register Op1);
115
116 bool fastLowerCall(CallLoweringInfo &CLI) override;
117
118 // Instruction selection routines.
119 private:
120 bool SelectLoad(const Instruction *I);
121 bool SelectStore(const Instruction *I);
122 bool SelectBranch(const Instruction *I);
123 bool SelectIndirectBr(const Instruction *I);
124 bool SelectFPExt(const Instruction *I);
125 bool SelectFPTrunc(const Instruction *I);
126 bool SelectIToFP(const Instruction *I, bool IsSigned);
127 bool SelectFPToI(const Instruction *I, bool IsSigned);
128 bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
129 bool SelectRet(const Instruction *I);
130 bool SelectTrunc(const Instruction *I);
131 bool SelectIntExt(const Instruction *I);
132
133 // Utility routines.
134 private:
135 bool isTypeLegal(Type *Ty, MVT &VT);
136 bool isLoadTypeLegal(Type *Ty, MVT &VT);
137 bool isValueAvailable(const Value *V) const;
138 bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
139 return RC->getID() == PPC::VSFRCRegClassID;
140 }
141 bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
142 return RC->getID() == PPC::VSSRCRegClassID;
143 }
144 Register copyRegToRegClass(const TargetRegisterClass *ToRC, Register SrcReg,
145 unsigned Flag = 0, unsigned SubReg = 0) {
146 Register TmpReg = createResultReg(ToRC);
147 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
148 TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
149 return TmpReg;
150 }
151 bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt,
152 Register DestReg, const PPC::Predicate Pred);
153 bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
154 const TargetRegisterClass *RC, bool IsZExt = true,
155 unsigned FP64LoadOpc = PPC::LFD);
156 bool PPCEmitStore(MVT VT, Register SrcReg, Address &Addr);
157 bool PPCComputeAddress(const Value *Obj, Address &Addr);
158 void PPCSimplifyAddress(Address &Addr, bool &UseOffset, Register &IndexReg);
159 bool PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, Register DestReg,
160 bool IsZExt);
161 Register PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
162 Register PPCMaterializeGV(const GlobalValue *GV, MVT VT);
163 Register PPCMaterializeInt(const ConstantInt *CI, MVT VT,
164 bool UseSExt = true);
165 Register PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
166 Register PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC);
167 Register PPCMoveToIntReg(const Instruction *I, MVT VT, Register SrcReg,
168 bool IsSigned);
169 Register PPCMoveToFPReg(MVT VT, Register SrcReg, bool IsSigned);
170
171 // Call handling routines.
172 private:
173 bool processCallArgs(SmallVectorImpl<Value *> &Args,
175 SmallVectorImpl<MVT> &ArgVTs,
178 unsigned &NumBytes, bool IsVarArg);
179 bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
180
181 private:
182 #include "PPCGenFastISel.inc"
183
184};
185
186} // end anonymous namespace
187
188static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
189 switch (Pred) {
190 // These are not representable with any single compare.
193 // Major concern about the following 6 cases is NaN result. The comparison
194 // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
195 // only one of which will be set. The result is generated by fcmpu
196 // instruction. However, bc instruction only inspects one of the first 3
197 // bits, so when un is set, bc instruction may jump to an undesired
198 // place.
199 //
200 // More specifically, if we expect an unordered comparison and un is set, we
201 // expect to always go to true branch; in such case UEQ, UGT and ULT still
202 // give false, which are undesired; but UNE, UGE, ULE happen to give true,
203 // since they are tested by inspecting !eq, !lt, !gt, respectively.
204 //
205 // Similarly, for ordered comparison, when un is set, we always expect the
206 // result to be false. In such case OGT, OLT and OEQ is good, since they are
207 // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
208 // and ONE are tested through !lt, !gt and !eq, and these are true.
215 default:
216 return std::nullopt;
217
219 case CmpInst::ICMP_EQ:
220 return PPC::PRED_EQ;
221
225 return PPC::PRED_GT;
226
230 return PPC::PRED_GE;
231
235 return PPC::PRED_LT;
236
240 return PPC::PRED_LE;
241
243 case CmpInst::ICMP_NE:
244 return PPC::PRED_NE;
245
247 return PPC::PRED_NU;
248
250 return PPC::PRED_UN;
251 }
252}
253
254// Determine whether the type Ty is simple enough to be handled by
255// fast-isel, and return its equivalent machine type in VT.
256// FIXME: Copied directly from ARM -- factor into base class?
257bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
258 EVT Evt = TLI.getValueType(DL, Ty, true);
259
260 // Only handle simple types.
261 if (Evt == MVT::Other || !Evt.isSimple()) return false;
262 VT = Evt.getSimpleVT();
263
264 // Handle all legal types, i.e. a register that will directly hold this
265 // value.
266 return TLI.isTypeLegal(VT);
267}
268
269// Determine whether the type Ty is simple enough to be handled by
270// fast-isel as a load target, and return its equivalent machine type in VT.
271bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
272 if (isTypeLegal(Ty, VT)) return true;
273
274 // If this is a type than can be sign or zero-extended to a basic operation
275 // go ahead and accept it now.
276 if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
277 return true;
278 }
279
280 return false;
281}
282
283bool PPCFastISel::isValueAvailable(const Value *V) const {
284 if (!isa<Instruction>(V))
285 return true;
286
287 const auto *I = cast<Instruction>(V);
288 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
289}
290
291// Given a value Obj, create an Address object Addr that represents its
292// address. Return false if we can't handle it.
293bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
294 const User *U = nullptr;
295 unsigned Opcode = Instruction::UserOp1;
296 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
297 // Don't walk into other basic blocks unless the object is an alloca from
298 // another block, otherwise it may not have a virtual register assigned.
299 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
300 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
301 Opcode = I->getOpcode();
302 U = I;
303 }
304 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
305 Opcode = C->getOpcode();
306 U = C;
307 }
308
309 switch (Opcode) {
310 default:
311 break;
312 case Instruction::BitCast:
313 // Look through bitcasts.
314 return PPCComputeAddress(U->getOperand(0), Addr);
315 case Instruction::IntToPtr:
316 // Look past no-op inttoptrs.
317 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
318 TLI.getPointerTy(DL))
319 return PPCComputeAddress(U->getOperand(0), Addr);
320 break;
321 case Instruction::PtrToInt:
322 // Look past no-op ptrtoints.
323 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
324 return PPCComputeAddress(U->getOperand(0), Addr);
325 break;
326 case Instruction::GetElementPtr: {
327 Address SavedAddr = Addr;
328 int64_t TmpOffset = Addr.Offset;
329
330 // Iterate through the GEP folding the constants into offsets where
331 // we can.
333 for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
334 II != IE; ++II, ++GTI) {
335 const Value *Op = *II;
336 if (StructType *STy = GTI.getStructTypeOrNull()) {
337 const StructLayout *SL = DL.getStructLayout(STy);
338 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
339 TmpOffset += SL->getElementOffset(Idx);
340 } else {
341 uint64_t S = GTI.getSequentialElementStride(DL);
342 for (;;) {
343 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
344 // Constant-offset addressing.
345 TmpOffset += CI->getSExtValue() * S;
346 break;
347 }
348 if (canFoldAddIntoGEP(U, Op)) {
349 // A compatible add with a constant operand. Fold the constant.
350 ConstantInt *CI =
351 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
352 TmpOffset += CI->getSExtValue() * S;
353 // Iterate on the other operand.
354 Op = cast<AddOperator>(Op)->getOperand(0);
355 continue;
356 }
357 // Unsupported
358 goto unsupported_gep;
359 }
360 }
361 }
362
363 // Try to grab the base operand now.
364 Addr.Offset = TmpOffset;
365 if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
366
367 // We failed, restore everything and try the other options.
368 Addr = SavedAddr;
369
370 unsupported_gep:
371 break;
372 }
373 case Instruction::Alloca: {
374 const AllocaInst *AI = cast<AllocaInst>(Obj);
375 DenseMap<const AllocaInst*, int>::iterator SI =
376 FuncInfo.StaticAllocaMap.find(AI);
377 if (SI != FuncInfo.StaticAllocaMap.end()) {
378 Addr.BaseType = Address::FrameIndexBase;
379 Addr.Base.FI = SI->second;
380 return true;
381 }
382 break;
383 }
384 }
385
386 // FIXME: References to parameters fall through to the behavior
387 // below. They should be able to reference a frame index since
388 // they are stored to the stack, so we can get "ld rx, offset(r1)"
389 // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
390 // just contain the parameter. Try to handle this with a FI.
391
392 // Try to get this in a register if nothing else has worked.
393 if (Addr.Base.Reg == 0)
394 Addr.Base.Reg = getRegForValue(Obj);
395
396 // Prevent assignment of base register to X0, which is inappropriate
397 // for loads and stores alike.
398 if (Addr.Base.Reg != 0)
399 MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
400
401 return Addr.Base.Reg != 0;
402}
403
404// Fix up some addresses that can't be used directly. For example, if
405// an offset won't fit in an instruction field, we may need to move it
406// into an index register.
407void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
408 Register &IndexReg) {
409
410 // Check whether the offset fits in the instruction field.
411 if (!isInt<16>(Addr.Offset))
412 UseOffset = false;
413
414 // If this is a stack pointer and the offset needs to be simplified then
415 // put the alloca address into a register, set the base type back to
416 // register and continue. This should almost never happen.
417 if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
418 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
419 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
420 ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
421 Addr.Base.Reg = ResultReg;
422 Addr.BaseType = Address::RegBase;
423 }
424
425 if (!UseOffset) {
426 IntegerType *OffsetTy = Type::getInt64Ty(*Context);
427 const ConstantInt *Offset = ConstantInt::getSigned(OffsetTy, Addr.Offset);
428 IndexReg = PPCMaterializeInt(Offset, MVT::i64);
429 assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
430 }
431}
432
433// Emit a load instruction if possible, returning true if we succeeded,
434// otherwise false. See commentary below for how the register class of
435// the load is determined.
436bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
437 const TargetRegisterClass *RC,
438 bool IsZExt, unsigned FP64LoadOpc) {
439 unsigned Opc;
440 bool UseOffset = true;
441 bool HasSPE = Subtarget->hasSPE();
442
443 // If ResultReg is given, it determines the register class of the load.
444 // Otherwise, RC is the register class to use. If the result of the
445 // load isn't anticipated in this block, both may be zero, in which
446 // case we must make a conservative guess. In particular, don't assign
447 // R0 or X0 to the result register, as the result may be used in a load,
448 // store, add-immediate, or isel that won't permit this. (Though
449 // perhaps the spill and reload of live-exit values would handle this?)
450 const TargetRegisterClass *UseRC =
451 (ResultReg ? MRI.getRegClass(ResultReg) :
452 (RC ? RC :
453 (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
454 (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
455 (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
456 &PPC::GPRC_and_GPRC_NOR0RegClass)))));
457
458 bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
459
460 switch (VT.SimpleTy) {
461 default: // e.g., vector types not handled
462 return false;
463 case MVT::i8:
464 Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
465 break;
466 case MVT::i16:
467 Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
468 : (Is32BitInt ? PPC::LHA : PPC::LHA8));
469 break;
470 case MVT::i32:
471 Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
472 : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
473 if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
474 UseOffset = false;
475 break;
476 case MVT::i64:
477 Opc = PPC::LD;
478 assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
479 "64-bit load with 32-bit target??");
480 UseOffset = ((Addr.Offset & 3) == 0);
481 break;
482 case MVT::f32:
483 Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
484 break;
485 case MVT::f64:
486 Opc = FP64LoadOpc;
487 break;
488 }
489
490 // If necessary, materialize the offset into a register and use
491 // the indexed form. Also handle stack pointers with special needs.
492 Register IndexReg;
493 PPCSimplifyAddress(Addr, UseOffset, IndexReg);
494
495 // If this is a potential VSX load with an offset of 0, a VSX indexed load can
496 // be used.
497 bool IsVSSRC = isVSSRCRegClass(UseRC);
498 bool IsVSFRC = isVSFRCRegClass(UseRC);
499 bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
500 bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
501 if ((Is32VSXLoad || Is64VSXLoad) &&
502 (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
503 (Addr.Offset == 0)) {
504 UseOffset = false;
505 }
506
507 if (!ResultReg)
508 ResultReg = createResultReg(UseRC);
509
510 // Note: If we still have a frame index here, we know the offset is
511 // in range, as otherwise PPCSimplifyAddress would have converted it
512 // into a RegBase.
513 if (Addr.BaseType == Address::FrameIndexBase) {
514 // VSX only provides an indexed load.
515 if (Is32VSXLoad || Is64VSXLoad) return false;
516
517 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
518 MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
519 Addr.Offset),
520 MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
521 MFI.getObjectAlign(Addr.Base.FI));
522
523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
524 .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
525
526 // Base reg with offset in range.
527 } else if (UseOffset) {
528 // VSX only provides an indexed load.
529 if (Is32VSXLoad || Is64VSXLoad) return false;
530
531 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
532 .addImm(Addr.Offset).addReg(Addr.Base.Reg);
533
534 // Indexed form.
535 } else {
536 // Get the RR opcode corresponding to the RI one. FIXME: It would be
537 // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
538 // is hard to get at.
539 switch (Opc) {
540 default: llvm_unreachable("Unexpected opcode!");
541 case PPC::LBZ: Opc = PPC::LBZX; break;
542 case PPC::LBZ8: Opc = PPC::LBZX8; break;
543 case PPC::LHZ: Opc = PPC::LHZX; break;
544 case PPC::LHZ8: Opc = PPC::LHZX8; break;
545 case PPC::LHA: Opc = PPC::LHAX; break;
546 case PPC::LHA8: Opc = PPC::LHAX8; break;
547 case PPC::LWZ: Opc = PPC::LWZX; break;
548 case PPC::LWZ8: Opc = PPC::LWZX8; break;
549 case PPC::LWA: Opc = PPC::LWAX; break;
550 case PPC::LWA_32: Opc = PPC::LWAX_32; break;
551 case PPC::LD: Opc = PPC::LDX; break;
552 case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
553 case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
554 case PPC::EVLDD: Opc = PPC::EVLDDX; break;
555 case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
556 }
557
558 auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
559 ResultReg);
560
561 // If we have an index register defined we use it in the store inst,
562 // otherwise we use X0 as base as it makes the vector instructions to
563 // use zero in the computation of the effective address regardless the
564 // content of the register.
565 if (IndexReg)
566 MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
567 else
568 MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
569 }
570
571 return true;
572}
573
574// Attempt to fast-select a load instruction.
575bool PPCFastISel::SelectLoad(const Instruction *I) {
576 // FIXME: No atomic loads are supported.
577 if (cast<LoadInst>(I)->isAtomic())
578 return false;
579
580 // Verify we have a legal type before going any further.
581 MVT VT;
582 if (!isLoadTypeLegal(I->getType(), VT))
583 return false;
584
585 // See if we can handle this address.
586 Address Addr;
587 if (!PPCComputeAddress(I->getOperand(0), Addr))
588 return false;
589
590 // Look at the currently assigned register for this instruction
591 // to determine the required register class. This is necessary
592 // to constrain RA from using R0/X0 when this is not legal.
593 Register AssignedReg = FuncInfo.ValueMap[I];
594 const TargetRegisterClass *RC =
595 AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
596
597 Register ResultReg = 0;
598 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
599 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
600 return false;
601 updateValueMap(I, ResultReg);
602 return true;
603}
604
605// Emit a store instruction to store SrcReg at Addr.
606bool PPCFastISel::PPCEmitStore(MVT VT, Register SrcReg, Address &Addr) {
607 assert(SrcReg && "Nothing to store!");
608 unsigned Opc;
609 bool UseOffset = true;
610
611 const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
612 bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
613
614 switch (VT.SimpleTy) {
615 default: // e.g., vector types not handled
616 return false;
617 case MVT::i8:
618 Opc = Is32BitInt ? PPC::STB : PPC::STB8;
619 break;
620 case MVT::i16:
621 Opc = Is32BitInt ? PPC::STH : PPC::STH8;
622 break;
623 case MVT::i32:
624 assert(Is32BitInt && "Not GPRC for i32??");
625 Opc = PPC::STW;
626 break;
627 case MVT::i64:
628 Opc = PPC::STD;
629 UseOffset = ((Addr.Offset & 3) == 0);
630 break;
631 case MVT::f32:
632 Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
633 break;
634 case MVT::f64:
635 Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
636 break;
637 }
638
639 // If necessary, materialize the offset into a register and use
640 // the indexed form. Also handle stack pointers with special needs.
641 Register IndexReg;
642 PPCSimplifyAddress(Addr, UseOffset, IndexReg);
643
644 // If this is a potential VSX store with an offset of 0, a VSX indexed store
645 // can be used.
646 bool IsVSSRC = isVSSRCRegClass(RC);
647 bool IsVSFRC = isVSFRCRegClass(RC);
648 bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
649 bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
650 if ((Is32VSXStore || Is64VSXStore) &&
651 (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
652 (Addr.Offset == 0)) {
653 UseOffset = false;
654 }
655
656 // Note: If we still have a frame index here, we know the offset is
657 // in range, as otherwise PPCSimplifyAddress would have converted it
658 // into a RegBase.
659 if (Addr.BaseType == Address::FrameIndexBase) {
660 // VSX only provides an indexed store.
661 if (Is32VSXStore || Is64VSXStore) return false;
662
663 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
664 MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
665 Addr.Offset),
666 MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
667 MFI.getObjectAlign(Addr.Base.FI));
668
669 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
670 .addReg(SrcReg)
671 .addImm(Addr.Offset)
672 .addFrameIndex(Addr.Base.FI)
673 .addMemOperand(MMO);
674
675 // Base reg with offset in range.
676 } else if (UseOffset) {
677 // VSX only provides an indexed store.
678 if (Is32VSXStore || Is64VSXStore)
679 return false;
680
681 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
682 .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
683
684 // Indexed form.
685 } else {
686 // Get the RR opcode corresponding to the RI one. FIXME: It would be
687 // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
688 // is hard to get at.
689 switch (Opc) {
690 default: llvm_unreachable("Unexpected opcode!");
691 case PPC::STB: Opc = PPC::STBX; break;
692 case PPC::STH : Opc = PPC::STHX; break;
693 case PPC::STW : Opc = PPC::STWX; break;
694 case PPC::STB8: Opc = PPC::STBX8; break;
695 case PPC::STH8: Opc = PPC::STHX8; break;
696 case PPC::STW8: Opc = PPC::STWX8; break;
697 case PPC::STD: Opc = PPC::STDX; break;
698 case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
699 case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
700 case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
701 case PPC::SPESTW: Opc = PPC::SPESTWX; break;
702 }
703
704 auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
705 .addReg(SrcReg);
706
707 // If we have an index register defined we use it in the store inst,
708 // otherwise we use X0 as base as it makes the vector instructions to
709 // use zero in the computation of the effective address regardless the
710 // content of the register.
711 if (IndexReg)
712 MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
713 else
714 MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
715 }
716
717 return true;
718}
719
720// Attempt to fast-select a store instruction.
721bool PPCFastISel::SelectStore(const Instruction *I) {
722 Value *Op0 = I->getOperand(0);
723 Register SrcReg;
724
725 // FIXME: No atomics loads are supported.
726 if (cast<StoreInst>(I)->isAtomic())
727 return false;
728
729 // Verify we have a legal type before going any further.
730 MVT VT;
731 if (!isLoadTypeLegal(Op0->getType(), VT))
732 return false;
733
734 // Get the value to be stored into a register.
735 SrcReg = getRegForValue(Op0);
736 if (!SrcReg)
737 return false;
738
739 // See if we can handle this address.
740 Address Addr;
741 if (!PPCComputeAddress(I->getOperand(1), Addr))
742 return false;
743
744 if (!PPCEmitStore(VT, SrcReg, Addr))
745 return false;
746
747 return true;
748}
749
750// Attempt to fast-select a branch instruction.
751bool PPCFastISel::SelectBranch(const Instruction *I) {
752 const BranchInst *BI = cast<BranchInst>(I);
753 MachineBasicBlock *BrBB = FuncInfo.MBB;
754 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
755 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
756
757 // For now, just try the simplest case where it's fed by a compare.
758 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
759 if (isValueAvailable(CI)) {
760 std::optional<PPC::Predicate> OptPPCPred =
761 getComparePred(CI->getPredicate());
762 if (!OptPPCPred)
763 return false;
764
765 PPC::Predicate PPCPred = *OptPPCPred;
766
767 // Take advantage of fall-through opportunities.
768 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
769 std::swap(TBB, FBB);
770 PPCPred = PPC::InvertPredicate(PPCPred);
771 }
772
773 Register CondReg = createResultReg(&PPC::CRRCRegClass);
774
775 if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
776 CondReg, PPCPred))
777 return false;
778
779 BuildMI(*BrBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCC))
780 .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
781 .addReg(CondReg)
782 .addMBB(TBB);
783 finishCondBranch(BI->getParent(), TBB, FBB);
784 return true;
785 }
786 } else if (const ConstantInt *CI =
788 uint64_t Imm = CI->getZExtValue();
789 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
790 fastEmitBranch(Target, MIMD.getDL());
791 return true;
792 }
793
794 // FIXME: ARM looks for a case where the block containing the compare
795 // has been split from the block containing the branch. If this happens,
796 // there is a vreg available containing the result of the compare. I'm
797 // not sure we can do much, as we've lost the predicate information with
798 // the compare instruction -- we have a 4-bit CR but don't know which bit
799 // to test here.
800 return false;
801}
802
803// Attempt to emit a compare of the two source values. Signed and unsigned
804// comparisons are supported. Return false if we can't handle it.
805bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
806 bool IsZExt, Register DestReg,
807 const PPC::Predicate Pred) {
808 Type *Ty = SrcValue1->getType();
809 EVT SrcEVT = TLI.getValueType(DL, Ty, true);
810 if (!SrcEVT.isSimple())
811 return false;
812 MVT SrcVT = SrcEVT.getSimpleVT();
813
814 if (SrcVT == MVT::i1 && Subtarget->useCRBits())
815 return false;
816
817 // See if operand 2 is an immediate encodeable in the compare.
818 // FIXME: Operands are not in canonical order at -O0, so an immediate
819 // operand in position 1 is a lost opportunity for now. We are
820 // similar to ARM in this regard.
821 int64_t Imm = 0;
822 bool UseImm = false;
823 const bool HasSPE = Subtarget->hasSPE();
824
825 // Only 16-bit integer constants can be represented in compares for
826 // PowerPC. Others will be materialized into a register.
827 if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
828 if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
829 SrcVT == MVT::i8 || SrcVT == MVT::i1) {
830 const APInt &CIVal = ConstInt->getValue();
831 Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
832 (int64_t)CIVal.getSExtValue();
833 if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
834 UseImm = true;
835 }
836 }
837
838 Register SrcReg1 = getRegForValue(SrcValue1);
839 if (!SrcReg1)
840 return false;
841
842 Register SrcReg2;
843 if (!UseImm) {
844 SrcReg2 = getRegForValue(SrcValue2);
845 if (!SrcReg2)
846 return false;
847 }
848
849 unsigned CmpOpc;
850 bool NeedsExt = false;
851
852 auto RC1 = MRI.getRegClass(SrcReg1);
853 auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;
854
855 switch (SrcVT.SimpleTy) {
856 default: return false;
857 case MVT::f32:
858 if (HasSPE) {
859 switch (Pred) {
860 default: return false;
861 case PPC::PRED_EQ:
862 CmpOpc = PPC::EFSCMPEQ;
863 break;
864 case PPC::PRED_LT:
865 CmpOpc = PPC::EFSCMPLT;
866 break;
867 case PPC::PRED_GT:
868 CmpOpc = PPC::EFSCMPGT;
869 break;
870 }
871 } else {
872 CmpOpc = PPC::FCMPUS;
873 if (isVSSRCRegClass(RC1))
874 SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
875 if (RC2 && isVSSRCRegClass(RC2))
876 SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
877 }
878 break;
879 case MVT::f64:
880 if (HasSPE) {
881 switch (Pred) {
882 default: return false;
883 case PPC::PRED_EQ:
884 CmpOpc = PPC::EFDCMPEQ;
885 break;
886 case PPC::PRED_LT:
887 CmpOpc = PPC::EFDCMPLT;
888 break;
889 case PPC::PRED_GT:
890 CmpOpc = PPC::EFDCMPGT;
891 break;
892 }
893 } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
894 CmpOpc = PPC::XSCMPUDP;
895 } else {
896 CmpOpc = PPC::FCMPUD;
897 }
898 break;
899 case MVT::i1:
900 case MVT::i8:
901 case MVT::i16:
902 NeedsExt = true;
903 [[fallthrough]];
904 case MVT::i32:
905 if (!UseImm)
906 CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
907 else
908 CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
909 break;
910 case MVT::i64:
911 if (!UseImm)
912 CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
913 else
914 CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
915 break;
916 }
917
918 if (NeedsExt) {
919 Register ExtReg = createResultReg(&PPC::GPRCRegClass);
920 if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
921 return false;
922 SrcReg1 = ExtReg;
923
924 if (!UseImm) {
925 Register ExtReg = createResultReg(&PPC::GPRCRegClass);
926 if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
927 return false;
928 SrcReg2 = ExtReg;
929 }
930 }
931
932 if (!UseImm)
933 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
934 .addReg(SrcReg1).addReg(SrcReg2);
935 else
936 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
937 .addReg(SrcReg1).addImm(Imm);
938
939 return true;
940}
941
942// Attempt to fast-select a floating-point extend instruction.
943bool PPCFastISel::SelectFPExt(const Instruction *I) {
944 Value *Src = I->getOperand(0);
945 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
946 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
947
948 if (SrcVT != MVT::f32 || DestVT != MVT::f64)
949 return false;
950
951 Register SrcReg = getRegForValue(Src);
952 if (!SrcReg)
953 return false;
954
955 // No code is generated for a FP extend.
956 updateValueMap(I, SrcReg);
957 return true;
958}
959
960// Attempt to fast-select a floating-point truncate instruction.
961bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
962 Value *Src = I->getOperand(0);
963 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
964 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
965
966 if (SrcVT != MVT::f64 || DestVT != MVT::f32)
967 return false;
968
969 Register SrcReg = getRegForValue(Src);
970 if (!SrcReg)
971 return false;
972
973 // Round the result to single precision.
974 Register DestReg;
975 auto RC = MRI.getRegClass(SrcReg);
976 if (Subtarget->hasSPE()) {
977 DestReg = createResultReg(&PPC::GPRCRegClass);
978 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::EFSCFD),
979 DestReg)
980 .addReg(SrcReg);
981 } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
982 DestReg = createResultReg(&PPC::VSSRCRegClass);
983 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::XSRSP),
984 DestReg)
985 .addReg(SrcReg);
986 } else {
987 SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
988 DestReg = createResultReg(&PPC::F4RCRegClass);
989 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
990 TII.get(PPC::FRSP), DestReg)
991 .addReg(SrcReg);
992 }
993
994 updateValueMap(I, DestReg);
995 return true;
996}
997
998// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
999// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1000// those should be used instead of moving via a stack slot when the
1001// subtarget permits.
1002// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
1003// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
1004// case to 8 bytes which produces tighter code but wastes stack space.
1005Register PPCFastISel::PPCMoveToFPReg(MVT SrcVT, Register SrcReg,
1006 bool IsSigned) {
1007
1008 // If necessary, extend 32-bit int to 64-bit.
1009 if (SrcVT == MVT::i32) {
1010 Register TmpReg = createResultReg(&PPC::G8RCRegClass);
1011 if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
1012 return Register();
1013 SrcReg = TmpReg;
1014 }
1015
1016 // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1017 Address Addr;
1018 Addr.BaseType = Address::FrameIndexBase;
1019 Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);
1020
1021 // Store the value from the GPR.
1022 if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
1023 return Register();
1024
1025 // Load the integer value into an FPR. The kind of load used depends
1026 // on a number of conditions.
1027 unsigned LoadOpc = PPC::LFD;
1028
1029 if (SrcVT == MVT::i32) {
1030 if (!IsSigned) {
1031 LoadOpc = PPC::LFIWZX;
1032 Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
1033 } else if (Subtarget->hasLFIWAX()) {
1034 LoadOpc = PPC::LFIWAX;
1035 Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
1036 }
1037 }
1038
1039 const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1040 Register ResultReg;
1041 if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
1042 return Register();
1043
1044 return ResultReg;
1045}
1046
// Attempt to fast-select an integer-to-floating-point conversion.
// Returns false (punting to SelectionDAG) for any case not handled here.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only scalar f32/f64 results are handled.
  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  // Only integer sources up to 64 bits are handled; i8/i16 get widened
  // to i64 further below.
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
  if (Subtarget->hasSPE()) {
    unsigned Opc;
    if (DstVT == MVT::f32)
      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
    else
      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;

    Register DestReg = createResultReg(&PPC::SPERCRegClass);
    // Generate the convert.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
      .addReg(SrcReg);
    updateValueMap(I, DestReg);
    return true;
  }

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !Subtarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
    return false;

  // Extend the input if necessary.  Extension kind (zero vs sign)
  // follows the signedness of the conversion.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR (via a stack slot; see
  // PPCMoveToFPReg for details).
  Register FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (!FPReg)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register DestReg = createResultReg(RC);
  unsigned Opc;

  // FCFIDS/FCFIDUS produce single precision directly; FCFID/FCFIDU
  // produce double precision.
  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}
1134
1135// Move the floating-point value in SrcReg into an integer destination
1136// register, and return the register (or zero if we can't handle it).
1137// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1138// those should be used instead of moving via a stack slot when the
1139// subtarget permits.
1140Register PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
1141 Register SrcReg, bool IsSigned) {
1142 // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1143 // Note that if have STFIWX available, we could use a 4-byte stack
1144 // slot for i32, but this being fast-isel we'll just go with the
1145 // easiest code gen possible.
1146 Address Addr;
1147 Addr.BaseType = Address::FrameIndexBase;
1148 Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);
1149
1150 // Store the value from the FPR.
1151 if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
1152 return Register();
1153
1154 // Reload it into a GPR. If we want an i32 on big endian, modify the
1155 // address to have a 4-byte offset so we load from the right place.
1156 if (VT == MVT::i32)
1157 Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
1158
1159 // Look at the currently assigned register for this instruction
1160 // to determine the required register class.
1161 Register AssignedReg = FuncInfo.ValueMap[I];
1162 const TargetRegisterClass *RC =
1163 AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
1164
1165 Register ResultReg;
1166 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
1167 return Register();
1168
1169 return ResultReg;
1170}
1171
// Attempt to fast-select a floating-point-to-integer conversion.
// Returns false (punting to SelectionDAG) for any case not handled here.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only i32/i64 results are handled.
  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
      !Subtarget->hasSPE())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
  // meaningless copy to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass)
    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
  else if (InRC == &PPC::VSSRCRegClass)
    SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs or VSRs.
  Register DestReg;
  unsigned Opc;
  // Note: RC is the class after the copies above; InRC (used in the SPE
  // branch below) is the class the value arrived in.
  auto RC = MRI.getRegClass(SrcReg);

  if (Subtarget->hasSPE()) {
    // SPE: GPRC holds a single-precision value, so pick the EFS (single)
    // vs EFD (double) form of the truncating convert accordingly.
    DestReg = createResultReg(&PPC::GPRCRegClass);
    if (IsSigned)
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
    else
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
  } else if (isVSFRCRegClass(RC)) {
    // VSX truncating converts to signed/unsigned word or doubleword.
    DestReg = createResultReg(&PPC::VSFRCRegClass);
    if (DstVT == MVT::i32)
      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
    else
      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
  } else {
    DestReg = createResultReg(&PPC::F8RCRegClass);
    if (DstVT == MVT::i32)
      if (IsSigned)
        Opc = PPC::FCTIWZ;
      else
        // Without FPCVT there is no FCTIWUZ; fall back to a 64-bit
        // convert (the i64/unsigned/no-FPCVT case was rejected above).
        Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
    else
      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
  }

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  // For SPE the result is already in a GPR, so no move is needed.
  Register IntReg = Subtarget->hasSPE()
                        ? DestReg
                        : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);

  if (!IntReg)
    return false;

  updateValueMap(I, IntReg);
  return true;
}
1253
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.  Supports ADD/OR/SUB on the non-legal i8/i16
// types, including an immediate form when the RHS is a small constant.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  // 32-bit vs 64-bit opcode choice follows the chosen register class.
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  Register ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  Register SrcReg1 = getRegForValue(I->getOperand(0));
  if (!SrcReg1)
    return false;

  // Handle case of small immediate operand.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    // Only a signed 16-bit immediate fits the D-form instructions.
    if (isInt<16>(Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          // ADDI reads R0 as the constant zero, so keep the source out
          // of R0 by narrowing its register class.
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          // Likewise for the 64-bit form and X0.
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // Subtraction of an immediate becomes addition of its negation,
          // except -32768 whose negation doesn't fit in 16 bits.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  Register SrcReg2 = getRegForValue(I->getOperand(1));
  if (!SrcReg2)
    return false;

  // Reverse operands for subtract-from (SUBF computes RB - RA).
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
1360
1361// Handle arguments to a call that we're attempting to fast-select.
1362// Return false if the arguments are too complex for us at the moment.
1363bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args,
1364 SmallVectorImpl<Register> &ArgRegs,
1365 SmallVectorImpl<MVT> &ArgVTs,
1366 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1367 SmallVectorImpl<unsigned> &RegArgs,
1368 CallingConv::ID CC, unsigned &NumBytes,
1369 bool IsVarArg) {
1371 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1372
1373 // Reserve space for the linkage area on the stack.
1374 unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
1375 CCInfo.AllocateStack(LinkageSize, Align(8));
1376
1378 for (Value *Arg : Args)
1379 ArgTys.push_back(Arg->getType());
1380 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, ArgTys, CC_PPC64_ELF_FIS);
1381
1382 // Bail out if we can't handle any of the arguments.
1383 for (const CCValAssign &VA : ArgLocs) {
1384 MVT ArgVT = ArgVTs[VA.getValNo()];
1385
1386 // Skip vector arguments for now, as well as long double and
1387 // uint128_t, and anything that isn't passed in a register.
1388 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1389 !VA.isRegLoc() || VA.needsCustom())
1390 return false;
1391
1392 // Skip bit-converted arguments for now.
1393 if (VA.getLocInfo() == CCValAssign::BCvt)
1394 return false;
1395 }
1396
1397 // Get a count of how many bytes are to be pushed onto the stack.
1398 NumBytes = CCInfo.getStackSize();
1399
1400 // The prolog code of the callee may store up to 8 GPR argument registers to
1401 // the stack, allowing va_start to index over them in memory if its varargs.
1402 // Because we cannot tell if this is needed on the caller side, we have to
1403 // conservatively assume that it is needed. As such, make sure we have at
1404 // least enough stack space for the caller to store the 8 GPRs.
1405 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1406 NumBytes = std::max(NumBytes, LinkageSize + 64);
1407
1408 // Issue CALLSEQ_START.
1409 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1410 TII.get(TII.getCallFrameSetupOpcode()))
1411 .addImm(NumBytes).addImm(0);
1412
1413 // Prepare to assign register arguments. Every argument uses up a
1414 // GPR protocol register even if it's passed in a floating-point
1415 // register (unless we're using the fast calling convention).
1416 unsigned NextGPR = PPC::X3;
1417 unsigned NextFPR = PPC::F1;
1418
1419 // Process arguments.
1420 for (const CCValAssign &VA : ArgLocs) {
1421 Register Arg = ArgRegs[VA.getValNo()];
1422 MVT ArgVT = ArgVTs[VA.getValNo()];
1423
1424 // Handle argument promotion and bitcasts.
1425 switch (VA.getLocInfo()) {
1426 default:
1427 llvm_unreachable("Unknown loc info!");
1428 case CCValAssign::Full:
1429 break;
1430 case CCValAssign::SExt: {
1431 MVT DestVT = VA.getLocVT();
1432 const TargetRegisterClass *RC =
1433 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1434 Register TmpReg = createResultReg(RC);
1435 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1436 llvm_unreachable("Failed to emit a sext!");
1437 ArgVT = DestVT;
1438 Arg = TmpReg;
1439 break;
1440 }
1441 case CCValAssign::AExt:
1442 case CCValAssign::ZExt: {
1443 MVT DestVT = VA.getLocVT();
1444 const TargetRegisterClass *RC =
1445 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1446 Register TmpReg = createResultReg(RC);
1447 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1448 llvm_unreachable("Failed to emit a zext!");
1449 ArgVT = DestVT;
1450 Arg = TmpReg;
1451 break;
1452 }
1453 case CCValAssign::BCvt: {
1454 // FIXME: Not yet handled.
1455 llvm_unreachable("Should have bailed before getting here!");
1456 break;
1457 }
1458 }
1459
1460 // Copy this argument to the appropriate register.
1461 unsigned ArgReg;
1462 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1463 ArgReg = NextFPR++;
1464 if (CC != CallingConv::Fast)
1465 ++NextGPR;
1466 } else
1467 ArgReg = NextGPR++;
1468
1469 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1470 TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1471 RegArgs.push_back(ArgReg);
1472 }
1473
1474 return true;
1475}
1476
1477// For a call that we've determined we can fast-select, finish the
1478// call sequence and generate a copy to obtain the return value (if any).
1479bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1480 CallingConv::ID CC = CLI.CallConv;
1481
1482 // Issue CallSEQ_END.
1483 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1484 TII.get(TII.getCallFrameDestroyOpcode()))
1485 .addImm(NumBytes).addImm(0);
1486
1487 // Next, generate a copy to obtain the return value.
1488 // FIXME: No multi-register return values yet, though I don't foresee
1489 // any real difficulties there.
1490 if (RetVT != MVT::isVoid) {
1492 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1493 CCInfo.AnalyzeCallResult(RetVT, CLI.RetTy, RetCC_PPC64_ELF_FIS);
1494 CCValAssign &VA = RVLocs[0];
1495 assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1496 assert(VA.isRegLoc() && "Can only return in registers!");
1497
1498 MVT DestVT = VA.getValVT();
1499 MVT CopyVT = DestVT;
1500
1501 // Ints smaller than a register still arrive in a full 64-bit
1502 // register, so make sure we recognize this.
1503 if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1504 CopyVT = MVT::i64;
1505
1506 Register SourcePhysReg = VA.getLocReg();
1507 Register ResultReg;
1508
1509 if (RetVT == CopyVT) {
1510 const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1511 ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
1512
1513 // If necessary, round the floating result to single precision.
1514 } else if (CopyVT == MVT::f64) {
1515 ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1516 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::FRSP),
1517 ResultReg).addReg(SourcePhysReg);
1518
1519 // If only the low half of a general register is needed, generate
1520 // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1521 // used along the fast-isel path (not lowered), and downstream logic
1522 // also doesn't like a direct subreg copy on a physical reg.)
1523 } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1524 // Convert physical register from G8RC to GPRC.
1525 SourcePhysReg = (SourcePhysReg - PPC::X0) + PPC::R0;
1526 ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
1527 }
1528
1529 assert(ResultReg && "ResultReg unset!");
1530 CLI.InRegs.push_back(SourcePhysReg);
1531 CLI.ResultReg = ResultReg;
1532 CLI.NumResultRegs = 1;
1533 }
1534
1535 return true;
1536}
1537
1538bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1539 CallingConv::ID CC = CLI.CallConv;
1540 bool IsTailCall = CLI.IsTailCall;
1541 bool IsVarArg = CLI.IsVarArg;
1542 const Value *Callee = CLI.Callee;
1543 const MCSymbol *Symbol = CLI.Symbol;
1544
1545 if (!Callee && !Symbol)
1546 return false;
1547
1548 // Allow SelectionDAG isel to handle tail calls and long calls.
1549 if (IsTailCall || Subtarget->useLongCalls())
1550 return false;
1551
1552 // Let SDISel handle vararg functions.
1553 if (IsVarArg)
1554 return false;
1555
1556 // If this is a PC-Rel function, let SDISel handle the call.
1557 if (Subtarget->isUsingPCRelativeCalls())
1558 return false;
1559
1560 // Handle simple calls for now, with legal return types and
1561 // those that can be extended.
1562 Type *RetTy = CLI.RetTy;
1563 MVT RetVT;
1564 if (RetTy->isVoidTy())
1565 RetVT = MVT::isVoid;
1566 else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1567 RetVT != MVT::i8)
1568 return false;
1569 else if (RetVT == MVT::i1 && Subtarget->useCRBits())
1570 // We can't handle boolean returns when CR bits are in use.
1571 return false;
1572
1573 // FIXME: No multi-register return values yet.
1574 if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1575 RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1576 RetVT != MVT::f64) {
1578 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1579 CCInfo.AnalyzeCallResult(RetVT, RetTy, RetCC_PPC64_ELF_FIS);
1580 if (RVLocs.size() > 1)
1581 return false;
1582 }
1583
1584 // Bail early if more than 8 arguments, as we only currently
1585 // handle arguments passed in registers.
1586 unsigned NumArgs = CLI.OutVals.size();
1587 if (NumArgs > 8)
1588 return false;
1589
1590 // Set up the argument vectors.
1591 SmallVector<Value*, 8> Args;
1593 SmallVector<MVT, 8> ArgVTs;
1595
1596 Args.reserve(NumArgs);
1597 ArgRegs.reserve(NumArgs);
1598 ArgVTs.reserve(NumArgs);
1599 ArgFlags.reserve(NumArgs);
1600
1601 for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1602 // Only handle easy calls for now. It would be reasonably easy
1603 // to handle <= 8-byte structures passed ByVal in registers, but we
1604 // have to ensure they are right-justified in the register.
1605 ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1606 if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1607 return false;
1608
1609 Value *ArgValue = CLI.OutVals[i];
1610 Type *ArgTy = ArgValue->getType();
1611 MVT ArgVT;
1612 if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1613 return false;
1614
1615 // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
1616 // types, which is passed through vector register. Skip these types and
1617 // fallback to default SelectionDAG based selection.
1618 if (ArgVT.isVector() || ArgVT == MVT::f128)
1619 return false;
1620
1621 Register Arg = getRegForValue(ArgValue);
1622 if (!Arg)
1623 return false;
1624
1625 Args.push_back(ArgValue);
1626 ArgRegs.push_back(Arg);
1627 ArgVTs.push_back(ArgVT);
1628 ArgFlags.push_back(Flags);
1629 }
1630
1631 // Process the arguments.
1632 SmallVector<unsigned, 8> RegArgs;
1633 unsigned NumBytes;
1634
1635 if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1636 RegArgs, CC, NumBytes, IsVarArg))
1637 return false;
1638
1639 MachineInstrBuilder MIB;
1640 // FIXME: No handling for function pointers yet. This requires
1641 // implementing the function descriptor (OPD) setup.
1642 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1643 if (!GV) {
1644 // patchpoints are a special case; they always dispatch to a pointer value.
1645 // However, we don't actually want to generate the indirect call sequence
1646 // here (that will be generated, as necessary, during asm printing), and
1647 // the call we generate here will be erased by FastISel::selectPatchpoint,
1648 // so don't try very hard...
1649 if (CLI.IsPatchPoint)
1650 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::NOP));
1651 else
1652 return false;
1653 } else {
1654 // Build direct call with NOP for TOC restore.
1655 // FIXME: We can and should optimize away the NOP for local calls.
1656 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1657 TII.get(PPC::BL8_NOP));
1658 // Add callee.
1659 MIB.addGlobalAddress(GV);
1660 }
1661
1662 // Add implicit physical register uses to the call.
1663 for (unsigned Reg : RegArgs)
1665
1666 // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1667 // into the call.
1668 PPCFuncInfo->setUsesTOCBasePtr();
1669 MIB.addReg(PPC::X2, RegState::Implicit);
1670
1671 // Add a register mask with the call-preserved registers. Proper
1672 // defs for return values will be added by setPhysRegsDeadExcept().
1673 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1674
1675 CLI.Call = MIB;
1676
1677 // Finish off the call including any return values.
1678 return finishCall(RetVT, CLI, NumBytes);
1679}
1680
1681// Attempt to fast-select a return instruction.
1682bool PPCFastISel::SelectRet(const Instruction *I) {
1683
1684 if (!FuncInfo.CanLowerReturn)
1685 return false;
1686
1687 const ReturnInst *Ret = cast<ReturnInst>(I);
1688 const Function &F = *I->getParent()->getParent();
1689
1690 // Build a list of return value registers.
1692 CallingConv::ID CC = F.getCallingConv();
1693
1694 if (Ret->getNumOperands() > 0) {
1696 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1697
1698 // Analyze operands of the call, assigning locations to each operand.
1700 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1701 CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1702 const Value *RV = Ret->getOperand(0);
1703
1704 // FIXME: Only one output register for now.
1705 if (ValLocs.size() > 1)
1706 return false;
1707
1708 // Special case for returning a constant integer of any size - materialize
1709 // the constant as an i64 and copy it to the return register.
1710 if (isa<ConstantInt>(RV) && RV->getType()->isIntegerTy()) {
1711 const ConstantInt *CI = cast<ConstantInt>(RV);
1712 CCValAssign &VA = ValLocs[0];
1713
1714 Register RetReg = VA.getLocReg();
1715 // We still need to worry about properly extending the sign. For example,
1716 // we could have only a single bit or a constant that needs zero
1717 // extension rather than sign extension. Make sure we pass the return
1718 // value extension property to integer materialization.
1719 Register SrcReg =
1720 PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1721
1722 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1723 TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1724
1725 RetRegs.push_back(RetReg);
1726
1727 } else {
1728 Register Reg = getRegForValue(RV);
1729
1730 if (!Reg)
1731 return false;
1732
1733 // Copy the result values into the output registers.
1734 for (unsigned i = 0; i < ValLocs.size(); ++i) {
1735
1736 CCValAssign &VA = ValLocs[i];
1737 assert(VA.isRegLoc() && "Can only return in registers!");
1738 RetRegs.push_back(VA.getLocReg());
1739 Register SrcReg = Reg + VA.getValNo();
1740
1741 EVT RVEVT = TLI.getValueType(DL, RV->getType());
1742 if (!RVEVT.isSimple())
1743 return false;
1744 MVT RVVT = RVEVT.getSimpleVT();
1745 MVT DestVT = VA.getLocVT();
1746
1747 if (RVVT != DestVT && RVVT != MVT::i8 &&
1748 RVVT != MVT::i16 && RVVT != MVT::i32)
1749 return false;
1750
1751 if (RVVT != DestVT) {
1752 switch (VA.getLocInfo()) {
1753 default:
1754 llvm_unreachable("Unknown loc info!");
1755 case CCValAssign::Full:
1756 llvm_unreachable("Full value assign but types don't match?");
1757 case CCValAssign::AExt:
1758 case CCValAssign::ZExt: {
1759 const TargetRegisterClass *RC =
1760 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1761 Register TmpReg = createResultReg(RC);
1762 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1763 return false;
1764 SrcReg = TmpReg;
1765 break;
1766 }
1767 case CCValAssign::SExt: {
1768 const TargetRegisterClass *RC =
1769 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1770 Register TmpReg = createResultReg(RC);
1771 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1772 return false;
1773 SrcReg = TmpReg;
1774 break;
1775 }
1776 }
1777 }
1778
1779 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1780 TII.get(TargetOpcode::COPY), RetRegs[i])
1781 .addReg(SrcReg);
1782 }
1783 }
1784 }
1785
1786 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1787 TII.get(PPC::BLR8));
1788
1789 for (Register Reg : RetRegs)
1791
1792 return true;
1793}
1794
1795// Attempt to emit an integer extend of SrcReg into DestReg. Both
1796// signed and zero extensions are supported. Return false if we
1797// can't handle it.
1798bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
1799 Register DestReg, bool IsZExt) {
1800 if (DestVT != MVT::i32 && DestVT != MVT::i64)
1801 return false;
1802 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1803 return false;
1804
1805 // Signed extensions use EXTSB, EXTSH, EXTSW.
1806 if (!IsZExt) {
1807 unsigned Opc;
1808 if (SrcVT == MVT::i8)
1809 Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1810 else if (SrcVT == MVT::i16)
1811 Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1812 else {
1813 assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1814 Opc = PPC::EXTSW_32_64;
1815 }
1816 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1817 .addReg(SrcReg);
1818
1819 // Unsigned 32-bit extensions use RLWINM.
1820 } else if (DestVT == MVT::i32) {
1821 unsigned MB;
1822 if (SrcVT == MVT::i8)
1823 MB = 24;
1824 else {
1825 assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1826 MB = 16;
1827 }
1828 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLWINM),
1829 DestReg)
1830 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
1831
1832 // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1833 } else {
1834 unsigned MB;
1835 if (SrcVT == MVT::i8)
1836 MB = 56;
1837 else if (SrcVT == MVT::i16)
1838 MB = 48;
1839 else
1840 MB = 32;
1841 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1842 TII.get(PPC::RLDICL_32_64), DestReg)
1843 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
1844 }
1845
1846 return true;
1847}
1848
1849// Attempt to fast-select an indirect branch instruction.
1850bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1851 Register AddrReg = getRegForValue(I->getOperand(0));
1852 if (!AddrReg)
1853 return false;
1854
1855 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::MTCTR8))
1856 .addReg(AddrReg);
1857 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCTR8));
1858
1859 const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1860 for (const BasicBlock *SuccBB : IB->successors())
1861 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(SuccBB));
1862
1863 return true;
1864}
1865
1866// Attempt to fast-select an integer truncate instruction.
1867bool PPCFastISel::SelectTrunc(const Instruction *I) {
1868 Value *Src = I->getOperand(0);
1869 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1870 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1871
1872 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1873 return false;
1874
1875 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1876 return false;
1877
1878 Register SrcReg = getRegForValue(Src);
1879 if (!SrcReg)
1880 return false;
1881
1882 // The only interesting case is when we need to switch register classes.
1883 if (SrcVT == MVT::i64)
1884 SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
1885
1886 updateValueMap(I, SrcReg);
1887 return true;
1888}
1889
1890// Attempt to fast-select an integer extend instruction.
1891bool PPCFastISel::SelectIntExt(const Instruction *I) {
1892 Type *DestTy = I->getType();
1893 Value *Src = I->getOperand(0);
1894 Type *SrcTy = Src->getType();
1895
1896 bool IsZExt = isa<ZExtInst>(I);
1897 Register SrcReg = getRegForValue(Src);
1898 if (!SrcReg) return false;
1899
1900 EVT SrcEVT, DestEVT;
1901 SrcEVT = TLI.getValueType(DL, SrcTy, true);
1902 DestEVT = TLI.getValueType(DL, DestTy, true);
1903 if (!SrcEVT.isSimple())
1904 return false;
1905 if (!DestEVT.isSimple())
1906 return false;
1907
1908 MVT SrcVT = SrcEVT.getSimpleVT();
1909 MVT DestVT = DestEVT.getSimpleVT();
1910
1911 // If we know the register class needed for the result of this
1912 // instruction, use it. Otherwise pick the register class of the
1913 // correct size that does not contain X0/R0, since we don't know
1914 // whether downstream uses permit that assignment.
1915 Register AssignedReg = FuncInfo.ValueMap[I];
1916 const TargetRegisterClass *RC =
1917 (AssignedReg ? MRI.getRegClass(AssignedReg) :
1918 (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1919 &PPC::GPRC_and_GPRC_NOR0RegClass));
1920 Register ResultReg = createResultReg(RC);
1921
1922 if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1923 return false;
1924
1925 updateValueMap(I, ResultReg);
1926 return true;
1927}
1928
1929// Attempt to fast-select an instruction that wasn't handled by
1930// the table-generated machinery.
1931bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1932
1933 switch (I->getOpcode()) {
1934 case Instruction::Load:
1935 return SelectLoad(I);
1936 case Instruction::Store:
1937 return SelectStore(I);
1938 case Instruction::Br:
1939 return SelectBranch(I);
1940 case Instruction::IndirectBr:
1941 return SelectIndirectBr(I);
1942 case Instruction::FPExt:
1943 return SelectFPExt(I);
1944 case Instruction::FPTrunc:
1945 return SelectFPTrunc(I);
1946 case Instruction::SIToFP:
1947 return SelectIToFP(I, /*IsSigned*/ true);
1948 case Instruction::UIToFP:
1949 return SelectIToFP(I, /*IsSigned*/ false);
1950 case Instruction::FPToSI:
1951 return SelectFPToI(I, /*IsSigned*/ true);
1952 case Instruction::FPToUI:
1953 return SelectFPToI(I, /*IsSigned*/ false);
1954 case Instruction::Add:
1955 return SelectBinaryIntOp(I, ISD::ADD);
1956 case Instruction::Or:
1957 return SelectBinaryIntOp(I, ISD::OR);
1958 case Instruction::Sub:
1959 return SelectBinaryIntOp(I, ISD::SUB);
1960 case Instruction::Ret:
1961 return SelectRet(I);
1962 case Instruction::Trunc:
1963 return SelectTrunc(I);
1964 case Instruction::ZExt:
1965 case Instruction::SExt:
1966 return SelectIntExt(I);
1967 // Here add other flavors of Instruction::XXX that automated
1968 // cases don't catch. For example, switches are terminators
1969 // that aren't yet handled.
1970 default:
1971 break;
1972 }
1973 return false;
1974}
1975
1976// Materialize a floating-point constant into a register, and return
1977// the register number (or zero if we failed to handle it).
1978Register PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1979 // If this is a PC-Rel function, let SDISel handle constant pool.
1980 if (Subtarget->isUsingPCRelativeCalls())
1981 return Register();
1982
1983 // No plans to handle long double here.
1984 if (VT != MVT::f32 && VT != MVT::f64)
1985 return Register();
1986
1987 // All FP constants are loaded from the constant pool.
1988 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
1989 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
1990 const bool HasSPE = Subtarget->hasSPE();
1991 const TargetRegisterClass *RC;
1992 if (HasSPE)
1993 RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
1994 else
1995 RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
1996
1997 Register DestReg = createResultReg(RC);
1998 CodeModel::Model CModel = TM.getCodeModel();
1999
2000 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2002 MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);
2003
2004 unsigned Opc;
2005
2006 if (HasSPE)
2007 Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
2008 else
2009 Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
2010
2011 Register TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2012
2013 PPCFuncInfo->setUsesTOCBasePtr();
2014 // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
2015 if (CModel == CodeModel::Small) {
2016 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocCPT),
2017 TmpReg)
2018 .addConstantPoolIndex(Idx).addReg(PPC::X2);
2019 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2020 .addImm(0).addReg(TmpReg).addMemOperand(MMO);
2021 } else {
2022 // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
2023 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2024 TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
2025 // But for large code model, we must generate a LDtocL followed
2026 // by the LF[SD].
2027 if (CModel == CodeModel::Large) {
2028 Register TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2029 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2030 TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
2031 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2032 .addImm(0)
2033 .addReg(TmpReg2);
2034 } else
2035 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2037 .addReg(TmpReg)
2038 .addMemOperand(MMO);
2039 }
2040
2041 return DestReg;
2042}
2043
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
// The address is always produced in a 64-bit GPR (no-X0 class), since
// PPC fast-isel only runs on 64-bit subtargets.
Register PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
  // If this is a PC-Rel function, let SDISel handle GV materialization.
  if (Subtarget->isUsingPCRelativeCalls())
    return Register();

  assert(VT == MVT::i64 && "Non-address!");
  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
  Register DestReg = createResultReg(RC);

  // Global values may be plain old object addresses, TLS object
  // addresses, constant pool entries, or jump tables. How we generate
  // code for these may depend on small, medium, or large code model.
  CodeModel::Model CModel = TM.getCodeModel();

  // FIXME: Jump tables are not yet required because fast-isel doesn't
  // handle switches; if that changes, we need them as well. For now,
  // what follows assumes everything's a generic (or TLS) global address.

  // FIXME: We don't yet handle the complexity of TLS.
  if (GV->isThreadLocal())
    return Register();

  // Every path below addresses relative to the TOC pointer in X2.
  PPCFuncInfo->setUsesTOCBasePtr();
  // On AIX, variables carrying the "toc-data" attribute live directly in
  // the TOC and are addressed with a simple add rather than a load.
  bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
                      isa<GlobalVariable>(GV) &&
                      cast<GlobalVariable>(GV)->hasAttribute("toc-data");

  // For small code model, generate a simple TOC load.
  if (CModel == CodeModel::Small) {
    auto MIB = BuildMI(
        *FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
        IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc), DestReg);
    // Note: ADDItoc8 and LDtoc take their register / symbol operands in
    // opposite orders.
    if (IsAIXTocData)
      MIB.addReg(PPC::X2).addGlobalAddress(GV);
    else
      MIB.addGlobalAddress(GV).addReg(PPC::X2);
  } else {
    // If the address is an externally defined symbol, a symbol with common
    // or externally available linkage, a non-local function address, or a
    // jump table address (not yet needed), or if we are generating code
    // for large code model, we generate:
    //   LDtocL(GV, ADDIStocHA8(%x2, GV))
    // Otherwise we generate:
    //   ADDItocL8(ADDIStocHA8(%x2, GV), GV)
    // Either way, start with the ADDIStocHA8:
    Register HighPartReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
            HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);

    if (Subtarget->isGVIndirectSymbol(GV)) {
      assert(!IsAIXTocData && "TOC data should always be direct.");
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
              DestReg).addGlobalAddress(GV).addReg(HighPartReg);
    } else {
      // Otherwise generate the ADDItocL8.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8),
              DestReg)
          .addReg(HighPartReg)
          .addGlobalAddress(GV);
    }
  }

  return DestReg;
}
2110
2111// Materialize a 32-bit integer constant into a register, and return
2112// the register number (or zero if we failed to handle it).
2113Register PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2114 const TargetRegisterClass *RC) {
2115 unsigned Lo = Imm & 0xFFFF;
2116 unsigned Hi = (Imm >> 16) & 0xFFFF;
2117
2118 Register ResultReg = createResultReg(RC);
2119 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2120
2121 if (isInt<16>(Imm))
2122 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2123 TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2124 .addImm(Imm);
2125 else if (Lo) {
2126 // Both Lo and Hi have nonzero bits.
2127 Register TmpReg = createResultReg(RC);
2128 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2129 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2130 .addImm(Hi);
2131 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2132 TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2133 .addReg(TmpReg).addImm(Lo);
2134 } else
2135 // Just Hi bits.
2136 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2137 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2138 .addImm(Hi);
2139
2140 return ResultReg;
2141}
2142
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
// Strategy: reduce the constant to a 32-bit materialization plus, when
// necessary, a left shift (RLDICR) and up to two ORs for the low bits.
Register PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    Shift = llvm::countr_zero<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      // The value is a 32-bit pattern shifted left by its trailing zeros.
      Imm = ImmSh;
    else {
      // General case: split into high and low 32-bit halves. The low half
      // is kept in Remainder and ORed back in after the shift below.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  Register TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.
  Register TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    // RLDICR: rotate left by Shift and clear everything below bit Shift.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in the remaining low bits, 16 bits at a time (ORIS8 for bits
  // 16-31, ORI8 for bits 0-15), skipping halves that are zero.
  Register TmpReg3;
  unsigned Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  if ((Lo = Remainder & 0xFFFF)) {
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}
2199
2200// Materialize an integer constant into a register, and return
2201// the register number (or zero if we failed to handle it).
2202Register PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2203 bool UseSExt) {
2204 // If we're using CR bit registers for i1 values, handle that as a special
2205 // case first.
2206 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2207 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2208 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2209 TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2210 return ImmReg;
2211 }
2212
2213 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2214 VT != MVT::i1)
2215 return Register();
2216
2217 const TargetRegisterClass *RC =
2218 ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2219 int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2220
2221 // If the constant is in range, use a load-immediate.
2222 // Since LI will sign extend the constant we need to make sure that for
2223 // our zeroext constants that the sign extended constant fits into 16-bits -
2224 // a range of 0..0x7fff.
2225 if (isInt<16>(Imm)) {
2226 unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2227 Register ImmReg = createResultReg(RC);
2228 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ImmReg)
2229 .addImm(Imm);
2230 return ImmReg;
2231 }
2232
2233 // Construct the constant piecewise.
2234 if (VT == MVT::i64)
2235 return PPCMaterialize64BitInt(Imm, RC);
2236 else if (VT == MVT::i32)
2237 return PPCMaterialize32BitInt(Imm, RC);
2238
2239 return Register();
2240}
2241
2242// Materialize a constant into a register, and return the register
2243// number (or zero if we failed to handle it).
2244Register PPCFastISel::fastMaterializeConstant(const Constant *C) {
2245 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2246
2247 // Only handle simple types.
2248 if (!CEVT.isSimple())
2249 return Register();
2250 MVT VT = CEVT.getSimpleVT();
2251
2252 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2253 return PPCMaterializeFP(CFP, VT);
2254 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2255 return PPCMaterializeGV(GV, VT);
2256 else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2257 // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2258 // assumes that constant PHI operands will be zero extended, and failure to
2259 // match that assumption will cause problems if we sign extend here but
2260 // some user of a PHI is in a block for which we fall back to full SDAG
2261 // instruction selection.
2262 return PPCMaterializeInt(CI, VT, false);
2263
2264 return Register();
2265}
2266
2267// Materialize the address created by an alloca into a register, and
2268// return the register number (or zero if we failed to handle it).
2269Register PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2270 DenseMap<const AllocaInst *, int>::iterator SI =
2271 FuncInfo.StaticAllocaMap.find(AI);
2272
2273 // Don't handle dynamic allocas.
2274 if (SI == FuncInfo.StaticAllocaMap.end())
2275 return Register();
2276
2277 MVT VT;
2278 if (!isLoadTypeLegal(AI->getType(), VT))
2279 return Register();
2280
2281 if (SI != FuncInfo.StaticAllocaMap.end()) {
2282 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2283 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
2284 ResultReg).addFrameIndex(SI->second).addImm(0);
2285 return ResultReg;
2286 }
2287
2288 return Register();
2289}
2290
2291// Fold loads into extends when possible.
2292// FIXME: We can have multiple redundant extend/trunc instructions
2293// following a load. The folding only picks up one. Extend this
2294// to check subsequent instructions for the same pattern and remove
2295// them. Thus ResultReg should be the def reg for the last redundant
2296// instruction in a chain, and all intervening instructions can be
2297// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
2298// to add ELF64-NOT: rldicl to the appropriate tests when this works.
2299bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2300 const LoadInst *LI) {
2301 // Verify we have a legal type before going any further.
2302 MVT VT;
2303 if (!isLoadTypeLegal(LI->getType(), VT))
2304 return false;
2305
2306 // Combine load followed by zero- or sign-extend.
2307 bool IsZExt = false;
2308 switch(MI->getOpcode()) {
2309 default:
2310 return false;
2311
2312 case PPC::RLDICL:
2313 case PPC::RLDICL_32_64: {
2314 IsZExt = true;
2315 unsigned MB = MI->getOperand(3).getImm();
2316 if ((VT == MVT::i8 && MB <= 56) ||
2317 (VT == MVT::i16 && MB <= 48) ||
2318 (VT == MVT::i32 && MB <= 32))
2319 break;
2320 return false;
2321 }
2322
2323 case PPC::RLWINM:
2324 case PPC::RLWINM8: {
2325 IsZExt = true;
2326 unsigned MB = MI->getOperand(3).getImm();
2327 if ((VT == MVT::i8 && MB <= 24) ||
2328 (VT == MVT::i16 && MB <= 16))
2329 break;
2330 return false;
2331 }
2332
2333 case PPC::EXTSB:
2334 case PPC::EXTSB8:
2335 case PPC::EXTSB8_32_64:
2336 /* There is no sign-extending load-byte instruction. */
2337 return false;
2338
2339 case PPC::EXTSH:
2340 case PPC::EXTSH8:
2341 case PPC::EXTSH8_32_64: {
2342 if (VT != MVT::i16 && VT != MVT::i8)
2343 return false;
2344 break;
2345 }
2346
2347 case PPC::EXTSW:
2348 case PPC::EXTSW_32:
2349 case PPC::EXTSW_32_64: {
2350 if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2351 return false;
2352 break;
2353 }
2354 }
2355
2356 // See if we can handle this address.
2357 Address Addr;
2358 if (!PPCComputeAddress(LI->getOperand(0), Addr))
2359 return false;
2360
2361 Register ResultReg = MI->getOperand(0).getReg();
2362
2363 if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
2364 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
2365 return false;
2366
2368 removeDeadCode(I, std::next(I));
2369 return true;
2370}
2371
// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.
// Currently a stub: returning false defers all argument lowering to the
// normal SelectionDAG path.
bool PPCFastISel::fastLowerArguments() {
  // Defer to normal argument lowering for now. It's reasonably
  // efficient. Consider doing something like ARM to handle the
  // case where all args fit in registers, no varargs, no float
  // or vector args.
  return false;
}
2381
2382// Handle materializing integer constants into a register. This is not
2383// automatically generated for PowerPC, so must be explicitly created here.
2384Register PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2385
2386 if (Opc != ISD::Constant)
2387 return Register();
2388
2389 // If we're using CR bit registers for i1 values, handle that as a special
2390 // case first.
2391 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2392 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2393 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2394 TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2395 return ImmReg;
2396 }
2397
2398 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2399 VT != MVT::i1)
2400 return Register();
2401
2402 const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2403 &PPC::GPRCRegClass);
2404 if (VT == MVT::i64)
2405 return PPCMaterialize64BitInt(Imm, RC);
2406 else
2407 return PPCMaterialize32BitInt(Imm, RC);
2408}
2409
2410// Override for ADDI and ADDI8 to set the correct register class
2411// on RHS operand 0. The automatic infrastructure naively assumes
2412// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2413// for these cases. At the moment, none of the other automatically
2414// generated RI instructions require special treatment. However, once
2415// SelectSelect is implemented, "isel" requires similar handling.
2416//
2417// Also be conservative about the output register class. Avoid
2418// assigning R0 or X0 to the output register for GPRC and G8RC
2419// register classes, as any such result could be used in ADDI, etc.,
2420// where those regs have another meaning.
2421Register PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2422 const TargetRegisterClass *RC,
2423 Register Op0, uint64_t Imm) {
2424 if (MachineInstOpcode == PPC::ADDI)
2425 MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2426 else if (MachineInstOpcode == PPC::ADDI8)
2427 MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2428
2429 const TargetRegisterClass *UseRC =
2430 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2431 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2432
2433 return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Imm);
2434}
2435
2436// Override for instructions with one register operand to avoid use of
2437// R0/X0. The automatic infrastructure isn't aware of the context so
2438// we must be conservative.
2439Register PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2440 const TargetRegisterClass *RC,
2441 Register Op0) {
2442 const TargetRegisterClass *UseRC =
2443 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2444 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2445
2446 return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0);
2447}
2448
2449// Override for instructions with two register operands to avoid use
2450// of R0/X0. The automatic infrastructure isn't aware of the context
2451// so we must be conservative.
2452Register PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2453 const TargetRegisterClass *RC,
2454 Register Op0, Register Op1) {
2455 const TargetRegisterClass *UseRC =
2456 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2457 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2458
2459 return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op1);
2460}
2461
2462namespace llvm {
2463 // Create the fast instruction selector for PowerPC64 ELF.
2465 const TargetLibraryInfo *LibInfo) {
2466 // Only available on 64-bit for now.
2467 const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2468 if (Subtarget.isPPC64())
2469 return new PPCFastISel(FuncInfo, LibInfo);
2470 return nullptr;
2471 }
2472}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file defines the FastISel class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t IntrinsicInst * II
static std::optional< PPC::Predicate > getComparePred(CmpInst::Predicate Pred)
static constexpr MCPhysReg FPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
This file describes how to lower LLVM code to machine code.
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
an instruction to allocate memory on the stack
PointerType * getType() const
Overload to return most specific pointer type.
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Register getLocReg() const
LocInfo getLocInfo() const
unsigned getValNo() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:681
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:695
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:692
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:686
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:689
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:685
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:687
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:694
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:704
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:691
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:680
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:688
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
This is an important base class in LLVM.
Definition Constant.h:43
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
Register fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0, uint64_t Imm)
Emit a MachineInstr with a register operand, an immediate, and a result register in the given registe...
Register fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0, Register Op1)
Emit a MachineInstr with two register operands and a result register in the given register class.
Register fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0)
Emit a MachineInstr with one register operand and a result register in the given register class.
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
MachineBasicBlock::iterator InsertPt
MBB - The current insert position inside the current block.
MachineBasicBlock * MBB
MBB - The current block.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Machine Value Type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MachineInstrBundleIterator< MachineInstr > iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
const PPCTargetLowering * getTargetLowering() const override
const PPCInstrInfo * getInstrInfo() const override
bool isLittleEndian() const
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:652
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned getID() const
Return the register class ID number.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
bool isOSAIX() const
Tests whether the OS is AIX.
Definition Triple.h:760
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
const Use * const_op_iterator
Definition User.h:280
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ MO_TOC_LO
Definition PPC.h:185
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
gep_type_iterator gep_type_begin(const User *GEP)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.