LLVM  4.0.0
PPCFastISel.cpp
Go to the documentation of this file.
1 //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the PowerPC-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // PPCGenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "PPC.h"
18 #include "PPCCallingConv.h"
19 #include "PPCCCState.h"
20 #include "PPCISelLowering.h"
21 #include "PPCMachineFunctionInfo.h"
22 #include "PPCSubtarget.h"
23 #include "PPCTargetMachine.h"
24 #include "llvm/ADT/Optional.h"
26 #include "llvm/CodeGen/FastISel.h"
32 #include "llvm/IR/CallingConv.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/GlobalVariable.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/Support/Debug.h"
41 
42 //===----------------------------------------------------------------------===//
43 //
44 // TBD:
45 // fastLowerArguments: Handle simple cases.
46 // PPCMaterializeGV: Handle TLS.
47 // SelectCall: Handle function pointers.
48 // SelectCall: Handle multi-register return values.
49 // SelectCall: Optimize away nops for local calls.
50 // processCallArgs: Handle bit-converted arguments.
51 // finishCall: Handle multi-register return values.
52 // PPCComputeAddress: Handle parameter references as FrameIndex's.
53 // PPCEmitCmp: Handle immediate as operand 1.
54 // SelectCall: Handle small byval arguments.
55 // SelectIntrinsicCall: Implement.
56 // SelectSelect: Implement.
57 // Consider factoring isTypeLegal into the base class.
58 // Implement switches and jump tables.
59 //
60 //===----------------------------------------------------------------------===//
61 using namespace llvm;
62 
63 #define DEBUG_TYPE "ppcfastisel"
64 
65 namespace {
66 
// Address describes a fast-isel memory operand: either a base register
// or a stack frame index, plus a byte offset. Defaults are innocuous:
// register base zero with no offset.
struct Address {
  // Which flavor of base the union below currently holds.
  enum {
    RegBase,
    FrameIndexBase
  } BaseType;

  union {
    unsigned Reg; // Valid when BaseType == RegBase.
    int FI;       // Valid when BaseType == FrameIndexBase.
  } Base;

  long Offset;

  Address() : BaseType(RegBase), Offset(0) {
    Base.Reg = 0;
  }
};
86 
// Fast instruction selector for the PowerPC backend. Implements the
// FastISel hooks plus PPC-specific helpers for address computation,
// loads/stores, compares, extensions, and constant materialization.
87 class PPCFastISel final : public FastISel {
88 
  // Cached target and per-function objects; all owned elsewhere.
89  const TargetMachine &TM;
90  const PPCSubtarget *PPCSubTarget;
91  PPCFunctionInfo *PPCFuncInfo;
92  const TargetInstrInfo &TII;
93  const TargetLowering &TLI;
  // NOTE(review): original line 94 is missing from this listing; the
  // constructor below initializes a 'Context' member presumably declared
  // there -- confirm against the full source.
95 
96  public:
97  explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
98  const TargetLibraryInfo *LibInfo)
99  : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
100  PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
101  PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
102  TII(*PPCSubTarget->getInstrInfo()),
103  TLI(*PPCSubTarget->getTargetLowering()),
104  Context(&FuncInfo.Fn->getContext()) {}
105 
106  // Backend specific FastISel code.
107  private:
108  bool fastSelectInstruction(const Instruction *I) override;
109  unsigned fastMaterializeConstant(const Constant *C) override;
110  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
111  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
112  const LoadInst *LI) override;
113  bool fastLowerArguments() override;
114  unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
  // Non-override emission helpers shadowing the base-class patterns.
115  unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
116  const TargetRegisterClass *RC,
117  unsigned Op0, bool Op0IsKill,
118  uint64_t Imm);
119  unsigned fastEmitInst_r(unsigned MachineInstOpcode,
120  const TargetRegisterClass *RC,
121  unsigned Op0, bool Op0IsKill);
122  unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
123  const TargetRegisterClass *RC,
124  unsigned Op0, bool Op0IsKill,
125  unsigned Op1, bool Op1IsKill);
126 
127  bool fastLowerCall(CallLoweringInfo &CLI) override;
128 
129  // Instruction selection routines.
130  private:
131  bool SelectLoad(const Instruction *I);
132  bool SelectStore(const Instruction *I);
133  bool SelectBranch(const Instruction *I);
134  bool SelectIndirectBr(const Instruction *I);
135  bool SelectFPExt(const Instruction *I);
136  bool SelectFPTrunc(const Instruction *I);
137  bool SelectIToFP(const Instruction *I, bool IsSigned);
138  bool SelectFPToI(const Instruction *I, bool IsSigned);
139  bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
140  bool SelectRet(const Instruction *I);
141  bool SelectTrunc(const Instruction *I);
142  bool SelectIntExt(const Instruction *I);
143 
144  // Utility routines.
145  private:
146  bool isTypeLegal(Type *Ty, MVT &VT);
147  bool isLoadTypeLegal(Type *Ty, MVT &VT);
148  bool isValueAvailable(const Value *V) const;
  // Register-class predicates used to detect VSX float classes.
149  bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
150  return RC->getID() == PPC::VSFRCRegClassID;
151  }
152  bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
153  return RC->getID() == PPC::VSSRCRegClassID;
154  }
155  bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
156  bool isZExt, unsigned DestReg);
157  bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
158  const TargetRegisterClass *RC, bool IsZExt = true,
159  unsigned FP64LoadOpc = PPC::LFD);
160  bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
161  bool PPCComputeAddress(const Value *Obj, Address &Addr);
162  void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
163  unsigned &IndexReg);
164  bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
165  unsigned DestReg, bool IsZExt);
166  unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
167  unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
168  unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
169  bool UseSExt = true);
170  unsigned PPCMaterialize32BitInt(int64_t Imm,
171  const TargetRegisterClass *RC);
172  unsigned PPCMaterialize64BitInt(int64_t Imm,
173  const TargetRegisterClass *RC);
174  unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
175  unsigned SrcReg, bool IsSigned);
176  unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
177 
178  // Call handling routines.
179  private:
  // NOTE(review): original line 183 is missing from this listing; the
  // parameter list below appears to skip one parameter -- confirm
  // against the full source.
180  bool processCallArgs(SmallVectorImpl<Value*> &Args,
181  SmallVectorImpl<unsigned> &ArgRegs,
182  SmallVectorImpl<MVT> &ArgVTs,
184  SmallVectorImpl<unsigned> &RegArgs,
185  CallingConv::ID CC,
186  unsigned &NumBytes,
187  bool IsVarArg);
188  bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
189  LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag);
190 
191  private:
  // Tablegen-generated fast-isel member declarations.
192  #include "PPCGenFastISel.inc"
193 
194 };
195 
196 } // end anonymous namespace
197 
198 #include "PPCGenCallingConv.inc"
199 
200 // Function whose sole purpose is to kill compiler warnings
201 // stemming from unused functions included from PPCGenCallingConv.inc.
202 CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
203  if (Flag == 1)
204  return CC_PPC32_SVR4;
205  else if (Flag == 2)
206  return CC_PPC32_SVR4_ByVal;
207  else if (Flag == 3)
208  return CC_PPC32_SVR4_VarArg;
209  else
210  return RetCC_PPC;
211 }
212 
  // NOTE(review): the enclosing function's signature line (original line
  // 213) is missing from this listing. The body below maps an IR CmpInst
  // predicate to the single PPC branch predicate implementing it,
  // returning an empty Optional when no single compare suffices --
  // confirm the exact signature against the full source.
214  switch (Pred) {
215  // These are not representable with any single compare.
216  case CmpInst::FCMP_FALSE:
217  case CmpInst::FCMP_TRUE:
218  // Major concern about the following 6 cases is NaN result. The comparison
219  // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
220  // only one of which will be set. The result is generated by fcmpu
221  // instruction. However, bc instruction only inspects one of the first 3
222  // bits, so when un is set, bc instruction may jump to an undesired
223  // place.
224  //
225  // More specifically, if we expect an unordered comparison and un is set, we
226  // expect to always go to true branch; in such case UEQ, UGT and ULT still
227  // give false, which are undesired; but UNE, UGE, ULE happen to give true,
228  // since they are tested by inspecting !eq, !lt, !gt, respectively.
229  //
230  // Similarly, for ordered comparison, when un is set, we always expect the
231  // result to be false. In such case OGT, OLT and OEQ is good, since they are
232  // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
233  // and ONE are tested through !lt, !gt and !eq, and these are true.
234  case CmpInst::FCMP_UEQ:
235  case CmpInst::FCMP_UGT:
236  case CmpInst::FCMP_ULT:
237  case CmpInst::FCMP_OGE:
238  case CmpInst::FCMP_OLE:
239  case CmpInst::FCMP_ONE:
240  default:
241  return Optional<PPC::Predicate>();
242 
243  case CmpInst::FCMP_OEQ:
244  case CmpInst::ICMP_EQ:
245  return PPC::PRED_EQ;
246 
247  case CmpInst::FCMP_OGT:
248  case CmpInst::ICMP_UGT:
249  case CmpInst::ICMP_SGT:
250  return PPC::PRED_GT;
251 
252  case CmpInst::FCMP_UGE:
253  case CmpInst::ICMP_UGE:
254  case CmpInst::ICMP_SGE:
255  return PPC::PRED_GE;
256 
257  case CmpInst::FCMP_OLT:
258  case CmpInst::ICMP_ULT:
259  case CmpInst::ICMP_SLT:
260  return PPC::PRED_LT;
261 
262  case CmpInst::FCMP_ULE:
263  case CmpInst::ICMP_ULE:
264  case CmpInst::ICMP_SLE:
265  return PPC::PRED_LE;
266 
267  case CmpInst::FCMP_UNE:
268  case CmpInst::ICMP_NE:
269  return PPC::PRED_NE;
270 
271  case CmpInst::FCMP_ORD:
272  return PPC::PRED_NU;
273 
274  case CmpInst::FCMP_UNO:
275  return PPC::PRED_UN;
276  }
277 }
278 
279 // Determine whether the type Ty is simple enough to be handled by
280 // fast-isel, and return its equivalent machine type in VT.
281 // FIXME: Copied directly from ARM -- factor into base class?
282 bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
283  EVT Evt = TLI.getValueType(DL, Ty, true);
284 
285  // Only handle simple types.
286  if (Evt == MVT::Other || !Evt.isSimple()) return false;
287  VT = Evt.getSimpleVT();
288 
289  // Handle all legal types, i.e. a register that will directly hold this
290  // value.
291  return TLI.isTypeLegal(VT);
292 }
293 
294 // Determine whether the type Ty is simple enough to be handled by
295 // fast-isel as a load target, and return its equivalent machine type in VT.
296 bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
297  if (isTypeLegal(Ty, VT)) return true;
298 
299  // If this is a type than can be sign or zero-extended to a basic operation
300  // go ahead and accept it now.
301  if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
302  return true;
303  }
304 
305  return false;
306 }
307 
308 bool PPCFastISel::isValueAvailable(const Value *V) const {
309  if (!isa<Instruction>(V))
310  return true;
311 
312  const auto *I = cast<Instruction>(V);
313  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
314 }
315 
316 // Given a value Obj, create an Address object Addr that represents its
317 // address. Return false if we can't handle it.
318 bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
  // U/Opcode identify the operator to look through, if any; UserOp1 is a
  // sentinel meaning "no operator recognized".
319  const User *U = nullptr;
320  unsigned Opcode = Instruction::UserOp1;
321  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
322  // Don't walk into other basic blocks unless the object is an alloca from
323  // another block, otherwise it may not have a virtual register assigned.
324  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
325  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
326  Opcode = I->getOpcode();
327  U = I;
328  }
329  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
330  Opcode = C->getOpcode();
331  U = C;
332  }
333 
334  switch (Opcode) {
335  default:
336  break;
337  case Instruction::BitCast:
338  // Look through bitcasts.
339  return PPCComputeAddress(U->getOperand(0), Addr);
340  case Instruction::IntToPtr:
341  // Look past no-op inttoptrs.
342  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
343  TLI.getPointerTy(DL))
344  return PPCComputeAddress(U->getOperand(0), Addr);
345  break;
346  case Instruction::PtrToInt:
347  // Look past no-op ptrtoints.
348  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
349  return PPCComputeAddress(U->getOperand(0), Addr);
350  break;
351  case Instruction::GetElementPtr: {
  // Keep a copy so the address can be restored if folding fails below.
352  Address SavedAddr = Addr;
353  long TmpOffset = Addr.Offset;
354 
355  // Iterate through the GEP folding the constants into offsets where
356  // we can.
  // NOTE(review): original line 357 is missing from this listing; it
  // presumably declares the 'GTI' gep-type iterator incremented in the
  // loop header below -- confirm against the full source.
358  for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
359  II != IE; ++II, ++GTI) {
360  const Value *Op = *II;
361  if (StructType *STy = GTI.getStructTypeOrNull()) {
  // Struct index: add the field's fixed layout offset.
362  const StructLayout *SL = DL.getStructLayout(STy);
363  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
364  TmpOffset += SL->getElementOffset(Idx);
365  } else {
  // Array/pointer index: scale by the element allocation size.
366  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
367  for (;;) {
368  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
369  // Constant-offset addressing.
370  TmpOffset += CI->getSExtValue() * S;
371  break;
372  }
373  if (canFoldAddIntoGEP(U, Op)) {
374  // A compatible add with a constant operand. Fold the constant.
375  ConstantInt *CI =
376  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
377  TmpOffset += CI->getSExtValue() * S;
378  // Iterate on the other operand.
379  Op = cast<AddOperator>(Op)->getOperand(0);
380  continue;
381  }
382  // Unsupported
383  goto unsupported_gep;
384  }
385  }
386  }
387 
388  // Try to grab the base operand now.
389  Addr.Offset = TmpOffset;
390  if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
391 
392  // We failed, restore everything and try the other options.
393  Addr = SavedAddr;
394 
395  unsupported_gep:
396  break;
397  }
398  case Instruction::Alloca: {
  // Static allocas are addressed via their frame index.
399  const AllocaInst *AI = cast<AllocaInst>(Obj);
  // NOTE(review): original line 400 is missing from this listing; it
  // presumably declares the 'SI' iterator receiving the find() result
  // below -- confirm against the full source.
401  FuncInfo.StaticAllocaMap.find(AI);
402  if (SI != FuncInfo.StaticAllocaMap.end()) {
403  Addr.BaseType = Address::FrameIndexBase;
404  Addr.Base.FI = SI->second;
405  return true;
406  }
407  break;
408  }
409  }
410 
411  // FIXME: References to parameters fall through to the behavior
412  // below. They should be able to reference a frame index since
413  // they are stored to the stack, so we can get "ld rx, offset(r1)"
414  // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
415  // just contain the parameter. Try to handle this with a FI.
416 
417  // Try to get this in a register if nothing else has worked.
418  if (Addr.Base.Reg == 0)
419  Addr.Base.Reg = getRegForValue(Obj);
420 
421  // Prevent assignment of base register to X0, which is inappropriate
422  // for loads and stores alike.
423  if (Addr.Base.Reg != 0)
424  MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
425 
426  return Addr.Base.Reg != 0;
427 }
428 
429 // Fix up some addresses that can't be used directly. For example, if
430 // an offset won't fit in an instruction field, we may need to move it
431 // into an index register.
432 void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
433  unsigned &IndexReg) {
434 
435  // Check whether the offset fits in the instruction field.
436  if (!isInt<16>(Addr.Offset))
437  UseOffset = false;
438 
439  // If this is a stack pointer and the offset needs to be simplified then
440  // put the alloca address into a register, set the base type back to
441  // register and continue. This should almost never happen.
442  if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
443  unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
444  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
445  ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
446  Addr.Base.Reg = ResultReg;
447  Addr.BaseType = Address::RegBase;
448  }
449 
450  if (!UseOffset) {
451  IntegerType *OffsetTy = Type::getInt64Ty(*Context);
452  const ConstantInt *Offset =
453  ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
454  IndexReg = PPCMaterializeInt(Offset, MVT::i64);
455  assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
456  }
457 }
458 
459 // Emit a load instruction if possible, returning true if we succeeded,
460 // otherwise false. See commentary below for how the register class of
461 // the load is determined.
462 bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
463  const TargetRegisterClass *RC,
464  bool IsZExt, unsigned FP64LoadOpc) {
465  unsigned Opc;
466  bool UseOffset = true;
467 
468  // If ResultReg is given, it determines the register class of the load.
469  // Otherwise, RC is the register class to use. If the result of the
470  // load isn't anticipated in this block, both may be zero, in which
471  // case we must make a conservative guess. In particular, don't assign
472  // R0 or X0 to the result register, as the result may be used in a load,
473  // store, add-immediate, or isel that won't permit this. (Though
474  // perhaps the spill and reload of live-exit values would handle this?)
475  const TargetRegisterClass *UseRC =
476  (ResultReg ? MRI.getRegClass(ResultReg) :
477  (RC ? RC :
478  (VT == MVT::f64 ? &PPC::F8RCRegClass :
479  (VT == MVT::f32 ? &PPC::F4RCRegClass :
480  (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
481  &PPC::GPRC_and_GPRC_NOR0RegClass)))));
482 
483  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
484 
  // Select the displacement-form (D/DS-form) opcode for this type; the
  // indexed (X-form) equivalent is substituted later if needed.
485  switch (VT.SimpleTy) {
486  default: // e.g., vector types not handled
487  return false;
488  case MVT::i8:
489  Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
490  break;
491  case MVT::i16:
492  Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
493  : (Is32BitInt ? PPC::LHA : PPC::LHA8));
494  break;
495  case MVT::i32:
496  Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
497  : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
  // LWA is a DS-form instruction: its displacement must be 4-byte
  // aligned, so fall back to the indexed form otherwise.
498  if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
499  UseOffset = false;
500  break;
501  case MVT::i64:
502  Opc = PPC::LD;
503  assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
504  "64-bit load with 32-bit target??");
  // LD is DS-form as well: displacement must be 4-byte aligned.
505  UseOffset = ((Addr.Offset & 3) == 0);
506  break;
507  case MVT::f32:
508  Opc = PPC::LFS;
509  break;
510  case MVT::f64:
511  Opc = FP64LoadOpc;
512  break;
513  }
514 
515  // If necessary, materialize the offset into a register and use
516  // the indexed form. Also handle stack pointers with special needs.
517  unsigned IndexReg = 0;
518  PPCSimplifyAddress(Addr, UseOffset, IndexReg);
519 
520  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
521  // be used.
522  bool IsVSSRC = isVSSRCRegClass(UseRC);
523  bool IsVSFRC = isVSFRCRegClass(UseRC);
524  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
525  bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
526  if ((Is32VSXLoad || Is64VSXLoad) &&
527  (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
528  (Addr.Offset == 0)) {
529  UseOffset = false;
530  }
531 
532  if (ResultReg == 0)
533  ResultReg = createResultReg(UseRC);
534 
535  // Note: If we still have a frame index here, we know the offset is
536  // in range, as otherwise PPCSimplifyAddress would have converted it
537  // into a RegBase.
538  if (Addr.BaseType == Address::FrameIndexBase) {
539  // VSX only provides an indexed load.
540  if (Is32VSXLoad || Is64VSXLoad) return false;
541 
542  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
543  MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
544  Addr.Offset),
545  MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
546  MFI.getObjectAlignment(Addr.Base.FI));
547 
548  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
549  .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
550 
551  // Base reg with offset in range.
552  } else if (UseOffset) {
553  // VSX only provides an indexed load.
554  if (Is32VSXLoad || Is64VSXLoad) return false;
555 
556  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
557  .addImm(Addr.Offset).addReg(Addr.Base.Reg);
558 
559  // Indexed form.
560  } else {
561  // Get the RR opcode corresponding to the RI one. FIXME: It would be
562  // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
563  // is hard to get at.
564  switch (Opc) {
565  default: llvm_unreachable("Unexpected opcode!");
566  case PPC::LBZ: Opc = PPC::LBZX; break;
567  case PPC::LBZ8: Opc = PPC::LBZX8; break;
568  case PPC::LHZ: Opc = PPC::LHZX; break;
569  case PPC::LHZ8: Opc = PPC::LHZX8; break;
570  case PPC::LHA: Opc = PPC::LHAX; break;
571  case PPC::LHA8: Opc = PPC::LHAX8; break;
572  case PPC::LWZ: Opc = PPC::LWZX; break;
573  case PPC::LWZ8: Opc = PPC::LWZX8; break;
574  case PPC::LWA: Opc = PPC::LWAX; break;
575  case PPC::LWA_32: Opc = PPC::LWAX_32; break;
576  case PPC::LD: Opc = PPC::LDX; break;
577  case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
578  case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
579  }
580 
581  auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
582  ResultReg);
583 
584  // If we have an index register defined we use it in the load inst,
585  // otherwise we use X0 as base as it makes the vector instructions to
586  // use zero in the computation of the effective address regardless the
587  // content of the register.
588  if (IndexReg)
589  MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
590  else
591  MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
592  }
593 
594  return true;
595 }
596 
597 // Attempt to fast-select a load instruction.
598 bool PPCFastISel::SelectLoad(const Instruction *I) {
599  // FIXME: No atomic loads are supported.
600  if (cast<LoadInst>(I)->isAtomic())
601  return false;
602 
603  // Verify we have a legal type before going any further.
604  MVT VT;
605  if (!isLoadTypeLegal(I->getType(), VT))
606  return false;
607 
608  // See if we can handle this address.
609  Address Addr;
610  if (!PPCComputeAddress(I->getOperand(0), Addr))
611  return false;
612 
613  // Look at the currently assigned register for this instruction
614  // to determine the required register class. This is necessary
615  // to constrain RA from using R0/X0 when this is not legal.
616  unsigned AssignedReg = FuncInfo.ValueMap[I];
617  const TargetRegisterClass *RC =
618  AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
619 
620  unsigned ResultReg = 0;
621  if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
622  return false;
623  updateValueMap(I, ResultReg);
624  return true;
625 }
626 
627 // Emit a store instruction to store SrcReg at Addr.
  // Returns false when the type or addressing mode cannot be handled.
628 bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
629  assert(SrcReg && "Nothing to store!");
630  unsigned Opc;
631  bool UseOffset = true;
632 
  // The source register's class decides 32- vs 64-bit opcode variants.
633  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
634  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
635 
  // Select the displacement-form opcode; the indexed (X-form)
  // equivalent is substituted later if needed.
636  switch (VT.SimpleTy) {
637  default: // e.g., vector types not handled
638  return false;
639  case MVT::i8:
640  Opc = Is32BitInt ? PPC::STB : PPC::STB8;
641  break;
642  case MVT::i16:
643  Opc = Is32BitInt ? PPC::STH : PPC::STH8;
644  break;
645  case MVT::i32:
646  assert(Is32BitInt && "Not GPRC for i32??");
647  Opc = PPC::STW;
648  break;
649  case MVT::i64:
650  Opc = PPC::STD;
  // STD is DS-form: its displacement must be 4-byte aligned.
651  UseOffset = ((Addr.Offset & 3) == 0);
652  break;
653  case MVT::f32:
654  Opc = PPC::STFS;
655  break;
656  case MVT::f64:
657  Opc = PPC::STFD;
658  break;
659  }
660 
661  // If necessary, materialize the offset into a register and use
662  // the indexed form. Also handle stack pointers with special needs.
663  unsigned IndexReg = 0;
664  PPCSimplifyAddress(Addr, UseOffset, IndexReg);
665 
666  // If this is a potential VSX store with an offset of 0, a VSX indexed store
667  // can be used.
668  bool IsVSSRC = isVSSRCRegClass(RC);
669  bool IsVSFRC = isVSFRCRegClass(RC);
670  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
671  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
672  if ((Is32VSXStore || Is64VSXStore) &&
673  (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
674  (Addr.Offset == 0)) {
675  UseOffset = false;
676  }
677 
678  // Note: If we still have a frame index here, we know the offset is
679  // in range, as otherwise PPCSimplifyAddress would have converted it
680  // into a RegBase.
681  if (Addr.BaseType == Address::FrameIndexBase) {
682  // VSX only provides an indexed store.
683  if (Is32VSXStore || Is64VSXStore) return false;
684 
685  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
686  MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
687  Addr.Offset),
688  MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
689  MFI.getObjectAlignment(Addr.Base.FI));
690 
691  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
692  .addReg(SrcReg)
693  .addImm(Addr.Offset)
694  .addFrameIndex(Addr.Base.FI)
695  .addMemOperand(MMO);
696 
697  // Base reg with offset in range.
698  } else if (UseOffset) {
699  // VSX only provides an indexed store.
700  if (Is32VSXStore || Is64VSXStore)
701  return false;
702 
703  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
704  .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
705 
706  // Indexed form.
707  } else {
708  // Get the RR opcode corresponding to the RI one. FIXME: It would be
709  // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
710  // is hard to get at.
711  switch (Opc) {
712  default: llvm_unreachable("Unexpected opcode!");
713  case PPC::STB: Opc = PPC::STBX; break;
714  case PPC::STH : Opc = PPC::STHX; break;
715  case PPC::STW : Opc = PPC::STWX; break;
716  case PPC::STB8: Opc = PPC::STBX8; break;
717  case PPC::STH8: Opc = PPC::STHX8; break;
718  case PPC::STW8: Opc = PPC::STWX8; break;
719  case PPC::STD: Opc = PPC::STDX; break;
720  case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
721  case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
722  }
723 
724  auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
725  .addReg(SrcReg);
726 
727  // If we have an index register defined we use it in the store inst,
728  // otherwise we use X0 as base as it makes the vector instructions to
729  // use zero in the computation of the effective address regardless the
730  // content of the register.
731  if (IndexReg)
732  MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
733  else
734  MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
735  }
736 
737  return true;
738 }
739 
740 // Attempt to fast-select a store instruction.
741 bool PPCFastISel::SelectStore(const Instruction *I) {
742  Value *Op0 = I->getOperand(0);
743  unsigned SrcReg = 0;
744 
745  // FIXME: No atomics loads are supported.
746  if (cast<StoreInst>(I)->isAtomic())
747  return false;
748 
749  // Verify we have a legal type before going any further.
750  MVT VT;
751  if (!isLoadTypeLegal(Op0->getType(), VT))
752  return false;
753 
754  // Get the value to be stored into a register.
755  SrcReg = getRegForValue(Op0);
756  if (SrcReg == 0)
757  return false;
758 
759  // See if we can handle this address.
760  Address Addr;
761  if (!PPCComputeAddress(I->getOperand(1), Addr))
762  return false;
763 
764  if (!PPCEmitStore(VT, SrcReg, Addr))
765  return false;
766 
767  return true;
768 }
769 
770 // Attempt to fast-select a branch instruction.
771 bool PPCFastISel::SelectBranch(const Instruction *I) {
772  const BranchInst *BI = cast<BranchInst>(I);
773  MachineBasicBlock *BrBB = FuncInfo.MBB;
774  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
775  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
776 
777  // For now, just try the simplest case where it's fed by a compare.
778  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
779  if (isValueAvailable(CI)) {
780  Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
781  if (!OptPPCPred)
782  return false;
783 
784  PPC::Predicate PPCPred = OptPPCPred.getValue();
785 
786  // Take advantage of fall-through opportunities.
787  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
788  std::swap(TBB, FBB);
789  PPCPred = PPC::InvertPredicate(PPCPred);
790  }
791 
792  unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
793 
794  if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
795  CondReg))
796  return false;
797 
798  BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
799  .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
800  finishCondBranch(BI->getParent(), TBB, FBB);
801  return true;
802  }
803  } else if (const ConstantInt *CI =
804  dyn_cast<ConstantInt>(BI->getCondition())) {
805  uint64_t Imm = CI->getZExtValue();
806  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
807  fastEmitBranch(Target, DbgLoc);
808  return true;
809  }
810 
811  // FIXME: ARM looks for a case where the block containing the compare
812  // has been split from the block containing the branch. If this happens,
813  // there is a vreg available containing the result of the compare. I'm
814  // not sure we can do much, as we've lost the predicate information with
815  // the compare instruction -- we have a 4-bit CR but don't know which bit
816  // to test here.
817  return false;
818 }
819 
820 // Attempt to emit a compare of the two source values. Signed and unsigned
821 // comparisons are supported. Return false if we can't handle it.
822 bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
823  bool IsZExt, unsigned DestReg) {
824  Type *Ty = SrcValue1->getType();
825  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
826  if (!SrcEVT.isSimple())
827  return false;
828  MVT SrcVT = SrcEVT.getSimpleVT();
829 
830  if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
831  return false;
832 
833  // See if operand 2 is an immediate encodeable in the compare.
834  // FIXME: Operands are not in canonical order at -O0, so an immediate
835  // operand in position 1 is a lost opportunity for now. We are
836  // similar to ARM in this regard.
837  long Imm = 0;
838  bool UseImm = false;
839 
840  // Only 16-bit integer constants can be represented in compares for
841  // PowerPC. Others will be materialized into a register.
842  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
843  if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
844  SrcVT == MVT::i8 || SrcVT == MVT::i1) {
845  const APInt &CIVal = ConstInt->getValue();
846  Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
847  if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
848  UseImm = true;
849  }
850  }
851 
852  unsigned CmpOpc;
853  bool NeedsExt = false;
854  switch (SrcVT.SimpleTy) {
855  default: return false;
856  case MVT::f32:
857  CmpOpc = PPC::FCMPUS;
858  break;
859  case MVT::f64:
860  CmpOpc = PPC::FCMPUD;
861  break;
862  case MVT::i1:
863  case MVT::i8:
864  case MVT::i16:
865  NeedsExt = true;
866  // Intentional fall-through.
867  case MVT::i32:
868  if (!UseImm)
869  CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
870  else
871  CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
872  break;
873  case MVT::i64:
874  if (!UseImm)
875  CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
876  else
877  CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
878  break;
879  }
880 
881  unsigned SrcReg1 = getRegForValue(SrcValue1);
882  if (SrcReg1 == 0)
883  return false;
884 
885  unsigned SrcReg2 = 0;
886  if (!UseImm) {
887  SrcReg2 = getRegForValue(SrcValue2);
888  if (SrcReg2 == 0)
889  return false;
890  }
891 
892  if (NeedsExt) {
893  unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
894  if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
895  return false;
896  SrcReg1 = ExtReg;
897 
898  if (!UseImm) {
899  unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
900  if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
901  return false;
902  SrcReg2 = ExtReg;
903  }
904  }
905 
906  if (!UseImm)
907  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
908  .addReg(SrcReg1).addReg(SrcReg2);
909  else
910  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
911  .addReg(SrcReg1).addImm(Imm);
912 
913  return true;
914 }
915 
916 // Attempt to fast-select a floating-point extend instruction.
917 bool PPCFastISel::SelectFPExt(const Instruction *I) {
918  Value *Src = I->getOperand(0);
919  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
920  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
921 
922  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
923  return false;
924 
925  unsigned SrcReg = getRegForValue(Src);
926  if (!SrcReg)
927  return false;
928 
929  // No code is generated for a FP extend.
930  updateValueMap(I, SrcReg);
931  return true;
932 }
933 
934 // Attempt to fast-select a floating-point truncate instruction.
935 bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
936  Value *Src = I->getOperand(0);
937  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
938  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
939 
940  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
941  return false;
942 
943  unsigned SrcReg = getRegForValue(Src);
944  if (!SrcReg)
945  return false;
946 
947  // Round the result to single precision.
948  unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
949  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
950  .addReg(SrcReg);
951 
952  updateValueMap(I, DestReg);
953  return true;
954 }
955 
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// Returns the FPR holding the result, or 0 on failure.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit so an 8-byte store is valid.
  // Note SrcVT deliberately stays MVT::i32 so the load selection below
  // still knows the original width.
  if (SrcVT == MVT::i32) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return 0;
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return 0;

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      // LFIWZX: load the 32-bit integer zero-extended. On big-endian the
      // low word of the 8-byte slot is at offset 4.
      LoadOpc = PPC::LFIWZX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    } else if (PPCSubTarget->hasLFIWAX()) {
      // LFIWAX: load the 32-bit integer sign-extended (when available).
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned ResultReg = 0;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return 0;

  return ResultReg;
}
1004 
// Attempt to fast-select an integer-to-floating-point conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only scalar f32/f64 destinations are handled.
  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !PPCSubTarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
    return false;

  // Extend the input if necessary. i8/i16 are widened all the way to i64
  // so the subsequent GPR->FPR move can use a single 8-byte path.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR (via a stack slot; see PPCMoveToFPReg).
  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (FPReg == 0)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}
1076 
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      unsigned SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return 0;

  // Reload it into a GPR. If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place
  // (the low word of the 8-byte slot).
  if (VT == MVT::i32)
    Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no assigned
  // register yet, let PPCEmitLoad pick one (nullptr RC).
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  unsigned ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return 0;

  return ResultReg;
}
1113 
// Attempt to fast-select a floating-point-to-integer conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // Convert f32 to f64 if necessary. This is just a meaningless copy
  // to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass) {
    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), TmpReg)
      .addReg(SrcReg);
    SrcReg = TmpReg;
  }

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs. (Note the brace-less nesting: the inner
  // if/else pairs first; the final else belongs to the outer if.)
  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
  unsigned Opc;

  if (DstVT == MVT::i32)
    if (IsSigned)
      Opc = PPC::FCTIWZ;
    else
      // Without FPCVT, unsigned i32 is handled by truncating to i64
      // (FCTIDZ), which covers the full u32 range.
      Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
  else
    Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
  if (IntReg == 0)
    return false;

  updateValueMap(I, IntReg);
  return true;
}
1178 
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the 32-bit or 64-bit form of the opcode based on the class above.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // Handle case of small immediate operand: fold a 16-bit signed immediate
  // into the D-form instruction where possible.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          // ADDI reads R0 as the literal zero, so constrain the source away
          // from R0.
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // Subtract-immediate becomes add of the negated immediate; -32768
          // cannot be negated within 16 bits, so fall back to reg-reg.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  // Reverse operands for subtract-from (SUBF computes RB - RA).
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
1283 
1284 // Handle arguments to a call that we're attempting to fast-select.
1285 // Return false if the arguments are too complex for us at the moment.
1286 bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
1287  SmallVectorImpl<unsigned> &ArgRegs,
1288  SmallVectorImpl<MVT> &ArgVTs,
1290  SmallVectorImpl<unsigned> &RegArgs,
1291  CallingConv::ID CC,
1292  unsigned &NumBytes,
1293  bool IsVarArg) {
1295  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1296 
1297  // Reserve space for the linkage area on the stack.
1298  unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
1299  CCInfo.AllocateStack(LinkageSize, 8);
1300 
1301  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
1302 
1303  // Bail out if we can't handle any of the arguments.
1304  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1305  CCValAssign &VA = ArgLocs[I];
1306  MVT ArgVT = ArgVTs[VA.getValNo()];
1307 
1308  // Skip vector arguments for now, as well as long double and
1309  // uint128_t, and anything that isn't passed in a register.
1310  if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1311  !VA.isRegLoc() || VA.needsCustom())
1312  return false;
1313 
1314  // Skip bit-converted arguments for now.
1315  if (VA.getLocInfo() == CCValAssign::BCvt)
1316  return false;
1317  }
1318 
1319  // Get a count of how many bytes are to be pushed onto the stack.
1320  NumBytes = CCInfo.getNextStackOffset();
1321 
1322  // The prolog code of the callee may store up to 8 GPR argument registers to
1323  // the stack, allowing va_start to index over them in memory if its varargs.
1324  // Because we cannot tell if this is needed on the caller side, we have to
1325  // conservatively assume that it is needed. As such, make sure we have at
1326  // least enough stack space for the caller to store the 8 GPRs.
1327  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1328  NumBytes = std::max(NumBytes, LinkageSize + 64);
1329 
1330  // Issue CALLSEQ_START.
1331  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1332  TII.get(TII.getCallFrameSetupOpcode()))
1333  .addImm(NumBytes);
1334 
1335  // Prepare to assign register arguments. Every argument uses up a
1336  // GPR protocol register even if it's passed in a floating-point
1337  // register (unless we're using the fast calling convention).
1338  unsigned NextGPR = PPC::X3;
1339  unsigned NextFPR = PPC::F1;
1340 
1341  // Process arguments.
1342  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1343  CCValAssign &VA = ArgLocs[I];
1344  unsigned Arg = ArgRegs[VA.getValNo()];
1345  MVT ArgVT = ArgVTs[VA.getValNo()];
1346 
1347  // Handle argument promotion and bitcasts.
1348  switch (VA.getLocInfo()) {
1349  default:
1350  llvm_unreachable("Unknown loc info!");
1351  case CCValAssign::Full:
1352  break;
1353  case CCValAssign::SExt: {
1354  MVT DestVT = VA.getLocVT();
1355  const TargetRegisterClass *RC =
1356  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1357  unsigned TmpReg = createResultReg(RC);
1358  if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1359  llvm_unreachable("Failed to emit a sext!");
1360  ArgVT = DestVT;
1361  Arg = TmpReg;
1362  break;
1363  }
1364  case CCValAssign::AExt:
1365  case CCValAssign::ZExt: {
1366  MVT DestVT = VA.getLocVT();
1367  const TargetRegisterClass *RC =
1368  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1369  unsigned TmpReg = createResultReg(RC);
1370  if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1371  llvm_unreachable("Failed to emit a zext!");
1372  ArgVT = DestVT;
1373  Arg = TmpReg;
1374  break;
1375  }
1376  case CCValAssign::BCvt: {
1377  // FIXME: Not yet handled.
1378  llvm_unreachable("Should have bailed before getting here!");
1379  break;
1380  }
1381  }
1382 
1383  // Copy this argument to the appropriate register.
1384  unsigned ArgReg;
1385  if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1386  ArgReg = NextFPR++;
1387  if (CC != CallingConv::Fast)
1388  ++NextGPR;
1389  } else
1390  ArgReg = NextGPR++;
1391 
1392  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1393  TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1394  RegArgs.push_back(ArgReg);
1395  }
1396 
1397  return true;
1398 }
1399 
1400 // For a call that we've determined we can fast-select, finish the
1401 // call sequence and generate a copy to obtain the return value (if any).
1402 bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1403  CallingConv::ID CC = CLI.CallConv;
1404 
1405  // Issue CallSEQ_END.
1406  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1407  TII.get(TII.getCallFrameDestroyOpcode()))
1408  .addImm(NumBytes).addImm(0);
1409 
1410  // Next, generate a copy to obtain the return value.
1411  // FIXME: No multi-register return values yet, though I don't foresee
1412  // any real difficulties there.
1413  if (RetVT != MVT::isVoid) {
1415  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1416  CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1417  CCValAssign &VA = RVLocs[0];
1418  assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1419  assert(VA.isRegLoc() && "Can only return in registers!");
1420 
1421  MVT DestVT = VA.getValVT();
1422  MVT CopyVT = DestVT;
1423 
1424  // Ints smaller than a register still arrive in a full 64-bit
1425  // register, so make sure we recognize this.
1426  if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1427  CopyVT = MVT::i64;
1428 
1429  unsigned SourcePhysReg = VA.getLocReg();
1430  unsigned ResultReg = 0;
1431 
1432  if (RetVT == CopyVT) {
1433  const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1434  ResultReg = createResultReg(CpyRC);
1435 
1436  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1437  TII.get(TargetOpcode::COPY), ResultReg)
1438  .addReg(SourcePhysReg);
1439 
1440  // If necessary, round the floating result to single precision.
1441  } else if (CopyVT == MVT::f64) {
1442  ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1443  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
1444  ResultReg).addReg(SourcePhysReg);
1445 
1446  // If only the low half of a general register is needed, generate
1447  // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1448  // used along the fast-isel path (not lowered), and downstream logic
1449  // also doesn't like a direct subreg copy on a physical reg.)
1450  } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1451  ResultReg = createResultReg(&PPC::GPRCRegClass);
1452  // Convert physical register from G8RC to GPRC.
1453  SourcePhysReg -= PPC::X0 - PPC::R0;
1454  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1455  TII.get(TargetOpcode::COPY), ResultReg)
1456  .addReg(SourcePhysReg);
1457  }
1458 
1459  assert(ResultReg && "ResultReg unset!");
1460  CLI.InRegs.push_back(SourcePhysReg);
1461  CLI.ResultReg = ResultReg;
1462  CLI.NumResultRegs = 1;
1463  }
1464 
1465  return true;
1466 }
1467 
1468 bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1469  CallingConv::ID CC = CLI.CallConv;
1470  bool IsTailCall = CLI.IsTailCall;
1471  bool IsVarArg = CLI.IsVarArg;
1472  const Value *Callee = CLI.Callee;
1473  const MCSymbol *Symbol = CLI.Symbol;
1474 
1475  if (!Callee && !Symbol)
1476  return false;
1477 
1478  // Allow SelectionDAG isel to handle tail calls.
1479  if (IsTailCall)
1480  return false;
1481 
1482  // Let SDISel handle vararg functions.
1483  if (IsVarArg)
1484  return false;
1485 
1486  // Handle simple calls for now, with legal return types and
1487  // those that can be extended.
1488  Type *RetTy = CLI.RetTy;
1489  MVT RetVT;
1490  if (RetTy->isVoidTy())
1491  RetVT = MVT::isVoid;
1492  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1493  RetVT != MVT::i8)
1494  return false;
1495  else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
1496  // We can't handle boolean returns when CR bits are in use.
1497  return false;
1498 
1499  // FIXME: No multi-register return values yet.
1500  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1501  RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1502  RetVT != MVT::f64) {
1504  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1505  CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1506  if (RVLocs.size() > 1)
1507  return false;
1508  }
1509 
1510  // Bail early if more than 8 arguments, as we only currently
1511  // handle arguments passed in registers.
1512  unsigned NumArgs = CLI.OutVals.size();
1513  if (NumArgs > 8)
1514  return false;
1515 
1516  // Set up the argument vectors.
1518  SmallVector<unsigned, 8> ArgRegs;
1519  SmallVector<MVT, 8> ArgVTs;
1521 
1522  Args.reserve(NumArgs);
1523  ArgRegs.reserve(NumArgs);
1524  ArgVTs.reserve(NumArgs);
1525  ArgFlags.reserve(NumArgs);
1526 
1527  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1528  // Only handle easy calls for now. It would be reasonably easy
1529  // to handle <= 8-byte structures passed ByVal in registers, but we
1530  // have to ensure they are right-justified in the register.
1531  ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1532  if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1533  return false;
1534 
1535  Value *ArgValue = CLI.OutVals[i];
1536  Type *ArgTy = ArgValue->getType();
1537  MVT ArgVT;
1538  if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1539  return false;
1540 
1541  if (ArgVT.isVector())
1542  return false;
1543 
1544  unsigned Arg = getRegForValue(ArgValue);
1545  if (Arg == 0)
1546  return false;
1547 
1548  Args.push_back(ArgValue);
1549  ArgRegs.push_back(Arg);
1550  ArgVTs.push_back(ArgVT);
1551  ArgFlags.push_back(Flags);
1552  }
1553 
1554  // Process the arguments.
1555  SmallVector<unsigned, 8> RegArgs;
1556  unsigned NumBytes;
1557 
1558  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1559  RegArgs, CC, NumBytes, IsVarArg))
1560  return false;
1561 
1562  MachineInstrBuilder MIB;
1563  // FIXME: No handling for function pointers yet. This requires
1564  // implementing the function descriptor (OPD) setup.
1565  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1566  if (!GV) {
1567  // patchpoints are a special case; they always dispatch to a pointer value.
1568  // However, we don't actually want to generate the indirect call sequence
1569  // here (that will be generated, as necessary, during asm printing), and
1570  // the call we generate here will be erased by FastISel::selectPatchpoint,
1571  // so don't try very hard...
1572  if (CLI.IsPatchPoint)
1573  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
1574  else
1575  return false;
1576  } else {
1577  // Build direct call with NOP for TOC restore.
1578  // FIXME: We can and should optimize away the NOP for local calls.
1579  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1580  TII.get(PPC::BL8_NOP));
1581  // Add callee.
1582  MIB.addGlobalAddress(GV);
1583  }
1584 
1585  // Add implicit physical register uses to the call.
1586  for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
1587  MIB.addReg(RegArgs[II], RegState::Implicit);
1588 
1589  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1590  // into the call.
1591  PPCFuncInfo->setUsesTOCBasePtr();
1592  MIB.addReg(PPC::X2, RegState::Implicit);
1593 
1594  // Add a register mask with the call-preserved registers. Proper
1595  // defs for return values will be added by setPhysRegsDeadExcept().
1596  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1597 
1598  CLI.Call = MIB;
1599 
1600  // Finish off the call including any return values.
1601  return finishCall(RetVT, CLI, NumBytes);
1602 }
1603 
1604 // Attempt to fast-select a return instruction.
1605 bool PPCFastISel::SelectRet(const Instruction *I) {
1606 
1607  if (!FuncInfo.CanLowerReturn)
1608  return false;
1609 
1610  if (TLI.supportSplitCSR(FuncInfo.MF))
1611  return false;
1612 
1613  const ReturnInst *Ret = cast<ReturnInst>(I);
1614  const Function &F = *I->getParent()->getParent();
1615 
1616  // Build a list of return value registers.
1617  SmallVector<unsigned, 4> RetRegs;
1618  CallingConv::ID CC = F.getCallingConv();
1619 
1620  if (Ret->getNumOperands() > 0) {
1622  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1623 
1624  // Analyze operands of the call, assigning locations to each operand.
1626  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1627  CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1628  const Value *RV = Ret->getOperand(0);
1629 
1630  // FIXME: Only one output register for now.
1631  if (ValLocs.size() > 1)
1632  return false;
1633 
1634  // Special case for returning a constant integer of any size - materialize
1635  // the constant as an i64 and copy it to the return register.
1636  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
1637  CCValAssign &VA = ValLocs[0];
1638 
1639  unsigned RetReg = VA.getLocReg();
1640  // We still need to worry about properly extending the sign. For example,
1641  // we could have only a single bit or a constant that needs zero
1642  // extension rather than sign extension. Make sure we pass the return
1643  // value extension property to integer materialization.
1644  unsigned SrcReg =
1645  PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1646 
1647  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1648  TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1649 
1650  RetRegs.push_back(RetReg);
1651 
1652  } else {
1653  unsigned Reg = getRegForValue(RV);
1654 
1655  if (Reg == 0)
1656  return false;
1657 
1658  // Copy the result values into the output registers.
1659  for (unsigned i = 0; i < ValLocs.size(); ++i) {
1660 
1661  CCValAssign &VA = ValLocs[i];
1662  assert(VA.isRegLoc() && "Can only return in registers!");
1663  RetRegs.push_back(VA.getLocReg());
1664  unsigned SrcReg = Reg + VA.getValNo();
1665 
1666  EVT RVEVT = TLI.getValueType(DL, RV->getType());
1667  if (!RVEVT.isSimple())
1668  return false;
1669  MVT RVVT = RVEVT.getSimpleVT();
1670  MVT DestVT = VA.getLocVT();
1671 
1672  if (RVVT != DestVT && RVVT != MVT::i8 &&
1673  RVVT != MVT::i16 && RVVT != MVT::i32)
1674  return false;
1675 
1676  if (RVVT != DestVT) {
1677  switch (VA.getLocInfo()) {
1678  default:
1679  llvm_unreachable("Unknown loc info!");
1680  case CCValAssign::Full:
1681  llvm_unreachable("Full value assign but types don't match?");
1682  case CCValAssign::AExt:
1683  case CCValAssign::ZExt: {
1684  const TargetRegisterClass *RC =
1685  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1686  unsigned TmpReg = createResultReg(RC);
1687  if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1688  return false;
1689  SrcReg = TmpReg;
1690  break;
1691  }
1692  case CCValAssign::SExt: {
1693  const TargetRegisterClass *RC =
1694  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1695  unsigned TmpReg = createResultReg(RC);
1696  if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1697  return false;
1698  SrcReg = TmpReg;
1699  break;
1700  }
1701  }
1702  }
1703 
1704  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1705  TII.get(TargetOpcode::COPY), RetRegs[i])
1706  .addReg(SrcReg);
1707  }
1708  }
1709  }
1710 
1711  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1712  TII.get(PPC::BLR8));
1713 
1714  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1715  MIB.addReg(RetRegs[i], RegState::Implicit);
1716 
1717  return true;
1718 }
1719 
// Attempt to emit an integer extend of SrcReg into DestReg. Both
// signed and zero extensions are supported. Return false if we
// can't handle it.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                unsigned DestReg, bool IsZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i64)
    return false;
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
    return false;

  // Signed extensions use EXTSB, EXTSH, EXTSW.
  if (!IsZExt) {
    unsigned Opc;
    if (SrcVT == MVT::i8)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
    else if (SrcVT == MVT::i16)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
    else {
      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
      Opc = PPC::EXTSW_32_64;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
      .addReg(SrcReg);

  // Unsigned 32-bit extensions use RLWINM (rotate by 0 and AND with a
  // mask covering only the source's low bits: MB..31).
  } else if (DestVT == MVT::i32) {
    unsigned MB;
    if (SrcVT == MVT::i8)
      MB = 24;  // keep bits 24..31 (low byte)
    else {
      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
      MB = 16;  // keep bits 16..31 (low halfword)
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
            DestReg)
      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);

  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source),
  // clearing everything above the source width.
  } else {
    unsigned MB;
    if (SrcVT == MVT::i8)
      MB = 56;  // keep the low 8 bits
    else if (SrcVT == MVT::i16)
      MB = 48;  // keep the low 16 bits
    else
      MB = 32;  // keep the low 32 bits
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(PPC::RLDICL_32_64), DestReg)
      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
  }

  return true;
}
1773 
1774 // Attempt to fast-select an indirect branch instruction.
1775 bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1776  unsigned AddrReg = getRegForValue(I->getOperand(0));
1777  if (AddrReg == 0)
1778  return false;
1779 
1780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
1781  .addReg(AddrReg);
1782  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
1783 
1784  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1785  for (const BasicBlock *SuccBB : IB->successors())
1786  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
1787 
1788  return true;
1789 }
1790 
1791 // Attempt to fast-select an integer truncate instruction.
1792 bool PPCFastISel::SelectTrunc(const Instruction *I) {
1793  Value *Src = I->getOperand(0);
1794  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1795  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1796 
1797  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1798  return false;
1799 
1800  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1801  return false;
1802 
1803  unsigned SrcReg = getRegForValue(Src);
1804  if (!SrcReg)
1805  return false;
1806 
1807  // The only interesting case is when we need to switch register classes.
1808  if (SrcVT == MVT::i64) {
1809  unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
1810  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1811  TII.get(TargetOpcode::COPY),
1812  ResultReg).addReg(SrcReg, 0, PPC::sub_32);
1813  SrcReg = ResultReg;
1814  }
1815 
1816  updateValueMap(I, SrcReg);
1817  return true;
1818 }
1819 
1820 // Attempt to fast-select an integer extend instruction.
1821 bool PPCFastISel::SelectIntExt(const Instruction *I) {
1822  Type *DestTy = I->getType();
1823  Value *Src = I->getOperand(0);
1824  Type *SrcTy = Src->getType();
1825 
1826  bool IsZExt = isa<ZExtInst>(I);
1827  unsigned SrcReg = getRegForValue(Src);
1828  if (!SrcReg) return false;
1829 
1830  EVT SrcEVT, DestEVT;
1831  SrcEVT = TLI.getValueType(DL, SrcTy, true);
1832  DestEVT = TLI.getValueType(DL, DestTy, true);
1833  if (!SrcEVT.isSimple())
1834  return false;
1835  if (!DestEVT.isSimple())
1836  return false;
1837 
1838  MVT SrcVT = SrcEVT.getSimpleVT();
1839  MVT DestVT = DestEVT.getSimpleVT();
1840 
1841  // If we know the register class needed for the result of this
1842  // instruction, use it. Otherwise pick the register class of the
1843  // correct size that does not contain X0/R0, since we don't know
1844  // whether downstream uses permit that assignment.
1845  unsigned AssignedReg = FuncInfo.ValueMap[I];
1846  const TargetRegisterClass *RC =
1847  (AssignedReg ? MRI.getRegClass(AssignedReg) :
1848  (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1849  &PPC::GPRC_and_GPRC_NOR0RegClass));
1850  unsigned ResultReg = createResultReg(RC);
1851 
1852  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1853  return false;
1854 
1855  updateValueMap(I, ResultReg);
1856  return true;
1857 }
1858 
1859 // Attempt to fast-select an instruction that wasn't handled by
1860 // the table-generated machinery.
1861 bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1862 
1863  switch (I->getOpcode()) {
1864  case Instruction::Load:
1865  return SelectLoad(I);
1866  case Instruction::Store:
1867  return SelectStore(I);
1868  case Instruction::Br:
1869  return SelectBranch(I);
1870  case Instruction::IndirectBr:
1871  return SelectIndirectBr(I);
1872  case Instruction::FPExt:
1873  return SelectFPExt(I);
1874  case Instruction::FPTrunc:
1875  return SelectFPTrunc(I);
1876  case Instruction::SIToFP:
1877  return SelectIToFP(I, /*IsSigned*/ true);
1878  case Instruction::UIToFP:
1879  return SelectIToFP(I, /*IsSigned*/ false);
1880  case Instruction::FPToSI:
1881  return SelectFPToI(I, /*IsSigned*/ true);
1882  case Instruction::FPToUI:
1883  return SelectFPToI(I, /*IsSigned*/ false);
1884  case Instruction::Add:
1885  return SelectBinaryIntOp(I, ISD::ADD);
1886  case Instruction::Or:
1887  return SelectBinaryIntOp(I, ISD::OR);
1888  case Instruction::Sub:
1889  return SelectBinaryIntOp(I, ISD::SUB);
1890  case Instruction::Call:
1891  return selectCall(I);
1892  case Instruction::Ret:
1893  return SelectRet(I);
1894  case Instruction::Trunc:
1895  return SelectTrunc(I);
1896  case Instruction::ZExt:
1897  case Instruction::SExt:
1898  return SelectIntExt(I);
1899  // Here add other flavors of Instruction::XXX that automated
1900  // cases don't catch. For example, switches are terminators
1901  // that aren't yet handled.
1902  default:
1903  break;
1904  }
1905  return false;
1906 }
1907 
1908 // Materialize a floating-point constant into a register, and return
1909 // the register number (or zero if we failed to handle it).
1910 unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1911  // No plans to handle long double here.
1912  if (VT != MVT::f32 && VT != MVT::f64)
1913  return 0;
1914 
1915  // All FP constants are loaded from the constant pool.
1916  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
1917  assert(Align > 0 && "Unexpectedly missing alignment information!");
1918  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
1919  const TargetRegisterClass *RC =
1920  (VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass;
1921  unsigned DestReg = createResultReg(RC);
1922  CodeModel::Model CModel = TM.getCodeModel();
1923 
1924  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
1926  MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
1927 
1928  unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
1929  unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1930 
1931  PPCFuncInfo->setUsesTOCBasePtr();
1932  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
1933  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
1934  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
1935  TmpReg)
1936  .addConstantPoolIndex(Idx).addReg(PPC::X2);
1937  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1938  .addImm(0).addReg(TmpReg).addMemOperand(MMO);
1939  } else {
1940  // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
1941  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
1942  TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
1943  // But for large code model, we must generate a LDtocL followed
1944  // by the LF[SD].
1945  if (CModel == CodeModel::Large) {
1946  unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1947  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
1948  TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
1949  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1950  .addImm(0)
1951  .addReg(TmpReg2);
1952  } else
1953  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1954  .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
1955  .addReg(TmpReg)
1956  .addMemOperand(MMO);
1957  }
1958 
1959  return DestReg;
1960 }
1961 
1962 // Materialize the address of a global value into a register, and return
1963 // the register number (or zero if we failed to handle it).
1964 unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
1965  assert(VT == MVT::i64 && "Non-address!");
1966  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
1967  unsigned DestReg = createResultReg(RC);
1968 
1969  // Global values may be plain old object addresses, TLS object
1970  // addresses, constant pool entries, or jump tables. How we generate
1971  // code for these may depend on small, medium, or large code model.
1972  CodeModel::Model CModel = TM.getCodeModel();
1973 
1974  // FIXME: Jump tables are not yet required because fast-isel doesn't
1975  // handle switches; if that changes, we need them as well. For now,
1976  // what follows assumes everything's a generic (or TLS) global address.
1977 
1978  // FIXME: We don't yet handle the complexity of TLS.
1979  if (GV->isThreadLocal())
1980  return 0;
1981 
1982  PPCFuncInfo->setUsesTOCBasePtr();
1983  // For small code model, generate a simple TOC load.
1984  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
1985  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
1986  DestReg)
1987  .addGlobalAddress(GV)
1988  .addReg(PPC::X2);
1989  else {
1990  // If the address is an externally defined symbol, a symbol with common
1991  // or externally available linkage, a non-local function address, or a
1992  // jump table address (not yet needed), or if we are generating code
1993  // for large code model, we generate:
1994  // LDtocL(GV, ADDIStocHA(%X2, GV))
1995  // Otherwise we generate:
1996  // ADDItocL(ADDIStocHA(%X2, GV), GV)
1997  // Either way, start with the ADDIStocHA:
1998  unsigned HighPartReg = createResultReg(RC);
1999  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
2000  HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
2001 
2002  unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
2003  if (GVFlags & PPCII::MO_NLP_FLAG) {
2004  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
2005  DestReg).addGlobalAddress(GV).addReg(HighPartReg);
2006  } else {
2007  // Otherwise generate the ADDItocL.
2008  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
2009  DestReg).addReg(HighPartReg).addGlobalAddress(GV);
2010  }
2011  }
2012 
2013  return DestReg;
2014 }
2015 
2016 // Materialize a 32-bit integer constant into a register, and return
2017 // the register number (or zero if we failed to handle it).
2018 unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2019  const TargetRegisterClass *RC) {
2020  unsigned Lo = Imm & 0xFFFF;
2021  unsigned Hi = (Imm >> 16) & 0xFFFF;
2022 
2023  unsigned ResultReg = createResultReg(RC);
2024  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2025 
2026  if (isInt<16>(Imm))
2027  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2028  TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2029  .addImm(Imm);
2030  else if (Lo) {
2031  // Both Lo and Hi have nonzero bits.
2032  unsigned TmpReg = createResultReg(RC);
2033  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2034  TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2035  .addImm(Hi);
2036  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2037  TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2038  .addReg(TmpReg).addImm(Lo);
2039  } else
2040  // Just Hi bits.
2041  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2042  TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2043  .addImm(Hi);
2044 
2045  return ResultReg;
2046 }
2047 
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      Imm = ImmSh;
    else {
      // Shifting out trailing zeros isn't enough: build the high 32 bits
      // (Imm) here and OR in the low 32 bits (Remainder) further below.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.
  unsigned TmpReg2;
  if (Imm) {
    // RLDICR rotates left by Shift and clears bits below (63 - Shift).
    TmpReg2 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in the high halfword of the remainder, if any.
  unsigned TmpReg3, Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  // OR in the low halfword of the remainder, if any.
  if ((Lo = Remainder & 0xFFFF)) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}
2103 
2104 // Materialize an integer constant into a register, and return
2105 // the register number (or zero if we failed to handle it).
2106 unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2107  bool UseSExt) {
2108  // If we're using CR bit registers for i1 values, handle that as a special
2109  // case first.
2110  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
2111  unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2112  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2113  TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2114  return ImmReg;
2115  }
2116 
2117  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2118  VT != MVT::i1)
2119  return 0;
2120 
2121  const TargetRegisterClass *RC =
2122  ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2123  int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2124 
2125  // If the constant is in range, use a load-immediate.
2126  // Since LI will sign extend the constant we need to make sure that for
2127  // our zeroext constants that the sign extended constant fits into 16-bits -
2128  // a range of 0..0x7fff.
2129  if (isInt<16>(Imm)) {
2130  unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2131  unsigned ImmReg = createResultReg(RC);
2132  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
2133  .addImm(Imm);
2134  return ImmReg;
2135  }
2136 
2137  // Construct the constant piecewise.
2138  if (VT == MVT::i64)
2139  return PPCMaterialize64BitInt(Imm, RC);
2140  else if (VT == MVT::i32)
2141  return PPCMaterialize32BitInt(Imm, RC);
2142 
2143  return 0;
2144 }
2145 
2146 // Materialize a constant into a register, and return the register
2147 // number (or zero if we failed to handle it).
2148 unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
2149  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2150 
2151  // Only handle simple types.
2152  if (!CEVT.isSimple()) return 0;
2153  MVT VT = CEVT.getSimpleVT();
2154 
2155  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2156  return PPCMaterializeFP(CFP, VT);
2157  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2158  return PPCMaterializeGV(GV, VT);
2159  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2160  // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2161  // assumes that constant PHI operands will be zero extended, and failure to
2162  // match that assumption will cause problems if we sign extend here but
2163  // some user of a PHI is in a block for which we fall back to full SDAG
2164  // instruction selection.
2165  return PPCMaterializeInt(CI, VT, false);
2166 
2167  return 0;
2168 }
2169 
2170 // Materialize the address created by an alloca into a register, and
2171 // return the register number (or zero if we failed to handle it).
2172 unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2173  // Don't handle dynamic allocas.
2174  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
2175 
2176  MVT VT;
2177  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
2178 
2180  FuncInfo.StaticAllocaMap.find(AI);
2181 
2182  if (SI != FuncInfo.StaticAllocaMap.end()) {
2183  unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2184  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
2185  ResultReg).addFrameIndex(SI->second).addImm(0);
2186  return ResultReg;
2187  }
2188 
2189  return 0;
2190 }
2191 
// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load.  The folding only picks up one.  Extend this
// to check subsequent instructions for the same pattern and remove
// them.  Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent.  Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
//
// (OpNo is unused here: on success the load is re-emitted directly into
// MI's def register and MI itself is erased.)
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.  The switch decides,
  // per extend opcode, whether a widening load of this width subsumes
  // the extension.
  bool IsZExt = false;
  switch(MI->getOpcode()) {
  default:
    return false;

  case PPC::RLDICL:
  case PPC::RLDICL_32_64: {
    IsZExt = true;
    // A 64-bit clear-left keeps the low (64 - MB) bits; the fold is legal
    // iff the kept width covers the loaded width.
    unsigned MB = MI->getOperand(3).getImm();
    if ((VT == MVT::i8 && MB <= 56) ||
        (VT == MVT::i16 && MB <= 48) ||
        (VT == MVT::i32 && MB <= 32))
      break;
    return false;
  }

  case PPC::RLWINM:
  case PPC::RLWINM8: {
    IsZExt = true;
    // 32-bit mask form: kept width is (32 - MB) bits.
    unsigned MB = MI->getOperand(3).getImm();
    if ((VT == MVT::i8 && MB <= 24) ||
        (VT == MVT::i16 && MB <= 16))
      break;
    return false;
  }

  case PPC::EXTSB:
  case PPC::EXTSB8:
  case PPC::EXTSB8_32_64:
    /* There is no sign-extending load-byte instruction. */
    return false;

  case PPC::EXTSH:
  case PPC::EXTSH8:
  case PPC::EXTSH8_32_64: {
    if (VT != MVT::i16 && VT != MVT::i8)
      return false;
    break;
  }

  case PPC::EXTSW:
  case PPC::EXTSW_32_64: {
    if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
      return false;
    break;
  }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(LI->getOperand(0), Addr))
    return false;

  // Re-emit the load straight into the extend's def register, then
  // delete the now-redundant extend.
  unsigned ResultReg = MI->getOperand(0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
    return false;

  MI->eraseFromParent();
  return true;
}
2269 
// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.
// Returning false makes FastISel fall back to the normal SelectionDAG
// path for formal-argument lowering.
bool PPCFastISel::fastLowerArguments() {
  // Defer to normal argument lowering for now.  It's reasonably
  // efficient.  Consider doing something like ARM to handle the
  // case where all args fit in registers, no varargs, no float
  // or vector args.
  return false;
}
2279 
2280 // Handle materializing integer constants into a register. This is not
2281 // automatically generated for PowerPC, so must be explicitly created here.
2282 unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2283 
2284  if (Opc != ISD::Constant)
2285  return 0;
2286 
2287  // If we're using CR bit registers for i1 values, handle that as a special
2288  // case first.
2289  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
2290  unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2291  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2292  TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2293  return ImmReg;
2294  }
2295 
2296  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2297  VT != MVT::i1)
2298  return 0;
2299 
2300  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2301  &PPC::GPRCRegClass);
2302  if (VT == MVT::i64)
2303  return PPCMaterialize64BitInt(Imm, RC);
2304  else
2305  return PPCMaterialize32BitInt(Imm, RC);
2306 }
2307 
2308 // Override for ADDI and ADDI8 to set the correct register class
2309 // on RHS operand 0. The automatic infrastructure naively assumes
2310 // GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2311 // for these cases. At the moment, none of the other automatically
2312 // generated RI instructions require special treatment. However, once
2313 // SelectSelect is implemented, "isel" requires similar handling.
2314 //
2315 // Also be conservative about the output register class. Avoid
2316 // assigning R0 or X0 to the output register for GPRC and G8RC
2317 // register classes, as any such result could be used in ADDI, etc.,
2318 // where those regs have another meaning.
2319 unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2320  const TargetRegisterClass *RC,
2321  unsigned Op0, bool Op0IsKill,
2322  uint64_t Imm) {
2323  if (MachineInstOpcode == PPC::ADDI)
2324  MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2325  else if (MachineInstOpcode == PPC::ADDI8)
2326  MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2327 
2328  const TargetRegisterClass *UseRC =
2329  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2330  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2331 
2332  return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
2333  Op0, Op0IsKill, Imm);
2334 }
2335 
2336 // Override for instructions with one register operand to avoid use of
2337 // R0/X0. The automatic infrastructure isn't aware of the context so
2338 // we must be conservative.
2339 unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2340  const TargetRegisterClass* RC,
2341  unsigned Op0, bool Op0IsKill) {
2342  const TargetRegisterClass *UseRC =
2343  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2344  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2345 
2346  return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
2347 }
2348 
2349 // Override for instructions with two register operands to avoid use
2350 // of R0/X0. The automatic infrastructure isn't aware of the context
2351 // so we must be conservative.
2352 unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2353  const TargetRegisterClass* RC,
2354  unsigned Op0, bool Op0IsKill,
2355  unsigned Op1, bool Op1IsKill) {
2356  const TargetRegisterClass *UseRC =
2357  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2358  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2359 
2360  return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
2361  Op1, Op1IsKill);
2362 }
2363 
2364 namespace llvm {
2365  // Create the fast instruction selector for PowerPC64 ELF.
2367  const TargetLibraryInfo *LibInfo) {
2368  // Only available on 64-bit ELF for now.
2369  const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2370  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
2371  return new PPCFastISel(FuncInfo, LibInfo);
2372  return nullptr;
2373  }
2374 }
Return a value (possibly void), from a function.
void push_back(const T &Elt)
Definition: SmallVector.h:211
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:870
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
MVT getValVT() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:226
LLVMContext & Context
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
size_t i
LocInfo getLocInfo() const
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:39
unsigned getNumOperands() const
Definition: User.h:167
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
unsigned less or equal
Definition: InstrTypes.h:906
unsigned less than
Definition: InstrTypes.h:905
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:886
unsigned getID() const
Return the register class ID number.
unsigned getSizeInBits() const
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:896
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:100
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:271
An instruction for reading from memory.
Definition: Instructions.h:164
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:170
void reserve(size_type N)
Definition: SmallVector.h:377
unsigned getValNo() const
op_iterator op_begin()
Definition: User.h:205
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
bool isRegLoc() const
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:891
unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm)
Emit a MachineInstr with a register operand, an immediate, and a result register in the given registe...
Definition: FastISel.cpp:1855
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:496
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:890
A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
struct fuzzer::@269 Flags
const HexagonInstrInfo * TII
Class to represent struct types.
Definition: DerivedTypes.h:199
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill)
Emit a MachineInstr with one register operand and a result register in the given register class...
Definition: FastISel.cpp:1782
Reg
All possible values of the reg field in the ModR/M byte.
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:887
SimpleValueType SimpleTy
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
unsigned getLocReg() const
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:31
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:873
#define F(x, y, z)
Definition: MD5.cpp:51
void GetReturnInfo(Type *ReturnType, AttributeSet attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags...
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:121
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
BasicBlock * getSuccessor(unsigned i) const
int64_t getImm() const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:121
TargetInstrInfo - Interface to description of machine instruction set.
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:517
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
MVT - Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
Conditional or Unconditional Branch instruction.
MVT getLocVT() const
This is an important base class in LLVM.
Definition: Constant.h:42
MO_NLP_FLAG - If this bit is set, the symbol reference is actually to the non_lazy_ptr for the global...
Definition: PPC.h:74
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:97
bool isVector() const
isVector - Return true if this is a vector value type.
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1321
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
Indirect Branch Instruction.
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1947
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
op_iterator op_end()
Definition: User.h:207
uint32_t Offset
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:150
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:880
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Value * getOperand(unsigned i) const
Definition: User.h:145
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:889
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
Class to represent integer types.
Definition: DerivedTypes.h:39
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:232
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:897
EVT - Extended Value Type.
Definition: ValueTypes.h:31
static bool isAtomic(Instruction *I)
bool isSVR4ABI() const
Definition: PPCSubtarget.h:298
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:895
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
signed greater than
Definition: InstrTypes.h:907
bool needsCustom() const
The memory access writes data.
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:884
CCState - This class holds information needed while lowering arguments and return values...
This is the shared class of boolean and integer constants.
Definition: Constants.h:88
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:274
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:894
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
Provides information about what library functions are available for the current target.
CCValAssign - Represent assignment of one arg/retval to a location.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
signed less than
Definition: InstrTypes.h:909
unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill)
Emit a MachineInstr with two register operands and a result register in the given register class...
Definition: FastISel.cpp:1803
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:572
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:198
const MachineInstrBuilder & addFrameIndex(int Idx) const
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
signed less or equal
Definition: InstrTypes.h:910
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:77
This file defines the FastISel class.
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Representation of each machine instruction.
Definition: MachineInstr.h:52
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
Value * getCondition() const
unsigned greater or equal
Definition: InstrTypes.h:904
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
#define I(x, y, z)
Definition: MD5.cpp:54
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values into this state.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
FunctionLoweringInfo - This contains information that is global to a function that is used when lowering a region of the function.
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:888
iterator find(const KeyT &Val)
Definition: DenseMap.h:127
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:892
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:312
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:107
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:883
LLVM Value Representation.
Definition: Value.h:71
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:893
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:111
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
Primary interface to the complete machine description for the target machine.
IRTranslator LLVM IR MI
unsigned greater than
Definition: InstrTypes.h:903
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value containing the integer representation of that FP value.
static Optional< PPC::Predicate > getComparePred(CmpInst::Predicate Pred)
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:162
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:885
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit integer type.
const BasicBlock * getParent() const
Definition: Instruction.h:62
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:882
signed greater or equal
Definition: InstrTypes.h:908
This file describes how to lower LLVM code to machine code.
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
an instruction to allocate memory on the stack
Definition: Instructions.h:60
gep_type_iterator gep_type_begin(const User *GEP)