LLVM  3.7.0
PPCFastISel.cpp
Go to the documentation of this file.
1 //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the PowerPC-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // PPCGenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "PPC.h"
18 #include "PPCCallingConv.h"
19 #include "PPCISelLowering.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCSubtarget.h"
22 #include "PPCTargetMachine.h"
23 #include "llvm/ADT/Optional.h"
25 #include "llvm/CodeGen/FastISel.h"
31 #include "llvm/IR/CallingConv.h"
33 #include "llvm/IR/GlobalAlias.h"
34 #include "llvm/IR/GlobalVariable.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Operator.h"
37 #include "llvm/Support/Debug.h"
40 
41 //===----------------------------------------------------------------------===//
42 //
43 // TBD:
44 // fastLowerArguments: Handle simple cases.
45 // PPCMaterializeGV: Handle TLS.
46 // SelectCall: Handle function pointers.
47 // SelectCall: Handle multi-register return values.
48 // SelectCall: Optimize away nops for local calls.
49 // processCallArgs: Handle bit-converted arguments.
50 // finishCall: Handle multi-register return values.
51 // PPCComputeAddress: Handle parameter references as FrameIndex's.
52 // PPCEmitCmp: Handle immediate as operand 1.
53 // SelectCall: Handle small byval arguments.
54 // SelectIntrinsicCall: Implement.
55 // SelectSelect: Implement.
56 // Consider factoring isTypeLegal into the base class.
57 // Implement switches and jump tables.
58 //
59 //===----------------------------------------------------------------------===//
60 using namespace llvm;
61 
62 #define DEBUG_TYPE "ppcfastisel"
63 
64 namespace {
65 
// Describes a memory operand for fast-isel: either a base register or a
// frame index, together with a constant byte displacement.
struct Address {
  // Discriminator for the Base union below.
  enum { RegBase, FrameIndexBase } BaseType;

  union {
    unsigned Reg; // Valid when BaseType == RegBase.
    int FI;       // Valid when BaseType == FrameIndexBase.
  } Base;

  long Offset;

  // Default to a register base of zero with no displacement.
  Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; }
};
85 
// Fast instruction selector for PowerPC.  Translates a common subset of
// LLVM IR directly to MachineInstrs; every Select*/PPCEmit* routine
// returns false for anything it cannot handle, causing a fallback to
// SelectionDAG for that instruction.
class PPCFastISel final : public FastISel {

  const TargetMachine &TM;          // Owning target machine.
  const PPCSubtarget *PPCSubTarget; // Subtarget feature queries (VSX, CR bits, LFIWAX, ...).
  PPCFunctionInfo *PPCFuncInfo;     // PPC-specific per-function state.
  const TargetInstrInfo &TII;       // Source of instruction descriptors for BuildMI.
  const TargetLowering &TLI;        // Type-legality and value-type queries.
  LLVMContext *Context;             // Context of the function being selected.

  public:
  explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                       const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
        PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
        PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
        TII(*PPCSubTarget->getInstrInfo()),
        TLI(*PPCSubTarget->getTargetLowering()),
        Context(&FuncInfo.Fn->getContext()) {}

  // Backend specific FastISel code.
  private:
  // Entry points called by the FastISel driver.
  bool fastSelectInstruction(const Instruction *I) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;
  bool fastLowerArguments() override;
  unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
  // Non-override shadows of the FastISel emitters, used to constrain
  // operand register classes before emission.
  unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, bool Op0IsKill,
                           uint64_t Imm);
  unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC,
                          unsigned Op0, bool Op0IsKill);
  unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, bool Op0IsKill,
                           unsigned Op1, bool Op1IsKill);

  bool fastLowerCall(CallLoweringInfo &CLI) override;

  // Instruction selection routines.
  private:
  bool SelectLoad(const Instruction *I);
  bool SelectStore(const Instruction *I);
  bool SelectBranch(const Instruction *I);
  bool SelectIndirectBr(const Instruction *I);
  bool SelectFPExt(const Instruction *I);
  bool SelectFPTrunc(const Instruction *I);
  bool SelectIToFP(const Instruction *I, bool IsSigned);
  bool SelectFPToI(const Instruction *I, bool IsSigned);
  bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
  bool SelectRet(const Instruction *I);
  bool SelectTrunc(const Instruction *I);
  bool SelectIntExt(const Instruction *I);

  // Utility routines.
  private:
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isLoadTypeLegal(Type *Ty, MVT &VT);
  bool isValueAvailable(const Value *V) const;
  // True if Register belongs to the 64-bit VSX scalar class.
  bool isVSFRCRegister(unsigned Register) const {
    return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
  }
  // True if Register belongs to the 32-bit VSX scalar class.
  bool isVSSRCRegister(unsigned Register) const {
    return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID;
  }
  bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                  bool isZExt, unsigned DestReg);
  bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                   const TargetRegisterClass *RC, bool IsZExt = true,
                   unsigned FP64LoadOpc = PPC::LFD);
  bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
  bool PPCComputeAddress(const Value *Obj, Address &Addr);
  void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
                          unsigned &IndexReg);
  bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                     unsigned DestReg, bool IsZExt);
  unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true);
  unsigned PPCMaterialize32BitInt(int64_t Imm,
                                  const TargetRegisterClass *RC);
  unsigned PPCMaterialize64BitInt(int64_t Imm,
                                  const TargetRegisterClass *RC);
  unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
                           unsigned SrcReg, bool IsSigned);
  unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);

  // Call handling routines.
  private:
  // NOTE(review): a parameter line (likely the ArgFlags vector) appears
  // to be truncated from this copy of the declaration — verify against
  // the upstream source.
  bool processCallArgs(SmallVectorImpl<Value*> &Args,
                       SmallVectorImpl<unsigned> &ArgRegs,
                       SmallVectorImpl<MVT> &ArgVTs,
                       SmallVectorImpl<unsigned> &RegArgs,
                       CallingConv::ID CC,
                       unsigned &NumBytes,
                       bool IsVarArg);
  bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
  CCAssignFn *usePPC32CCs(unsigned Flag);

  private:
  // Tablegen-generated fast-isel patterns.
  #include "PPCGenFastISel.inc"

};
193 
194 } // end anonymous namespace
195 
196 #include "PPCGenCallingConv.inc"
197 
198 // Function whose sole purpose is to kill compiler warnings
199 // stemming from unused functions included from PPCGenCallingConv.inc.
200 CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
201  if (Flag == 1)
202  return CC_PPC32_SVR4;
203  else if (Flag == 2)
204  return CC_PPC32_SVR4_ByVal;
205  else if (Flag == 3)
206  return CC_PPC32_SVR4_VarArg;
207  else
208  return RetCC_PPC;
209 }
210 
  // NOTE(review): the signature of this function (it is called below as
  // getComparePred(CI->getPredicate()) and yields Optional<PPC::Predicate>)
  // appears to be truncated from this copy of the file.
  // Map an IR comparison predicate to the PPC branch predicate that a
  // single compare instruction can feed; an empty Optional means no
  // single compare suffices.
  switch (Pred) {
  // These are not representable with any single compare.
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
  default:
    return Optional<PPC::Predicate>();

  case CmpInst::FCMP_OEQ:
  case CmpInst::ICMP_EQ:
    return PPC::PRED_EQ;

  case CmpInst::FCMP_OGT:
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_SGT:
    return PPC::PRED_GT;

  case CmpInst::FCMP_OGE:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_SGE:
    return PPC::PRED_GE;

  case CmpInst::FCMP_OLT:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_SLT:
    return PPC::PRED_LT;

  case CmpInst::FCMP_OLE:
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_SLE:
    return PPC::PRED_LE;

  case CmpInst::FCMP_ONE:
  case CmpInst::ICMP_NE:
    return PPC::PRED_NE;

  // Ordered / unordered map onto the CR "unordered" bit.
  case CmpInst::FCMP_ORD:
    return PPC::PRED_NU;

  case CmpInst::FCMP_UNO:
    return PPC::PRED_UN;
  }
}
260 
261 // Determine whether the type Ty is simple enough to be handled by
262 // fast-isel, and return its equivalent machine type in VT.
263 // FIXME: Copied directly from ARM -- factor into base class?
264 bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
265  EVT Evt = TLI.getValueType(DL, Ty, true);
266 
267  // Only handle simple types.
268  if (Evt == MVT::Other || !Evt.isSimple()) return false;
269  VT = Evt.getSimpleVT();
270 
271  // Handle all legal types, i.e. a register that will directly hold this
272  // value.
273  return TLI.isTypeLegal(VT);
274 }
275 
276 // Determine whether the type Ty is simple enough to be handled by
277 // fast-isel as a load target, and return its equivalent machine type in VT.
278 bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
279  if (isTypeLegal(Ty, VT)) return true;
280 
281  // If this is a type than can be sign or zero-extended to a basic operation
282  // go ahead and accept it now.
283  if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
284  return true;
285  }
286 
287  return false;
288 }
289 
290 bool PPCFastISel::isValueAvailable(const Value *V) const {
291  if (!isa<Instruction>(V))
292  return true;
293 
294  const auto *I = cast<Instruction>(V);
295  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
296  return true;
297 
298  return false;
299 }
300 
301 // Given a value Obj, create an Address object Addr that represents its
302 // address. Return false if we can't handle it.
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;  // Sentinel: nothing to walk through.
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    // Constant expressions (e.g. GEPs of globals) can be walked freely.
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return PPCComputeAddress(U->getOperand(0), Addr);
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return PPCComputeAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return PPCComputeAddress(U->getOperand(0), Addr);
    break;
  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;       // Saved for rollback on failure.
    long TmpOffset = Addr.Offset;

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    // NOTE(review): the declaration of GTI (presumably a gep_type_iterator
    // over U) appears to be truncated from this copy of the file.
    for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
         II != IE; ++II, ++GTI) {
      const Value *Op = *II;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        // Struct field index: add the field's fixed layout offset.
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        // Sequential index: scale by the element allocation size.
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.Offset = TmpOffset;
    if (PPCComputeAddress(U->getOperand(0), Addr)) return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    // NOTE(review): the declaration of SI (the StaticAllocaMap iterator
    // receiving this find() result) appears to be truncated from this
    // copy of the file.
    FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      // Static allocas have fixed frame slots; address them by frame index.
      Addr.BaseType = Address::FrameIndexBase;
      Addr.Base.FI = SI->second;
      return true;
    }
    break;
  }
  }

  // FIXME: References to parameters fall through to the behavior
  // below. They should be able to reference a frame index since
  // they are stored to the stack, so we can get "ld rx, offset(r1)"
  // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
  // just contain the parameter. Try to handle this with a FI.

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0)
    Addr.Base.Reg = getRegForValue(Obj);

  // Prevent assignment of base register to X0, which is inappropriate
  // for loads and stores alike.
  if (Addr.Base.Reg != 0)
    MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);

  return Addr.Base.Reg != 0;
}
413 
414 // Fix up some addresses that can't be used directly. For example, if
415 // an offset won't fit in an instruction field, we may need to move it
416 // into an index register.
417 void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
418  unsigned &IndexReg) {
419 
420  // Check whether the offset fits in the instruction field.
421  if (!isInt<16>(Addr.Offset))
422  UseOffset = false;
423 
424  // If this is a stack pointer and the offset needs to be simplified then
425  // put the alloca address into a register, set the base type back to
426  // register and continue. This should almost never happen.
427  if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
428  unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
429  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
430  ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
431  Addr.Base.Reg = ResultReg;
432  Addr.BaseType = Address::RegBase;
433  }
434 
435  if (!UseOffset) {
436  IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context)
437  : Type::getInt64Ty(*Context));
438  const ConstantInt *Offset =
439  ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
440  IndexReg = PPCMaterializeInt(Offset, MVT::i64);
441  assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
442  }
443 }
444 
445 // Emit a load instruction if possible, returning true if we succeeded,
446 // otherwise false. See commentary below for how the register class of
447 // the load is determined.
448 bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
449  const TargetRegisterClass *RC,
450  bool IsZExt, unsigned FP64LoadOpc) {
451  unsigned Opc;
452  bool UseOffset = true;
453 
454  // If ResultReg is given, it determines the register class of the load.
455  // Otherwise, RC is the register class to use. If the result of the
456  // load isn't anticipated in this block, both may be zero, in which
457  // case we must make a conservative guess. In particular, don't assign
458  // R0 or X0 to the result register, as the result may be used in a load,
459  // store, add-immediate, or isel that won't permit this. (Though
460  // perhaps the spill and reload of live-exit values would handle this?)
461  const TargetRegisterClass *UseRC =
462  (ResultReg ? MRI.getRegClass(ResultReg) :
463  (RC ? RC :
464  (VT == MVT::f64 ? &PPC::F8RCRegClass :
465  (VT == MVT::f32 ? &PPC::F4RCRegClass :
466  (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
467  &PPC::GPRC_and_GPRC_NOR0RegClass)))));
468 
469  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
470 
471  switch (VT.SimpleTy) {
472  default: // e.g., vector types not handled
473  return false;
474  case MVT::i8:
475  Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
476  break;
477  case MVT::i16:
478  Opc = (IsZExt ?
479  (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
480  (Is32BitInt ? PPC::LHA : PPC::LHA8));
481  break;
482  case MVT::i32:
483  Opc = (IsZExt ?
484  (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
485  (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
486  if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
487  UseOffset = false;
488  break;
489  case MVT::i64:
490  Opc = PPC::LD;
491  assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
492  "64-bit load with 32-bit target??");
493  UseOffset = ((Addr.Offset & 3) == 0);
494  break;
495  case MVT::f32:
496  Opc = PPC::LFS;
497  break;
498  case MVT::f64:
499  Opc = FP64LoadOpc;
500  break;
501  }
502 
503  // If necessary, materialize the offset into a register and use
504  // the indexed form. Also handle stack pointers with special needs.
505  unsigned IndexReg = 0;
506  PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
507 
508  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
509  // be used.
510  bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg);
511  bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
512  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
513  bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD;
514  if ((Is32VSXLoad || Is64VSXLoad) &&
515  (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
516  (Addr.Offset == 0)) {
517  UseOffset = false;
518  }
519 
520  if (ResultReg == 0)
521  ResultReg = createResultReg(UseRC);
522 
523  // Note: If we still have a frame index here, we know the offset is
524  // in range, as otherwise PPCSimplifyAddress would have converted it
525  // into a RegBase.
526  if (Addr.BaseType == Address::FrameIndexBase) {
527  // VSX only provides an indexed load.
528  if (Is32VSXLoad || Is64VSXLoad) return false;
529 
530  MachineMemOperand *MMO =
531  FuncInfo.MF->getMachineMemOperand(
532  MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
533  MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
534  MFI.getObjectAlignment(Addr.Base.FI));
535 
536  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
537  .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
538 
539  // Base reg with offset in range.
540  } else if (UseOffset) {
541  // VSX only provides an indexed load.
542  if (Is32VSXLoad || Is64VSXLoad) return false;
543 
544  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
545  .addImm(Addr.Offset).addReg(Addr.Base.Reg);
546 
547  // Indexed form.
548  } else {
549  // Get the RR opcode corresponding to the RI one. FIXME: It would be
550  // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
551  // is hard to get at.
552  switch (Opc) {
553  default: llvm_unreachable("Unexpected opcode!");
554  case PPC::LBZ: Opc = PPC::LBZX; break;
555  case PPC::LBZ8: Opc = PPC::LBZX8; break;
556  case PPC::LHZ: Opc = PPC::LHZX; break;
557  case PPC::LHZ8: Opc = PPC::LHZX8; break;
558  case PPC::LHA: Opc = PPC::LHAX; break;
559  case PPC::LHA8: Opc = PPC::LHAX8; break;
560  case PPC::LWZ: Opc = PPC::LWZX; break;
561  case PPC::LWZ8: Opc = PPC::LWZX8; break;
562  case PPC::LWA: Opc = PPC::LWAX; break;
563  case PPC::LWA_32: Opc = PPC::LWAX_32; break;
564  case PPC::LD: Opc = PPC::LDX; break;
565  case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
566  case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
567  }
568  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
569  .addReg(Addr.Base.Reg).addReg(IndexReg);
570  }
571 
572  return true;
573 }
574 
575 // Attempt to fast-select a load instruction.
576 bool PPCFastISel::SelectLoad(const Instruction *I) {
577  // FIXME: No atomic loads are supported.
578  if (cast<LoadInst>(I)->isAtomic())
579  return false;
580 
581  // Verify we have a legal type before going any further.
582  MVT VT;
583  if (!isLoadTypeLegal(I->getType(), VT))
584  return false;
585 
586  // See if we can handle this address.
587  Address Addr;
588  if (!PPCComputeAddress(I->getOperand(0), Addr))
589  return false;
590 
591  // Look at the currently assigned register for this instruction
592  // to determine the required register class. This is necessary
593  // to constrain RA from using R0/X0 when this is not legal.
594  unsigned AssignedReg = FuncInfo.ValueMap[I];
595  const TargetRegisterClass *RC =
596  AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
597 
598  unsigned ResultReg = 0;
599  if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
600  return false;
601  updateValueMap(I, ResultReg);
602  return true;
603 }
604 
605 // Emit a store instruction to store SrcReg at Addr.
// Emit a store instruction to store SrcReg at Addr.
bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // The source register's class determines 32- vs 64-bit opcodes.
  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Choose the D-form (reg+imm) store opcode; STD is DS-form and needs a
  // word-aligned displacement.
  switch (VT.SimpleTy) {
  default: // e.g., vector types not handled
    return false;
  case MVT::i8:
    Opc = Is32BitInt ? PPC::STB : PPC::STB8;
    break;
  case MVT::i16:
    Opc = Is32BitInt ? PPC::STH : PPC::STH8;
    break;
  case MVT::i32:
    assert(Is32BitInt && "Not GPRC for i32??");
    Opc = PPC::STW;
    break;
  case MVT::i64:
    Opc = PPC::STD;
    UseOffset = ((Addr.Offset & 3) == 0);
    break;
  case MVT::f32:
    Opc = PPC::STFS;
    break;
  case MVT::f64:
    Opc = PPC::STFD;
    break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegister(SrcReg);
  bool IsVSFRC = isVSFRCRegister(SrcReg);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlignment(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg)
      .addImm(Addr.Offset)
      .addFrameIndex(Addr.Base.FI)
      .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
    default: llvm_unreachable("Unexpected opcode!");
    case PPC::STB: Opc = PPC::STBX; break;
    case PPC::STH : Opc = PPC::STHX; break;
    case PPC::STW : Opc = PPC::STWX; break;
    case PPC::STB8: Opc = PPC::STBX8; break;
    case PPC::STH8: Opc = PPC::STHX8; break;
    // Listed for completeness; the i32 path above always selects STW, so
    // this arm is unreachable from the assignments in this function.
    case PPC::STW8: Opc = PPC::STWX8; break;
    case PPC::STD: Opc = PPC::STDX; break;
    case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
    case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
716 
717 // Attempt to fast-select a store instruction.
718 bool PPCFastISel::SelectStore(const Instruction *I) {
719  Value *Op0 = I->getOperand(0);
720  unsigned SrcReg = 0;
721 
722  // FIXME: No atomics loads are supported.
723  if (cast<StoreInst>(I)->isAtomic())
724  return false;
725 
726  // Verify we have a legal type before going any further.
727  MVT VT;
728  if (!isLoadTypeLegal(Op0->getType(), VT))
729  return false;
730 
731  // Get the value to be stored into a register.
732  SrcReg = getRegForValue(Op0);
733  if (SrcReg == 0)
734  return false;
735 
736  // See if we can handle this address.
737  Address Addr;
738  if (!PPCComputeAddress(I->getOperand(1), Addr))
739  return false;
740 
741  if (!PPCEmitStore(VT, SrcReg, Addr))
742  return false;
743 
744  return true;
745 }
746 
747 // Attempt to fast-select a branch instruction.
// Attempt to fast-select a branch instruction.
bool PPCFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *BrBB = FuncInfo.MBB;
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; // taken
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; // not taken

  // For now, just try the simplest case where it's fed by a compare.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (isValueAvailable(CI)) {
      // Map the IR predicate to a PPC branch predicate; some FP
      // predicates have no single-compare equivalent and cause a bailout.
      Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
      if (!OptPPCPred)
        return false;

      PPC::Predicate PPCPred = OptPPCPred.getValue();

      // Take advantage of fall-through opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        PPCPred = PPC::InvertPredicate(PPCPred);
      }

      unsigned CondReg = createResultReg(&PPC::CRRCRegClass);

      if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
                      CondReg))
        return false;

      // Conditional branch to TBB; fastEmitBranch handles the FBB edge
      // (fall-through or unconditional branch) and its successor entry.
      BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
        .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
      fastEmitBranch(FBB, DbgLoc);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (const ConstantInt *CI =
                 dyn_cast<ConstantInt>(BI->getCondition())) {
    // Constant condition: branch unconditionally to whichever side is taken.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(Target, DbgLoc);
    return true;
  }

  // FIXME: ARM looks for a case where the block containing the compare
  // has been split from the block containing the branch. If this happens,
  // there is a vreg available containing the result of the compare. I'm
  // not sure we can do much, as we've lost the predicate information with
  // the compare instruction -- we have a 4-bit CR but don't know which bit
  // to test here.
  return false;
}
797 
798 // Attempt to emit a compare of the two source values. Signed and unsigned
799 // comparisons are supported. Return false if we can't handle it.
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, unsigned DestReg) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // When i1 lives in a CR bit, this GPR-based compare doesn't apply.
  if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  long Imm = 0;
  bool UseImm = false;

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      // Extension kind decides whether the 16-bit window is unsigned
      // (cmpl?i) or signed (cmp?i).
      Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  // Select the compare opcode: FP, 32-bit, or 64-bit; register or
  // immediate; logical (unsigned) or arithmetic (signed).
  unsigned CmpOpc;
  bool NeedsExt = false;  // Sub-word ints must be extended to i32 first.
  switch (SrcVT.SimpleTy) {
  default: return false;
  case MVT::f32:
    CmpOpc = PPC::FCMPUS;
    break;
  case MVT::f64:
    CmpOpc = PPC::FCMPUD;
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    NeedsExt = true;
    // Intentional fall-through.
  case MVT::i32:
    if (!UseImm)
      CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
    else
      CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
    break;
  case MVT::i64:
    if (!UseImm)
      CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
    else
      CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
    break;
  }

  unsigned SrcReg1 = getRegForValue(SrcValue1);
  if (SrcReg1 == 0)
    return false;

  // Operand 2 only needs a register when it isn't a foldable immediate.
  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (SrcReg2 == 0)
      return false;
  }

  // Widen sub-word operands to i32 before comparing.
  if (NeedsExt) {
    unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addImm(Imm);

  return true;
}
893 
894 // Attempt to fast-select a floating-point extend instruction.
895 bool PPCFastISel::SelectFPExt(const Instruction *I) {
896  Value *Src = I->getOperand(0);
897  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
898  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
899 
900  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
901  return false;
902 
903  unsigned SrcReg = getRegForValue(Src);
904  if (!SrcReg)
905  return false;
906 
907  // No code is generated for a FP extend.
908  updateValueMap(I, SrcReg);
909  return true;
910 }
911 
912 // Attempt to fast-select a floating-point truncate instruction.
913 bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
914  Value *Src = I->getOperand(0);
915  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
916  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
917 
918  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
919  return false;
920 
921  unsigned SrcReg = getRegForValue(Src);
922  if (!SrcReg)
923  return false;
924 
925  // Round the result to single precision.
926  unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
927  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
928  .addReg(SrcReg);
929 
930  updateValueMap(I, DestReg);
931  return true;
932 }
933 
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
// stack slot and 4-byte store/load sequence.  Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
//
// Returns the FPR that holds the value, or 0 on failure.  IsSigned
// selects sign- vs zero-extension for i32 sources and which reload
// instruction is used.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit so the 8-byte store
  // below writes a fully defined doubleword.
  if (SrcVT == MVT::i32) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return 0;
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return 0;

  // Load the integer value into an FPR.  The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      // Zero-extending word load; on big-endian the significant word of
      // the stored doubleword sits at offset 4, on little-endian at 0.
      LoadOpc = PPC::LFIWZX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    } else if (PPCSubTarget->hasLFIWAX()) {
      // Sign-extending word load, only when the subtarget provides it.
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned ResultReg = 0;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return 0;

  return ResultReg;
}
982 
// Attempt to fast-select an integer-to-floating-point conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only scalar f32/f64 destinations are handled.
  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  // Only i8/i16/i32/i64 sources are handled.
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !PPCSubTarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float.  Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding.  If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
    return false;

  // Extend the input if necessary.  Sub-word sources are widened to i64
  // (zero-extended for unsigned converts, sign-extended otherwise).
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR.
  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (FPReg == 0)
    return false;

  // Determine the opcode for the conversion.  FCFIDS/FCFIDUS produce a
  // single-precision result directly; FCFID/FCFIDU produce double.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}
1054 
1055 // Move the floating-point value in SrcReg into an integer destination
1056 // register, and return the register (or zero if we can't handle it).
1057 // FIXME: When direct register moves are implemented (see PowerISA 2.07),
1058 // those should be used instead of moving via a stack slot when the
1059 // subtarget permits.
1060 unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
1061  unsigned SrcReg, bool IsSigned) {
1062  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1063  // Note that if have STFIWX available, we could use a 4-byte stack
1064  // slot for i32, but this being fast-isel we'll just go with the
1065  // easiest code gen possible.
1066  Address Addr;
1067  Addr.BaseType = Address::FrameIndexBase;
1068  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
1069 
1070  // Store the value from the FPR.
1071  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
1072  return 0;
1073 
1074  // Reload it into a GPR. If we want an i32, modify the address
1075  // to have a 4-byte offset so we load from the right place.
1076  if (VT == MVT::i32)
1077  Addr.Offset = 4;
1078 
1079  // Look at the currently assigned register for this instruction
1080  // to determine the required register class.
1081  unsigned AssignedReg = FuncInfo.ValueMap[I];
1082  const TargetRegisterClass *RC =
1083  AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
1084 
1085  unsigned ResultReg = 0;
1086  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
1087  return 0;
1088 
1089  return ResultReg;
1090 }
1091 
// Attempt to fast-select a floating-point-to-integer conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only i32 and i64 destinations are handled.
  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  // Only scalar f32/f64 sources are handled.
  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // Convert f32 to f64 if necessary.  This is just a meaningless copy
  // to get the register class right.  COPY_TO_REGCLASS is needed since
  // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass) {
    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
      .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
    SrcReg = TmpReg;
  }

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs.  Without FCTIWUZ, an unsigned i32 convert
  // falls back to FCTIDZ (convert to i64, truncated on reload).
  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
  unsigned Opc;

  if (DstVT == MVT::i32)
    if (IsSigned)
      Opc = PPC::FCTIWZ;
    else
      Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
  else
    Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
  if (IntReg == 0)
    return false;

  updateValueMap(I, IntReg);
  return true;
}
1157 
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.  If there is no register,
  // make a conservative choice (don't assign R0).
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the 32-bit or 64-bit flavor of the opcode based on the class.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // Handle case of small immediate operand.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(Imm)) {
      // Rewrite to the immediate form of the opcode.  ADDI forms forbid
      // R0/X0 as the base operand, so constrain SrcReg1's class where
      // needed.  Subtraction becomes an add of the negated immediate,
      // except -32768 whose negation doesn't fit in 16 bits.
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  // Reverse operands for subtract-from.
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
1262 
1263 // Handle arguments to a call that we're attempting to fast-select.
1264 // Return false if the arguments are too complex for us at the moment.
1265 bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
1266  SmallVectorImpl<unsigned> &ArgRegs,
1267  SmallVectorImpl<MVT> &ArgVTs,
1269  SmallVectorImpl<unsigned> &RegArgs,
1270  CallingConv::ID CC,
1271  unsigned &NumBytes,
1272  bool IsVarArg) {
1274  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1275 
1276  // Reserve space for the linkage area on the stack.
1277  unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
1278  CCInfo.AllocateStack(LinkageSize, 8);
1279 
1280  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
1281 
1282  // Bail out if we can't handle any of the arguments.
1283  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1284  CCValAssign &VA = ArgLocs[I];
1285  MVT ArgVT = ArgVTs[VA.getValNo()];
1286 
1287  // Skip vector arguments for now, as well as long double and
1288  // uint128_t, and anything that isn't passed in a register.
1289  if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1290  !VA.isRegLoc() || VA.needsCustom())
1291  return false;
1292 
1293  // Skip bit-converted arguments for now.
1294  if (VA.getLocInfo() == CCValAssign::BCvt)
1295  return false;
1296  }
1297 
1298  // Get a count of how many bytes are to be pushed onto the stack.
1299  NumBytes = CCInfo.getNextStackOffset();
1300 
1301  // The prolog code of the callee may store up to 8 GPR argument registers to
1302  // the stack, allowing va_start to index over them in memory if its varargs.
1303  // Because we cannot tell if this is needed on the caller side, we have to
1304  // conservatively assume that it is needed. As such, make sure we have at
1305  // least enough stack space for the caller to store the 8 GPRs.
1306  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1307  NumBytes = std::max(NumBytes, LinkageSize + 64);
1308 
1309  // Issue CALLSEQ_START.
1310  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1311  TII.get(TII.getCallFrameSetupOpcode()))
1312  .addImm(NumBytes);
1313 
1314  // Prepare to assign register arguments. Every argument uses up a
1315  // GPR protocol register even if it's passed in a floating-point
1316  // register (unless we're using the fast calling convention).
1317  unsigned NextGPR = PPC::X3;
1318  unsigned NextFPR = PPC::F1;
1319 
1320  // Process arguments.
1321  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1322  CCValAssign &VA = ArgLocs[I];
1323  unsigned Arg = ArgRegs[VA.getValNo()];
1324  MVT ArgVT = ArgVTs[VA.getValNo()];
1325 
1326  // Handle argument promotion and bitcasts.
1327  switch (VA.getLocInfo()) {
1328  default:
1329  llvm_unreachable("Unknown loc info!");
1330  case CCValAssign::Full:
1331  break;
1332  case CCValAssign::SExt: {
1333  MVT DestVT = VA.getLocVT();
1334  const TargetRegisterClass *RC =
1335  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1336  unsigned TmpReg = createResultReg(RC);
1337  if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1338  llvm_unreachable("Failed to emit a sext!");
1339  ArgVT = DestVT;
1340  Arg = TmpReg;
1341  break;
1342  }
1343  case CCValAssign::AExt:
1344  case CCValAssign::ZExt: {
1345  MVT DestVT = VA.getLocVT();
1346  const TargetRegisterClass *RC =
1347  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1348  unsigned TmpReg = createResultReg(RC);
1349  if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1350  llvm_unreachable("Failed to emit a zext!");
1351  ArgVT = DestVT;
1352  Arg = TmpReg;
1353  break;
1354  }
1355  case CCValAssign::BCvt: {
1356  // FIXME: Not yet handled.
1357  llvm_unreachable("Should have bailed before getting here!");
1358  break;
1359  }
1360  }
1361 
1362  // Copy this argument to the appropriate register.
1363  unsigned ArgReg;
1364  if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1365  ArgReg = NextFPR++;
1366  if (CC != CallingConv::Fast)
1367  ++NextGPR;
1368  } else
1369  ArgReg = NextGPR++;
1370 
1371  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1372  TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1373  RegArgs.push_back(ArgReg);
1374  }
1375 
1376  return true;
1377 }
1378 
1379 // For a call that we've determined we can fast-select, finish the
1380 // call sequence and generate a copy to obtain the return value (if any).
1381 bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1382  CallingConv::ID CC = CLI.CallConv;
1383 
1384  // Issue CallSEQ_END.
1385  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1386  TII.get(TII.getCallFrameDestroyOpcode()))
1387  .addImm(NumBytes).addImm(0);
1388 
1389  // Next, generate a copy to obtain the return value.
1390  // FIXME: No multi-register return values yet, though I don't foresee
1391  // any real difficulties there.
1392  if (RetVT != MVT::isVoid) {
1394  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1395  CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1396  CCValAssign &VA = RVLocs[0];
1397  assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1398  assert(VA.isRegLoc() && "Can only return in registers!");
1399 
1400  MVT DestVT = VA.getValVT();
1401  MVT CopyVT = DestVT;
1402 
1403  // Ints smaller than a register still arrive in a full 64-bit
1404  // register, so make sure we recognize this.
1405  if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1406  CopyVT = MVT::i64;
1407 
1408  unsigned SourcePhysReg = VA.getLocReg();
1409  unsigned ResultReg = 0;
1410 
1411  if (RetVT == CopyVT) {
1412  const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1413  ResultReg = createResultReg(CpyRC);
1414 
1415  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1416  TII.get(TargetOpcode::COPY), ResultReg)
1417  .addReg(SourcePhysReg);
1418 
1419  // If necessary, round the floating result to single precision.
1420  } else if (CopyVT == MVT::f64) {
1421  ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1422  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
1423  ResultReg).addReg(SourcePhysReg);
1424 
1425  // If only the low half of a general register is needed, generate
1426  // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1427  // used along the fast-isel path (not lowered), and downstream logic
1428  // also doesn't like a direct subreg copy on a physical reg.)
1429  } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1430  ResultReg = createResultReg(&PPC::GPRCRegClass);
1431  // Convert physical register from G8RC to GPRC.
1432  SourcePhysReg -= PPC::X0 - PPC::R0;
1433  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1434  TII.get(TargetOpcode::COPY), ResultReg)
1435  .addReg(SourcePhysReg);
1436  }
1437 
1438  assert(ResultReg && "ResultReg unset!");
1439  CLI.InRegs.push_back(SourcePhysReg);
1440  CLI.ResultReg = ResultReg;
1441  CLI.NumResultRegs = 1;
1442  }
1443 
1444  return true;
1445 }
1446 
1447 bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1448  CallingConv::ID CC = CLI.CallConv;
1449  bool IsTailCall = CLI.IsTailCall;
1450  bool IsVarArg = CLI.IsVarArg;
1451  const Value *Callee = CLI.Callee;
1452  const MCSymbol *Symbol = CLI.Symbol;
1453 
1454  if (!Callee && !Symbol)
1455  return false;
1456 
1457  // Allow SelectionDAG isel to handle tail calls.
1458  if (IsTailCall)
1459  return false;
1460 
1461  // Let SDISel handle vararg functions.
1462  if (IsVarArg)
1463  return false;
1464 
1465  // Handle simple calls for now, with legal return types and
1466  // those that can be extended.
1467  Type *RetTy = CLI.RetTy;
1468  MVT RetVT;
1469  if (RetTy->isVoidTy())
1470  RetVT = MVT::isVoid;
1471  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1472  RetVT != MVT::i8)
1473  return false;
1474  else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
1475  // We can't handle boolean returns when CR bits are in use.
1476  return false;
1477 
1478  // FIXME: No multi-register return values yet.
1479  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1480  RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1481  RetVT != MVT::f64) {
1483  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1484  CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1485  if (RVLocs.size() > 1)
1486  return false;
1487  }
1488 
1489  // Bail early if more than 8 arguments, as we only currently
1490  // handle arguments passed in registers.
1491  unsigned NumArgs = CLI.OutVals.size();
1492  if (NumArgs > 8)
1493  return false;
1494 
1495  // Set up the argument vectors.
1497  SmallVector<unsigned, 8> ArgRegs;
1498  SmallVector<MVT, 8> ArgVTs;
1500 
1501  Args.reserve(NumArgs);
1502  ArgRegs.reserve(NumArgs);
1503  ArgVTs.reserve(NumArgs);
1504  ArgFlags.reserve(NumArgs);
1505 
1506  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1507  // Only handle easy calls for now. It would be reasonably easy
1508  // to handle <= 8-byte structures passed ByVal in registers, but we
1509  // have to ensure they are right-justified in the register.
1510  ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1511  if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1512  return false;
1513 
1514  Value *ArgValue = CLI.OutVals[i];
1515  Type *ArgTy = ArgValue->getType();
1516  MVT ArgVT;
1517  if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1518  return false;
1519 
1520  if (ArgVT.isVector())
1521  return false;
1522 
1523  unsigned Arg = getRegForValue(ArgValue);
1524  if (Arg == 0)
1525  return false;
1526 
1527  Args.push_back(ArgValue);
1528  ArgRegs.push_back(Arg);
1529  ArgVTs.push_back(ArgVT);
1530  ArgFlags.push_back(Flags);
1531  }
1532 
1533  // Process the arguments.
1534  SmallVector<unsigned, 8> RegArgs;
1535  unsigned NumBytes;
1536 
1537  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1538  RegArgs, CC, NumBytes, IsVarArg))
1539  return false;
1540 
1541  MachineInstrBuilder MIB;
1542  // FIXME: No handling for function pointers yet. This requires
1543  // implementing the function descriptor (OPD) setup.
1544  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1545  if (!GV) {
1546  // patchpoints are a special case; they always dispatch to a pointer value.
1547  // However, we don't actually want to generate the indirect call sequence
1548  // here (that will be generated, as necessary, during asm printing), and
1549  // the call we generate here will be erased by FastISel::selectPatchpoint,
1550  // so don't try very hard...
1551  if (CLI.IsPatchPoint)
1552  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
1553  else
1554  return false;
1555  } else {
1556  // Build direct call with NOP for TOC restore.
1557  // FIXME: We can and should optimize away the NOP for local calls.
1558  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1559  TII.get(PPC::BL8_NOP));
1560  // Add callee.
1561  MIB.addGlobalAddress(GV);
1562  }
1563 
1564  // Add implicit physical register uses to the call.
1565  for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
1566  MIB.addReg(RegArgs[II], RegState::Implicit);
1567 
1568  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1569  // into the call.
1570  PPCFuncInfo->setUsesTOCBasePtr();
1571  MIB.addReg(PPC::X2, RegState::Implicit);
1572 
1573  // Add a register mask with the call-preserved registers. Proper
1574  // defs for return values will be added by setPhysRegsDeadExcept().
1575  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1576 
1577  CLI.Call = MIB;
1578 
1579  // Finish off the call including any return values.
1580  return finishCall(RetVT, CLI, NumBytes);
1581 }
1582 
1583 // Attempt to fast-select a return instruction.
1584 bool PPCFastISel::SelectRet(const Instruction *I) {
1585 
1586  if (!FuncInfo.CanLowerReturn)
1587  return false;
1588 
1589  const ReturnInst *Ret = cast<ReturnInst>(I);
1590  const Function &F = *I->getParent()->getParent();
1591 
1592  // Build a list of return value registers.
1593  SmallVector<unsigned, 4> RetRegs;
1594  CallingConv::ID CC = F.getCallingConv();
1595 
1596  if (Ret->getNumOperands() > 0) {
1598  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1599 
1600  // Analyze operands of the call, assigning locations to each operand.
1602  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1603  CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1604  const Value *RV = Ret->getOperand(0);
1605 
1606  // FIXME: Only one output register for now.
1607  if (ValLocs.size() > 1)
1608  return false;
1609 
1610  // Special case for returning a constant integer of any size.
1611  // Materialize the constant as an i64 and copy it to the return
1612  // register. We still need to worry about properly extending the sign. E.g:
1613  // If the constant has only one bit, it means it is a boolean. Therefore
1614  // we can't use PPCMaterializeInt because it extends the sign which will
1615  // cause negations of the returned value to be incorrect as they are
1616  // implemented as the flip of the least significant bit.
1617  if (isa<ConstantInt>(*RV)) {
1618  const Constant *C = cast<Constant>(RV);
1619 
1620  CCValAssign &VA = ValLocs[0];
1621 
1622  unsigned RetReg = VA.getLocReg();
1623  unsigned SrcReg = PPCMaterializeInt(C, MVT::i64,
1624  VA.getLocInfo() == CCValAssign::SExt);
1625 
1626  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1627  TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1628 
1629  RetRegs.push_back(RetReg);
1630 
1631  } else {
1632  unsigned Reg = getRegForValue(RV);
1633 
1634  if (Reg == 0)
1635  return false;
1636 
1637  // Copy the result values into the output registers.
1638  for (unsigned i = 0; i < ValLocs.size(); ++i) {
1639 
1640  CCValAssign &VA = ValLocs[i];
1641  assert(VA.isRegLoc() && "Can only return in registers!");
1642  RetRegs.push_back(VA.getLocReg());
1643  unsigned SrcReg = Reg + VA.getValNo();
1644 
1645  EVT RVEVT = TLI.getValueType(DL, RV->getType());
1646  if (!RVEVT.isSimple())
1647  return false;
1648  MVT RVVT = RVEVT.getSimpleVT();
1649  MVT DestVT = VA.getLocVT();
1650 
1651  if (RVVT != DestVT && RVVT != MVT::i8 &&
1652  RVVT != MVT::i16 && RVVT != MVT::i32)
1653  return false;
1654 
1655  if (RVVT != DestVT) {
1656  switch (VA.getLocInfo()) {
1657  default:
1658  llvm_unreachable("Unknown loc info!");
1659  case CCValAssign::Full:
1660  llvm_unreachable("Full value assign but types don't match?");
1661  case CCValAssign::AExt:
1662  case CCValAssign::ZExt: {
1663  const TargetRegisterClass *RC =
1664  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1665  unsigned TmpReg = createResultReg(RC);
1666  if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1667  return false;
1668  SrcReg = TmpReg;
1669  break;
1670  }
1671  case CCValAssign::SExt: {
1672  const TargetRegisterClass *RC =
1673  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1674  unsigned TmpReg = createResultReg(RC);
1675  if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1676  return false;
1677  SrcReg = TmpReg;
1678  break;
1679  }
1680  }
1681  }
1682 
1683  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1684  TII.get(TargetOpcode::COPY), RetRegs[i])
1685  .addReg(SrcReg);
1686  }
1687  }
1688  }
1689 
1690  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1691  TII.get(PPC::BLR8));
1692 
1693  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1694  MIB.addReg(RetRegs[i], RegState::Implicit);
1695 
1696  return true;
1697 }
1698 
// Attempt to emit an integer extend of SrcReg into DestReg.  Both
// signed and zero extensions are supported.  Return false if we
// can't handle it.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                unsigned DestReg, bool IsZExt) {
  // Only i8/i16/i32 sources extending to i32/i64 are supported.
  if (DestVT != MVT::i32 && DestVT != MVT::i64)
    return false;
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
    return false;

  // Signed extensions use EXTSB, EXTSH, EXTSW.
  if (!IsZExt) {
    unsigned Opc;
    if (SrcVT == MVT::i8)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
    else if (SrcVT == MVT::i16)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
    else {
      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
      Opc = PPC::EXTSW_32_64;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
      .addReg(SrcReg);

  // Unsigned 32-bit extensions use RLWINM.
  } else if (DestVT == MVT::i32) {
    unsigned MB;
    // MB selects the mask start bit: keep bits [MB,31], i.e. the low
    // 8 (MB=24) or 16 (MB=16) bits, clearing everything above.
    if (SrcVT == MVT::i8)
      MB = 24;
    else {
      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
      MB = 16;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
            DestReg)
      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);

  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
  } else {
    unsigned MB;
    // Keep only the low (64 - MB) bits: 8, 16, or 32.
    if (SrcVT == MVT::i8)
      MB = 56;
    else if (SrcVT == MVT::i16)
      MB = 48;
    else
      MB = 32;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(PPC::RLDICL_32_64), DestReg)
      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
  }

  return true;
}
1752 
1753 // Attempt to fast-select an indirect branch instruction.
1754 bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1755  unsigned AddrReg = getRegForValue(I->getOperand(0));
1756  if (AddrReg == 0)
1757  return false;
1758 
1759  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
1760  .addReg(AddrReg);
1761  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
1762 
1763  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1764  for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
1765  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
1766 
1767  return true;
1768 }
1769 
1770 // Attempt to fast-select an integer truncate instruction.
1771 bool PPCFastISel::SelectTrunc(const Instruction *I) {
1772  Value *Src = I->getOperand(0);
1773  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1774  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1775 
1776  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1777  return false;
1778 
1779  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1780  return false;
1781 
1782  unsigned SrcReg = getRegForValue(Src);
1783  if (!SrcReg)
1784  return false;
1785 
1786  // The only interesting case is when we need to switch register classes.
1787  if (SrcVT == MVT::i64) {
1788  unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
1789  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1790  TII.get(TargetOpcode::COPY),
1791  ResultReg).addReg(SrcReg, 0, PPC::sub_32);
1792  SrcReg = ResultReg;
1793  }
1794 
1795  updateValueMap(I, SrcReg);
1796  return true;
1797 }
1798 
// Attempt to fast-select an integer extend instruction (zext or sext).
bool PPCFastISel::SelectIntExt(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  // ZExt and SExt are the only two opcodes routed here (see
  // fastSelectInstruction), so !ZExt implies SExt.
  bool IsZExt = isa<ZExtInst>(I);
  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg) return false;

  EVT SrcEVT, DestEVT;
  SrcEVT = TLI.getValueType(DL, SrcTy, true);
  DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  // If we know the register class needed for the result of this
  // instruction, use it.  Otherwise pick the register class of the
  // correct size that does not contain X0/R0, since we don't know
  // whether downstream uses permit that assignment.
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
      &PPC::GPRC_and_GPRC_NOR0RegClass));
  unsigned ResultReg = createResultReg(RC);

  // Delegate the actual extension; it rejects type combinations it
  // can't handle.
  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
1837 
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery.  Returns true if the instruction
// was selected; false defers to the normal SelectionDAG path.
bool PPCFastISel::fastSelectInstruction(const Instruction *I) {

  // Dispatch on the IR opcode to the matching Select* helper.
  switch (I->getOpcode()) {
    // Memory operations.
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    // Control flow.
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    // Floating-point conversions.
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*IsSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*IsSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*IsSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*IsSigned*/ false);
    // Integer arithmetic/logic.
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISD::SUB);
    // Calls and returns.
    case Instruction::Call:
      return selectCall(I);
    case Instruction::Ret:
      return SelectRet(I);
    // Integer width changes.
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    // Here add other flavors of Instruction::XXX that automated
    // cases don't catch.  For example, switches are terminators
    // that aren't yet handled.
    default:
      break;
  }
  return false;
}
1886 
1887 // Materialize a floating-point constant into a register, and return
1888 // the register number (or zero if we failed to handle it).
1889 unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1890  // No plans to handle long double here.
1891  if (VT != MVT::f32 && VT != MVT::f64)
1892  return 0;
1893 
1894  // All FP constants are loaded from the constant pool.
1895  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
1896  assert(Align > 0 && "Unexpectedly missing alignment information!");
1897  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
1898  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
1899  CodeModel::Model CModel = TM.getCodeModel();
1900 
1901  MachineMemOperand *MMO =
1902  FuncInfo.MF->getMachineMemOperand(
1904  (VT == MVT::f32) ? 4 : 8, Align);
1905 
1906  unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
1907  unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1908 
1909  PPCFuncInfo->setUsesTOCBasePtr();
1910  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
1911  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
1912  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
1913  TmpReg)
1914  .addConstantPoolIndex(Idx).addReg(PPC::X2);
1915  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1916  .addImm(0).addReg(TmpReg).addMemOperand(MMO);
1917  } else {
1918  // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
1919  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
1920  TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
1921  // But for large code model, we must generate a LDtocL followed
1922  // by the LF[SD].
1923  if (CModel == CodeModel::Large) {
1924  unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1925  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
1926  TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
1927  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1928  .addImm(0).addReg(TmpReg2);
1929  } else
1930  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1931  .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
1932  .addReg(TmpReg)
1933  .addMemOperand(MMO);
1934  }
1935 
1936  return DestReg;
1937 }
1938 
1939 // Materialize the address of a global value into a register, and return
1940 // the register number (or zero if we failed to handle it).
1941 unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
1942  assert(VT == MVT::i64 && "Non-address!");
1943  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
1944  unsigned DestReg = createResultReg(RC);
1945 
1946  // Global values may be plain old object addresses, TLS object
1947  // addresses, constant pool entries, or jump tables. How we generate
1948  // code for these may depend on small, medium, or large code model.
1949  CodeModel::Model CModel = TM.getCodeModel();
1950 
1951  // FIXME: Jump tables are not yet required because fast-isel doesn't
1952  // handle switches; if that changes, we need them as well. For now,
1953  // what follows assumes everything's a generic (or TLS) global address.
1954 
1955  // FIXME: We don't yet handle the complexity of TLS.
1956  if (GV->isThreadLocal())
1957  return 0;
1958 
1959  PPCFuncInfo->setUsesTOCBasePtr();
1960  // For small code model, generate a simple TOC load.
1961  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
1962  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
1963  DestReg)
1964  .addGlobalAddress(GV)
1965  .addReg(PPC::X2);
1966  else {
1967  // If the address is an externally defined symbol, a symbol with common
1968  // or externally available linkage, a non-local function address, or a
1969  // jump table address (not yet needed), or if we are generating code
1970  // for large code model, we generate:
1971  // LDtocL(GV, ADDIStocHA(%X2, GV))
1972  // Otherwise we generate:
1973  // ADDItocL(ADDIStocHA(%X2, GV), GV)
1974  // Either way, start with the ADDIStocHA:
1975  unsigned HighPartReg = createResultReg(RC);
1976  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
1977  HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
1978 
1979  // If/when switches are implemented, jump tables should be handled
1980  // on the "if" path here.
1981  if (CModel == CodeModel::Large ||
1982  (GV->getType()->getElementType()->isFunctionTy() &&
1983  !GV->isStrongDefinitionForLinker()) ||
1984  GV->isDeclaration() || GV->hasCommonLinkage() ||
1986  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
1987  DestReg).addGlobalAddress(GV).addReg(HighPartReg);
1988  else
1989  // Otherwise generate the ADDItocL.
1990  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
1991  DestReg).addReg(HighPartReg).addGlobalAddress(GV);
1992  }
1993 
1994  return DestReg;
1995 }
1996 
1997 // Materialize a 32-bit integer constant into a register, and return
1998 // the register number (or zero if we failed to handle it).
1999 unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2000  const TargetRegisterClass *RC) {
2001  unsigned Lo = Imm & 0xFFFF;
2002  unsigned Hi = (Imm >> 16) & 0xFFFF;
2003 
2004  unsigned ResultReg = createResultReg(RC);
2005  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2006 
2007  if (isInt<16>(Imm))
2008  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2009  TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2010  .addImm(Imm);
2011  else if (Lo) {
2012  // Both Lo and Hi have nonzero bits.
2013  unsigned TmpReg = createResultReg(RC);
2014  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2015  TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2016  .addImm(Hi);
2017  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2018  TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2019  .addReg(TmpReg).addImm(Lo);
2020  } else
2021  // Just Hi bits.
2022  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2023  TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2024  .addImm(Hi);
2025 
2026  return ResultReg;
2027 }
2028 
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      // The shifted value fits; build it and shift it back into place below.
      Imm = ImmSh;
    else {
      // No luck: build the high 32 bits first (Imm >>= 32), remember the
      // low 32 bits in Remainder, and OR them in afterwards.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place (rotate left by Shift, clearing the low Shift bits).
  unsigned TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in the upper 16 bits of the remainder, if any.
  unsigned TmpReg3, Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  // OR in the lower 16 bits of the remainder, if any.
  if ((Lo = Remainder & 0xFFFF)) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}
2084 
2085 
2086 // Materialize an integer constant into a register, and return
2087 // the register number (or zero if we failed to handle it).
2088 unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
2089  bool UseSExt) {
2090  // If we're using CR bit registers for i1 values, handle that as a special
2091  // case first.
2092  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
2093  const ConstantInt *CI = cast<ConstantInt>(C);
2094  unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2095  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2096  TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2097  return ImmReg;
2098  }
2099 
2100  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
2101  VT != MVT::i8 && VT != MVT::i1)
2102  return 0;
2103 
2104  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2105  &PPC::GPRCRegClass);
2106 
2107  // If the constant is in range, use a load-immediate.
2108  const ConstantInt *CI = cast<ConstantInt>(C);
2109  if (isInt<16>(CI->getSExtValue())) {
2110  unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2111  unsigned ImmReg = createResultReg(RC);
2112  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
2113  .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() );
2114  return ImmReg;
2115  }
2116 
2117  // Construct the constant piecewise.
2118  int64_t Imm = CI->getZExtValue();
2119 
2120  if (VT == MVT::i64)
2121  return PPCMaterialize64BitInt(Imm, RC);
2122  else if (VT == MVT::i32)
2123  return PPCMaterialize32BitInt(Imm, RC);
2124 
2125  return 0;
2126 }
2127 
2128 // Materialize a constant into a register, and return the register
2129 // number (or zero if we failed to handle it).
2130 unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
2131  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2132 
2133  // Only handle simple types.
2134  if (!CEVT.isSimple()) return 0;
2135  MVT VT = CEVT.getSimpleVT();
2136 
2137  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2138  return PPCMaterializeFP(CFP, VT);
2139  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2140  return PPCMaterializeGV(GV, VT);
2141  else if (isa<ConstantInt>(C))
2142  return PPCMaterializeInt(C, VT, VT != MVT::i1);
2143 
2144  return 0;
2145 }
2146 
2147 // Materialize the address created by an alloca into a register, and
2148 // return the register number (or zero if we failed to handle it).
2149 unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2150  // Don't handle dynamic allocas.
2151  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
2152 
2153  MVT VT;
2154  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
2155 
2157  FuncInfo.StaticAllocaMap.find(AI);
2158 
2159  if (SI != FuncInfo.StaticAllocaMap.end()) {
2160  unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2161  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
2162  ResultReg).addFrameIndex(SI->second).addImm(0);
2163  return ResultReg;
2164  }
2165 
2166  return 0;
2167 }
2168 
// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load.  The folding only picks up one.  Extend this
// to check subsequent instructions for the same pattern and remove
// them.  Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent.  Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // NOTE: OpNo is not consulted here; the fold is decided purely from
  // MI's opcode and the load's type.

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.  Each case checks
  // that MI really is just an extension of the loaded width, so it can
  // be replaced by an extending load.
  bool IsZExt = false;
  switch(MI->getOpcode()) {
    default:
      return false;

    case PPC::RLDICL:
    case PPC::RLDICL_32_64: {
      IsZExt = true;
      // Operand 3 is the mask-begin; the fold is valid only if the mask
      // clears no more than the bits a zero-extending load of this width
      // already zeroes (MB <= 64 - width).
      unsigned MB = MI->getOperand(3).getImm();
      if ((VT == MVT::i8 && MB <= 56) ||
          (VT == MVT::i16 && MB <= 48) ||
          (VT == MVT::i32 && MB <= 32))
        break;
      return false;
    }

    case PPC::RLWINM:
    case PPC::RLWINM8: {
      IsZExt = true;
      // Same idea for the 32-bit rotate: MB <= 32 - width.
      unsigned MB = MI->getOperand(3).getImm();
      if ((VT == MVT::i8 && MB <= 24) ||
          (VT == MVT::i16 && MB <= 16))
        break;
      return false;
    }

    case PPC::EXTSB:
    case PPC::EXTSB8:
    case PPC::EXTSB8_32_64:
      /* There is no sign-extending load-byte instruction. */
      return false;

    case PPC::EXTSH:
    case PPC::EXTSH8:
    case PPC::EXTSH8_32_64: {
      if (VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }

    case PPC::EXTSW:
    case PPC::EXTSW_32_64: {
      if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(LI->getOperand(0), Addr))
    return false;

  // Re-emit the load directly into the register MI defined, then
  // delete the now-redundant extend.
  unsigned ResultReg = MI->getOperand(0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
    return false;

  MI->eraseFromParent();
  return true;
}
2246 
2247 // Attempt to lower call arguments in a faster way than done by
2248 // the selection DAG code.
2249 bool PPCFastISel::fastLowerArguments() {
2250  // Defer to normal argument lowering for now. It's reasonably
2251  // efficient. Consider doing something like ARM to handle the
2252  // case where all args fit in registers, no varargs, no float
2253  // or vector args.
2254  return false;
2255 }
2256 
2257 // Handle materializing integer constants into a register. This is not
2258 // automatically generated for PowerPC, so must be explicitly created here.
2259 unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2260 
2261  if (Opc != ISD::Constant)
2262  return 0;
2263 
2264  // If we're using CR bit registers for i1 values, handle that as a special
2265  // case first.
2266  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
2267  unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2268  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2269  TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2270  return ImmReg;
2271  }
2272 
2273  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
2274  VT != MVT::i8 && VT != MVT::i1)
2275  return 0;
2276 
2277  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2278  &PPC::GPRCRegClass);
2279  if (VT == MVT::i64)
2280  return PPCMaterialize64BitInt(Imm, RC);
2281  else
2282  return PPCMaterialize32BitInt(Imm, RC);
2283 }
2284 
2285 // Override for ADDI and ADDI8 to set the correct register class
2286 // on RHS operand 0. The automatic infrastructure naively assumes
2287 // GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2288 // for these cases. At the moment, none of the other automatically
2289 // generated RI instructions require special treatment. However, once
2290 // SelectSelect is implemented, "isel" requires similar handling.
2291 //
2292 // Also be conservative about the output register class. Avoid
2293 // assigning R0 or X0 to the output register for GPRC and G8RC
2294 // register classes, as any such result could be used in ADDI, etc.,
2295 // where those regs have another meaning.
2296 unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2297  const TargetRegisterClass *RC,
2298  unsigned Op0, bool Op0IsKill,
2299  uint64_t Imm) {
2300  if (MachineInstOpcode == PPC::ADDI)
2301  MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2302  else if (MachineInstOpcode == PPC::ADDI8)
2303  MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2304 
2305  const TargetRegisterClass *UseRC =
2306  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2307  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2308 
2309  return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
2310  Op0, Op0IsKill, Imm);
2311 }
2312 
2313 // Override for instructions with one register operand to avoid use of
2314 // R0/X0. The automatic infrastructure isn't aware of the context so
2315 // we must be conservative.
2316 unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2317  const TargetRegisterClass* RC,
2318  unsigned Op0, bool Op0IsKill) {
2319  const TargetRegisterClass *UseRC =
2320  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2321  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2322 
2323  return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
2324 }
2325 
2326 // Override for instructions with two register operands to avoid use
2327 // of R0/X0. The automatic infrastructure isn't aware of the context
2328 // so we must be conservative.
2329 unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2330  const TargetRegisterClass* RC,
2331  unsigned Op0, bool Op0IsKill,
2332  unsigned Op1, bool Op1IsKill) {
2333  const TargetRegisterClass *UseRC =
2334  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2335  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2336 
2337  return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
2338  Op1, Op1IsKill);
2339 }
2340 
2341 namespace llvm {
2342  // Create the fast instruction selector for PowerPC64 ELF.
2344  const TargetLibraryInfo *LibInfo) {
2345  // Only available on 64-bit ELF for now.
2346  const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2347  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
2348  return new PPCFastISel(FuncInfo, LibInfo);
2349  return nullptr;
2350  }
2351 }
bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:276
ReturnInst - Return a value (possibly void), from a function.
void push_back(const T &Elt)
Definition: SmallVector.h:222
The memory access reads data.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:679
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers.
The memory access writes data.
MVT getValVT() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1327
LocInfo getLocInfo() const
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:39
unsigned getNumOperands() const
Definition: User.h:138
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
unsigned less or equal
Definition: InstrTypes.h:723
unsigned less than
Definition: InstrTypes.h:722
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:703
unsigned getSizeInBits() const
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:261
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:713
bool hasSuperClassEq(const TargetRegisterClass *RC) const
hasSuperClassEq - Returns true if RC is a super-class of or equal to this class.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:111
F(f)
LoadInst - an instruction for reading from memory.
Definition: Instructions.h:177
static MachinePointerInfo getConstantPool()
getConstantPool - Return a MachinePointerInfo record that refers to the constant pool.
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
void reserve(size_type N)
Definition: SmallVector.h:401
unsigned getValNo() const
op_iterator op_begin()
Definition: User.h:183
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
bool isRegLoc() const
static MachinePointerInfo getFixedStack(int FI, int64_t offset=0)
getFixedStack - Return a MachinePointerInfo record that refers to the the specified FrameIndex...
COPY - Target-independent register copy.
Definition: TargetOpcodes.h:86
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:708
unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm)
Emit a MachineInstr with a register operand, an immediate, and a result register in the given registe...
Definition: FastISel.cpp:1820
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:475
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:707
MachineMemOperand - A description of a memory reference used in the backend.
bool hasCommonLinkage() const
Definition: GlobalValue.h:282
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const HexagonInstrInfo * TII
StructType - Class to represent struct types.
Definition: DerivedTypes.h:191
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
A Use represents the edge between a Value definition and its users.
Definition: Use.h:69
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill)
Emit a MachineInstr with one register operand and a result register in the given register class...
Definition: FastISel.cpp:1747
Reg
All possible values of the reg field in the ModR/M byte.
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:704
SimpleValueType SimpleTy
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:353
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:117
unsigned getLocReg() const
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:30
ConstantExpr - a constant value that is initialized with an expression using other constant values...
Definition: Constants.h:852
void GetReturnInfo(Type *ReturnType, AttributeSet attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags...
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:121
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
BasicBlock * getSuccessor(unsigned i) const
int64_t getImm() const
Type * getElementType() const
Definition: DerivedTypes.h:323
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:97
TargetInstrInfo - Interface to description of machine instruction set.
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:491
MVT - Machine Value Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
BranchInst - Conditional or Unconditional Branch instruction.
MVT getLocVT() const
This is an important base class in LLVM.
Definition: Constant.h:41
PointerType * getType() const
getType - Overload to return most specific pointer type
Definition: Instructions.h:115
bool isVector() const
isVector - Return true if this is a vector value type.
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1339
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
IndirectBrInst - Indirect Branch Instruction.
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1895
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:233
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
op_iterator op_end()
Definition: User.h:185
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:697
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Value * getOperand(unsigned i) const
Definition: User.h:118
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:706
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
Class to represent integer types.
Definition: DerivedTypes.h:37
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:152
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:714
EVT - Extended Value Type.
Definition: ValueTypes.h:31
static bool isAtomic(Instruction *I)
bool isSVR4ABI() const
Definition: PPCSubtarget.h:269
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:712
signed greater than
Definition: InstrTypes.h:724
bool needsCustom() const
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:701
CCState - This class holds information needed while lowering arguments and return values...
This is the shared class of boolean and integer constants.
Definition: Constants.h:47
bool isFunctionTy() const
isFunctionTy - True if this is an instance of FunctionType.
Definition: Type.h:205
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:711
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
Provides information about what library functions are available for the current target.
CCValAssign - Represent assignment of one arg/retval to a location.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
signed less than
Definition: InstrTypes.h:726
Promote Memory to Register
Definition: Mem2Reg.cpp:58
unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill)
Emit a MachineInstr with two register operands and a result register in the given register class...
Definition: FastISel.cpp:1768
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:597
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:161
const MachineInstrBuilder & addFrameIndex(int Idx) const
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
signed less or equal
Definition: InstrTypes.h:727
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:73
This file defines the FastISel class.
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:51
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:185
Value * getCondition() const
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:128
unsigned greater or equal
Definition: InstrTypes.h:721
COPY_TO_REGCLASS - This instruction is a placeholder for a plain register-to-register copy into a spe...
Definition: TargetOpcodes.h:66
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
#define I(x, y, z)
Definition: MD5.cpp:54
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:705
iterator find(const KeyT &Val)
Definition: DenseMap.h:124
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:272
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:709
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
unsigned getReg() const
getReg - Returns the register number.
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:94
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:700
LLVM Value Representation.
Definition: Value.h:69
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:710
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:112
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:298
Primary interface to the complete machine description for the target machine.
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned greater than
Definition: InstrTypes.h:720
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
static Optional< PPC::Predicate > getComparePred(CmpInst::Predicate Pred)
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:125
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:702
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
const BasicBlock * getParent() const
Definition: Instruction.h:72
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:203
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:699
signed greater or equal
Definition: InstrTypes.h:725
This file describes how to lower LLVM code to machine code.
bool isVoidTy() const
isVoidTy - Return true if this is 'void'.
Definition: Type.h:137
AllocaInst - an instruction to allocate memory on the stack.
Definition: Instructions.h:76
gep_type_iterator gep_type_begin(const User *GEP)