// Source: LLVM 7.0.0svn -- lib/Target/PowerPC/PPCFastISel.cpp
1 //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the PowerPC-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // PPCGenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GetElementPtrTypeInfo.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
41 
42 //===----------------------------------------------------------------------===//
43 //
44 // TBD:
45 // fastLowerArguments: Handle simple cases.
46 // PPCMaterializeGV: Handle TLS.
47 // SelectCall: Handle function pointers.
48 // SelectCall: Handle multi-register return values.
49 // SelectCall: Optimize away nops for local calls.
50 // processCallArgs: Handle bit-converted arguments.
51 // finishCall: Handle multi-register return values.
52 // PPCComputeAddress: Handle parameter references as FrameIndex's.
53 // PPCEmitCmp: Handle immediate as operand 1.
54 // SelectCall: Handle small byval arguments.
55 // SelectIntrinsicCall: Implement.
56 // SelectSelect: Implement.
57 // Consider factoring isTypeLegal into the base class.
58 // Implement switches and jump tables.
59 //
60 //===----------------------------------------------------------------------===//
61 using namespace llvm;
62 
63 #define DEBUG_TYPE "ppcfastisel"
64 
65 namespace {
66 
// Describes a fast-isel memory operand: either a (base register, offset)
// pair or a (frame index, offset) pair.
struct Address {
  // Which flavor of base the address carries.
  enum {
    RegBase,
    FrameIndexBase
  } BaseType;

  // The base itself: a virtual register or a stack frame index,
  // depending on BaseType.
  union {
    unsigned Reg;
    int FI;
  } Base;

  // Byte displacement from the base.
  long Offset;

  // Default to a register base of 0 with no displacement.
  Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; }
};
86 
87 class PPCFastISel final : public FastISel {
88 
89  const TargetMachine &TM;
90  const PPCSubtarget *PPCSubTarget;
91  PPCFunctionInfo *PPCFuncInfo;
92  const TargetInstrInfo &TII;
93  const TargetLowering &TLI;
95 
96  public:
97  explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
98  const TargetLibraryInfo *LibInfo)
99  : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
100  PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
101  PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
102  TII(*PPCSubTarget->getInstrInfo()),
103  TLI(*PPCSubTarget->getTargetLowering()),
104  Context(&FuncInfo.Fn->getContext()) {}
105 
106  // Backend specific FastISel code.
107  private:
108  bool fastSelectInstruction(const Instruction *I) override;
109  unsigned fastMaterializeConstant(const Constant *C) override;
110  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
111  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
112  const LoadInst *LI) override;
113  bool fastLowerArguments() override;
114  unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
115  unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
116  const TargetRegisterClass *RC,
117  unsigned Op0, bool Op0IsKill,
118  uint64_t Imm);
119  unsigned fastEmitInst_r(unsigned MachineInstOpcode,
120  const TargetRegisterClass *RC,
121  unsigned Op0, bool Op0IsKill);
122  unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
123  const TargetRegisterClass *RC,
124  unsigned Op0, bool Op0IsKill,
125  unsigned Op1, bool Op1IsKill);
126 
127  bool fastLowerCall(CallLoweringInfo &CLI) override;
128 
129  // Instruction selection routines.
130  private:
131  bool SelectLoad(const Instruction *I);
132  bool SelectStore(const Instruction *I);
133  bool SelectBranch(const Instruction *I);
134  bool SelectIndirectBr(const Instruction *I);
135  bool SelectFPExt(const Instruction *I);
136  bool SelectFPTrunc(const Instruction *I);
137  bool SelectIToFP(const Instruction *I, bool IsSigned);
138  bool SelectFPToI(const Instruction *I, bool IsSigned);
139  bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
140  bool SelectRet(const Instruction *I);
141  bool SelectTrunc(const Instruction *I);
142  bool SelectIntExt(const Instruction *I);
143 
144  // Utility routines.
145  private:
146  bool isTypeLegal(Type *Ty, MVT &VT);
147  bool isLoadTypeLegal(Type *Ty, MVT &VT);
148  bool isValueAvailable(const Value *V) const;
149  bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
150  return RC->getID() == PPC::VSFRCRegClassID;
151  }
152  bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
153  return RC->getID() == PPC::VSSRCRegClassID;
154  }
155  bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
156  bool isZExt, unsigned DestReg);
157  bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
158  const TargetRegisterClass *RC, bool IsZExt = true,
159  unsigned FP64LoadOpc = PPC::LFD);
160  bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
161  bool PPCComputeAddress(const Value *Obj, Address &Addr);
162  void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
163  unsigned &IndexReg);
164  bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
165  unsigned DestReg, bool IsZExt);
166  unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
167  unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
168  unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
169  bool UseSExt = true);
170  unsigned PPCMaterialize32BitInt(int64_t Imm,
171  const TargetRegisterClass *RC);
172  unsigned PPCMaterialize64BitInt(int64_t Imm,
173  const TargetRegisterClass *RC);
174  unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
175  unsigned SrcReg, bool IsSigned);
176  unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
177 
178  // Call handling routines.
179  private:
180  bool processCallArgs(SmallVectorImpl<Value*> &Args,
181  SmallVectorImpl<unsigned> &ArgRegs,
182  SmallVectorImpl<MVT> &ArgVTs,
184  SmallVectorImpl<unsigned> &RegArgs,
185  CallingConv::ID CC,
186  unsigned &NumBytes,
187  bool IsVarArg);
188  bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
189  LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag);
190 
191  private:
192  #include "PPCGenFastISel.inc"
193 
194 };
195 
196 } // end anonymous namespace
197 
198 #include "PPCGenCallingConv.inc"
199 
200 // Function whose sole purpose is to kill compiler warnings
201 // stemming from unused functions included from PPCGenCallingConv.inc.
202 CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
203  if (Flag == 1)
204  return CC_PPC32_SVR4;
205  else if (Flag == 2)
206  return CC_PPC32_SVR4_ByVal;
207  else if (Flag == 3)
208  return CC_PPC32_SVR4_VarArg;
209  else if (Flag == 4)
210  return RetCC_PPC_Cold;
211  else
212  return RetCC_PPC;
213 }
214 
216  switch (Pred) {
217  // These are not representable with any single compare.
218  case CmpInst::FCMP_FALSE:
219  case CmpInst::FCMP_TRUE:
220  // Major concern about the following 6 cases is NaN result. The comparison
221  // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
222  // only one of which will be set. The result is generated by fcmpu
223  // instruction. However, bc instruction only inspects one of the first 3
224  // bits, so when un is set, bc instruction may jump to an undesired
225  // place.
226  //
227  // More specifically, if we expect an unordered comparison and un is set, we
228  // expect to always go to true branch; in such case UEQ, UGT and ULT still
229  // give false, which are undesired; but UNE, UGE, ULE happen to give true,
230  // since they are tested by inspecting !eq, !lt, !gt, respectively.
231  //
232  // Similarly, for ordered comparison, when un is set, we always expect the
233  // result to be false. In such case OGT, OLT and OEQ is good, since they are
234  // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
235  // and ONE are tested through !lt, !gt and !eq, and these are true.
236  case CmpInst::FCMP_UEQ:
237  case CmpInst::FCMP_UGT:
238  case CmpInst::FCMP_ULT:
239  case CmpInst::FCMP_OGE:
240  case CmpInst::FCMP_OLE:
241  case CmpInst::FCMP_ONE:
242  default:
243  return Optional<PPC::Predicate>();
244 
245  case CmpInst::FCMP_OEQ:
246  case CmpInst::ICMP_EQ:
247  return PPC::PRED_EQ;
248 
249  case CmpInst::FCMP_OGT:
250  case CmpInst::ICMP_UGT:
251  case CmpInst::ICMP_SGT:
252  return PPC::PRED_GT;
253 
254  case CmpInst::FCMP_UGE:
255  case CmpInst::ICMP_UGE:
256  case CmpInst::ICMP_SGE:
257  return PPC::PRED_GE;
258 
259  case CmpInst::FCMP_OLT:
260  case CmpInst::ICMP_ULT:
261  case CmpInst::ICMP_SLT:
262  return PPC::PRED_LT;
263 
264  case CmpInst::FCMP_ULE:
265  case CmpInst::ICMP_ULE:
266  case CmpInst::ICMP_SLE:
267  return PPC::PRED_LE;
268 
269  case CmpInst::FCMP_UNE:
270  case CmpInst::ICMP_NE:
271  return PPC::PRED_NE;
272 
273  case CmpInst::FCMP_ORD:
274  return PPC::PRED_NU;
275 
276  case CmpInst::FCMP_UNO:
277  return PPC::PRED_UN;
278  }
279 }
280 
281 // Determine whether the type Ty is simple enough to be handled by
282 // fast-isel, and return its equivalent machine type in VT.
283 // FIXME: Copied directly from ARM -- factor into base class?
284 bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
285  EVT Evt = TLI.getValueType(DL, Ty, true);
286 
287  // Only handle simple types.
288  if (Evt == MVT::Other || !Evt.isSimple()) return false;
289  VT = Evt.getSimpleVT();
290 
291  // Handle all legal types, i.e. a register that will directly hold this
292  // value.
293  return TLI.isTypeLegal(VT);
294 }
295 
296 // Determine whether the type Ty is simple enough to be handled by
297 // fast-isel as a load target, and return its equivalent machine type in VT.
298 bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
299  if (isTypeLegal(Ty, VT)) return true;
300 
301  // If this is a type than can be sign or zero-extended to a basic operation
302  // go ahead and accept it now.
303  if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
304  return true;
305  }
306 
307  return false;
308 }
309 
310 bool PPCFastISel::isValueAvailable(const Value *V) const {
311  if (!isa<Instruction>(V))
312  return true;
313 
314  const auto *I = cast<Instruction>(V);
315  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
316 }
317 
318 // Given a value Obj, create an Address object Addr that represents its
319 // address. Return false if we can't handle it.
320 bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
321  const User *U = nullptr;
322  unsigned Opcode = Instruction::UserOp1;
323  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
324  // Don't walk into other basic blocks unless the object is an alloca from
325  // another block, otherwise it may not have a virtual register assigned.
326  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
327  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
328  Opcode = I->getOpcode();
329  U = I;
330  }
331  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
332  Opcode = C->getOpcode();
333  U = C;
334  }
335 
336  switch (Opcode) {
337  default:
338  break;
339  case Instruction::BitCast:
340  // Look through bitcasts.
341  return PPCComputeAddress(U->getOperand(0), Addr);
342  case Instruction::IntToPtr:
343  // Look past no-op inttoptrs.
344  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
345  TLI.getPointerTy(DL))
346  return PPCComputeAddress(U->getOperand(0), Addr);
347  break;
348  case Instruction::PtrToInt:
349  // Look past no-op ptrtoints.
350  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
351  return PPCComputeAddress(U->getOperand(0), Addr);
352  break;
353  case Instruction::GetElementPtr: {
354  Address SavedAddr = Addr;
355  long TmpOffset = Addr.Offset;
356 
357  // Iterate through the GEP folding the constants into offsets where
358  // we can.
360  for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
361  II != IE; ++II, ++GTI) {
362  const Value *Op = *II;
363  if (StructType *STy = GTI.getStructTypeOrNull()) {
364  const StructLayout *SL = DL.getStructLayout(STy);
365  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
366  TmpOffset += SL->getElementOffset(Idx);
367  } else {
368  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
369  for (;;) {
370  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
371  // Constant-offset addressing.
372  TmpOffset += CI->getSExtValue() * S;
373  break;
374  }
375  if (canFoldAddIntoGEP(U, Op)) {
376  // A compatible add with a constant operand. Fold the constant.
377  ConstantInt *CI =
378  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
379  TmpOffset += CI->getSExtValue() * S;
380  // Iterate on the other operand.
381  Op = cast<AddOperator>(Op)->getOperand(0);
382  continue;
383  }
384  // Unsupported
385  goto unsupported_gep;
386  }
387  }
388  }
389 
390  // Try to grab the base operand now.
391  Addr.Offset = TmpOffset;
392  if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
393 
394  // We failed, restore everything and try the other options.
395  Addr = SavedAddr;
396 
397  unsupported_gep:
398  break;
399  }
400  case Instruction::Alloca: {
401  const AllocaInst *AI = cast<AllocaInst>(Obj);
403  FuncInfo.StaticAllocaMap.find(AI);
404  if (SI != FuncInfo.StaticAllocaMap.end()) {
405  Addr.BaseType = Address::FrameIndexBase;
406  Addr.Base.FI = SI->second;
407  return true;
408  }
409  break;
410  }
411  }
412 
413  // FIXME: References to parameters fall through to the behavior
414  // below. They should be able to reference a frame index since
415  // they are stored to the stack, so we can get "ld rx, offset(r1)"
416  // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
417  // just contain the parameter. Try to handle this with a FI.
418 
419  // Try to get this in a register if nothing else has worked.
420  if (Addr.Base.Reg == 0)
421  Addr.Base.Reg = getRegForValue(Obj);
422 
423  // Prevent assignment of base register to X0, which is inappropriate
424  // for loads and stores alike.
425  if (Addr.Base.Reg != 0)
426  MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
427 
428  return Addr.Base.Reg != 0;
429 }
430 
431 // Fix up some addresses that can't be used directly. For example, if
432 // an offset won't fit in an instruction field, we may need to move it
433 // into an index register.
434 void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
435  unsigned &IndexReg) {
436 
437  // Check whether the offset fits in the instruction field.
438  if (!isInt<16>(Addr.Offset))
439  UseOffset = false;
440 
441  // If this is a stack pointer and the offset needs to be simplified then
442  // put the alloca address into a register, set the base type back to
443  // register and continue. This should almost never happen.
444  if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
445  unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
446  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
447  ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
448  Addr.Base.Reg = ResultReg;
449  Addr.BaseType = Address::RegBase;
450  }
451 
452  if (!UseOffset) {
453  IntegerType *OffsetTy = Type::getInt64Ty(*Context);
454  const ConstantInt *Offset =
455  ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
456  IndexReg = PPCMaterializeInt(Offset, MVT::i64);
457  assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
458  }
459 }
460 
461 // Emit a load instruction if possible, returning true if we succeeded,
462 // otherwise false. See commentary below for how the register class of
463 // the load is determined.
bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                              const TargetRegisterClass *RC,
                              bool IsZExt, unsigned FP64LoadOpc) {
  unsigned Opc;
  bool UseOffset = true;

  // If ResultReg is given, it determines the register class of the load.
  // Otherwise, RC is the register class to use. If the result of the
  // load isn't anticipated in this block, both may be zero, in which
  // case we must make a conservative guess. In particular, don't assign
  // R0 or X0 to the result register, as the result may be used in a load,
  // store, add-immediate, or isel that won't permit this. (Though
  // perhaps the spill and reload of live-exit values would handle this?)
  const TargetRegisterClass *UseRC =
    (ResultReg ? MRI.getRegClass(ResultReg) :
     (RC ? RC :
      (VT == MVT::f64 ? &PPC::F8RCRegClass :
       (VT == MVT::f32 ? &PPC::F4RCRegClass :
        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
         &PPC::GPRC_and_GPRC_NOR0RegClass)))));

  // 32-bit GPR class selects the 32-bit opcode variants below.
  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the D-form (reg + 16-bit displacement) opcode for VT; the
  // indexed (X-form) equivalent is substituted later if needed.
  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
      break;
    case MVT::i16:
      // Zero-extend uses LHZ, sign-extend uses LHA.
      Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
                    : (Is32BitInt ? PPC::LHA : PPC::LHA8));
      break;
    case MVT::i32:
      Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
                    : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
      // LWA is a DS-form instruction: its displacement must be a multiple
      // of 4, so fall back to the indexed form otherwise.
      if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
        UseOffset = false;
      break;
    case MVT::i64:
      Opc = PPC::LD;
      assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
             "64-bit load with 32-bit target??");
      // LD is DS-form as well: displacement must be 4-byte aligned.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = PPC::LFS;
      break;
    case MVT::f64:
      Opc = FP64LoadOpc;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
  // be used.
  bool IsVSSRC = isVSSRCRegClass(UseRC);
  bool IsVSFRC = isVSFRCRegClass(UseRC);
  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
  bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
  if ((Is32VSXLoad || Is64VSXLoad) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  if (ResultReg == 0)
    ResultReg = createResultReg(UseRC);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlignment(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default:        llvm_unreachable("Unexpected opcode!");
      case PPC::LBZ:    Opc = PPC::LBZX;    break;
      case PPC::LBZ8:   Opc = PPC::LBZX8;   break;
      case PPC::LHZ:    Opc = PPC::LHZX;    break;
      case PPC::LHZ8:   Opc = PPC::LHZX8;   break;
      case PPC::LHA:    Opc = PPC::LHAX;    break;
      case PPC::LHA8:   Opc = PPC::LHAX8;   break;
      case PPC::LWZ:    Opc = PPC::LWZX;    break;
      case PPC::LWZ8:   Opc = PPC::LWZX8;   break;
      case PPC::LWA:    Opc = PPC::LWAX;    break;
      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
      case PPC::LD:     Opc = PPC::LDX;     break;
      case PPC::LFS:    Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
      case PPC::LFD:    Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                       ResultReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
598 
599 // Attempt to fast-select a load instruction.
600 bool PPCFastISel::SelectLoad(const Instruction *I) {
601  // FIXME: No atomic loads are supported.
602  if (cast<LoadInst>(I)->isAtomic())
603  return false;
604 
605  // Verify we have a legal type before going any further.
606  MVT VT;
607  if (!isLoadTypeLegal(I->getType(), VT))
608  return false;
609 
610  // See if we can handle this address.
611  Address Addr;
612  if (!PPCComputeAddress(I->getOperand(0), Addr))
613  return false;
614 
615  // Look at the currently assigned register for this instruction
616  // to determine the required register class. This is necessary
617  // to constrain RA from using R0/X0 when this is not legal.
618  unsigned AssignedReg = FuncInfo.ValueMap[I];
619  const TargetRegisterClass *RC =
620  AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
621 
622  unsigned ResultReg = 0;
623  if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
624  return false;
625  updateValueMap(I, ResultReg);
626  return true;
627 }
628 
629 // Emit a store instruction to store SrcReg at Addr.
// Emit a store instruction to store SrcReg at Addr.
bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // The source register's class determines 32- vs 64-bit opcode variants.
  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the D-form (reg + 16-bit displacement) opcode for VT; the
  // indexed (X-form) equivalent is substituted later if needed.
  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      // STD is a DS-form instruction: displacement must be 4-byte aligned.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = PPC::STFS;
      break;
    case MVT::f64:
      Opc = PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlignment(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::STB:  Opc = PPC::STBX;  break;
      case PPC::STH : Opc = PPC::STHX;  break;
      case PPC::STW : Opc = PPC::STWX;  break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD:  Opc = PPC::STDX;  break;
      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
741 
742 // Attempt to fast-select a store instruction.
743 bool PPCFastISel::SelectStore(const Instruction *I) {
744  Value *Op0 = I->getOperand(0);
745  unsigned SrcReg = 0;
746 
747  // FIXME: No atomics loads are supported.
748  if (cast<StoreInst>(I)->isAtomic())
749  return false;
750 
751  // Verify we have a legal type before going any further.
752  MVT VT;
753  if (!isLoadTypeLegal(Op0->getType(), VT))
754  return false;
755 
756  // Get the value to be stored into a register.
757  SrcReg = getRegForValue(Op0);
758  if (SrcReg == 0)
759  return false;
760 
761  // See if we can handle this address.
762  Address Addr;
763  if (!PPCComputeAddress(I->getOperand(1), Addr))
764  return false;
765 
766  if (!PPCEmitStore(VT, SrcReg, Addr))
767  return false;
768 
769  return true;
770 }
771 
772 // Attempt to fast-select a branch instruction.
773 bool PPCFastISel::SelectBranch(const Instruction *I) {
774  const BranchInst *BI = cast<BranchInst>(I);
775  MachineBasicBlock *BrBB = FuncInfo.MBB;
776  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
777  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
778 
779  // For now, just try the simplest case where it's fed by a compare.
780  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
781  if (isValueAvailable(CI)) {
782  Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
783  if (!OptPPCPred)
784  return false;
785 
786  PPC::Predicate PPCPred = OptPPCPred.getValue();
787 
788  // Take advantage of fall-through opportunities.
789  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
790  std::swap(TBB, FBB);
791  PPCPred = PPC::InvertPredicate(PPCPred);
792  }
793 
794  unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
795 
796  if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
797  CondReg))
798  return false;
799 
800  BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
801  .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
802  finishCondBranch(BI->getParent(), TBB, FBB);
803  return true;
804  }
805  } else if (const ConstantInt *CI =
806  dyn_cast<ConstantInt>(BI->getCondition())) {
807  uint64_t Imm = CI->getZExtValue();
808  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
809  fastEmitBranch(Target, DbgLoc);
810  return true;
811  }
812 
813  // FIXME: ARM looks for a case where the block containing the compare
814  // has been split from the block containing the branch. If this happens,
815  // there is a vreg available containing the result of the compare. I'm
816  // not sure we can do much, as we've lost the predicate information with
817  // the compare instruction -- we have a 4-bit CR but don't know which bit
818  // to test here.
819  return false;
820 }
821 
822 // Attempt to emit a compare of the two source values. Signed and unsigned
823 // comparisons are supported. Return false if we can't handle it.
824 bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
825  bool IsZExt, unsigned DestReg) {
826  Type *Ty = SrcValue1->getType();
827  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
828  if (!SrcEVT.isSimple())
829  return false;
830  MVT SrcVT = SrcEVT.getSimpleVT();
831 
832  if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
833  return false;
834 
835  // See if operand 2 is an immediate encodeable in the compare.
836  // FIXME: Operands are not in canonical order at -O0, so an immediate
837  // operand in position 1 is a lost opportunity for now. We are
838  // similar to ARM in this regard.
839  long Imm = 0;
840  bool UseImm = false;
841 
842  // Only 16-bit integer constants can be represented in compares for
843  // PowerPC. Others will be materialized into a register.
844  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
845  if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
846  SrcVT == MVT::i8 || SrcVT == MVT::i1) {
847  const APInt &CIVal = ConstInt->getValue();
848  Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
849  if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
850  UseImm = true;
851  }
852  }
853 
854  unsigned CmpOpc;
855  bool NeedsExt = false;
856  switch (SrcVT.SimpleTy) {
857  default: return false;
858  case MVT::f32:
859  CmpOpc = PPC::FCMPUS;
860  break;
861  case MVT::f64:
862  CmpOpc = PPC::FCMPUD;
863  break;
864  case MVT::i1:
865  case MVT::i8:
866  case MVT::i16:
867  NeedsExt = true;
868  // Intentional fall-through.
869  case MVT::i32:
870  if (!UseImm)
871  CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
872  else
873  CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
874  break;
875  case MVT::i64:
876  if (!UseImm)
877  CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
878  else
879  CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
880  break;
881  }
882 
883  unsigned SrcReg1 = getRegForValue(SrcValue1);
884  if (SrcReg1 == 0)
885  return false;
886 
887  unsigned SrcReg2 = 0;
888  if (!UseImm) {
889  SrcReg2 = getRegForValue(SrcValue2);
890  if (SrcReg2 == 0)
891  return false;
892  }
893 
894  if (NeedsExt) {
895  unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
896  if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
897  return false;
898  SrcReg1 = ExtReg;
899 
900  if (!UseImm) {
901  unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
902  if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
903  return false;
904  SrcReg2 = ExtReg;
905  }
906  }
907 
908  if (!UseImm)
909  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
910  .addReg(SrcReg1).addReg(SrcReg2);
911  else
912  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
913  .addReg(SrcReg1).addImm(Imm);
914 
915  return true;
916 }
917 
918 // Attempt to fast-select a floating-point extend instruction.
919 bool PPCFastISel::SelectFPExt(const Instruction *I) {
920  Value *Src = I->getOperand(0);
921  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
922  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
923 
924  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
925  return false;
926 
927  unsigned SrcReg = getRegForValue(Src);
928  if (!SrcReg)
929  return false;
930 
931  // No code is generated for a FP extend.
932  updateValueMap(I, SrcReg);
933  return true;
934 }
935 
936 // Attempt to fast-select a floating-point truncate instruction.
937 bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
938  Value *Src = I->getOperand(0);
939  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
940  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
941 
942  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
943  return false;
944 
945  unsigned SrcReg = getRegForValue(Src);
946  if (!SrcReg)
947  return false;
948 
949  // Round the result to single precision.
950  unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
951  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
952  .addReg(SrcReg);
953 
954  updateValueMap(I, DestReg);
955  return true;
956 }
957 
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// Returns the FPR, or 0 on failure.  The move goes through a stack
// slot: store from the GPR, then reload into the FPR.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit.  Note that SrcVT is
  // deliberately left as MVT::i32 here so the load-opcode selection
  // below still sees the original source width.
  if (SrcVT == MVT::i32) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return 0;
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return 0;

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions: default is a full 8-byte LFD; for a
  // 32-bit source prefer the FP integer-word loads (LFIWZX/LFIWAX),
  // offsetting by 4 on big-endian targets so the low word is read.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    } else if (PPCSubTarget->hasLFIWAX()) {
      // LFIWAX (sign-extending word load) is only available on some
      // subtargets; otherwise fall back to the 8-byte LFD above.
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned ResultReg = 0;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return 0;

  return ResultReg;
}
1006 
1007 // Attempt to fast-select an integer-to-floating-point conversion.
1008 // FIXME: Once fast-isel has better support for VSX, conversions using
1009 // direct moves should be implemented.
1010 bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
1011  MVT DstVT;
1012  Type *DstTy = I->getType();
1013  if (!isTypeLegal(DstTy, DstVT))
1014  return false;
1015 
1016  if (DstVT != MVT::f32 && DstVT != MVT::f64)
1017  return false;
1018 
1019  Value *Src = I->getOperand(0);
1020  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
1021  if (!SrcEVT.isSimple())
1022  return false;
1023 
1024  MVT SrcVT = SrcEVT.getSimpleVT();
1025 
1026  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
1027  SrcVT != MVT::i32 && SrcVT != MVT::i64)
1028  return false;
1029 
1030  unsigned SrcReg = getRegForValue(Src);
1031  if (SrcReg == 0)
1032  return false;
1033 
1034  // We can only lower an unsigned convert if we have the newer
1035  // floating-point conversion operations.
1036  if (!IsSigned && !PPCSubTarget->hasFPCVT())
1037  return false;
1038 
1039  // FIXME: For now we require the newer floating-point conversion operations
1040  // (which are present only on P7 and A2 server models) when converting
1041  // to single-precision float. Otherwise we have to generate a lot of
1042  // fiddly code to avoid double rounding. If necessary, the fiddly code
1043  // can be found in PPCTargetLowering::LowerINT_TO_FP().
1044  if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
1045  return false;
1046 
1047  // Extend the input if necessary.
1048  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
1049  unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
1050  if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
1051  return false;
1052  SrcVT = MVT::i64;
1053  SrcReg = TmpReg;
1054  }
1055 
1056  // Move the integer value to an FPR.
1057  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
1058  if (FPReg == 0)
1059  return false;
1060 
1061  // Determine the opcode for the conversion.
1062  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1063  unsigned DestReg = createResultReg(RC);
1064  unsigned Opc;
1065 
1066  if (DstVT == MVT::f32)
1067  Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
1068  else
1069  Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
1070 
1071  // Generate the convert.
1072  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1073  .addReg(FPReg);
1074 
1075  updateValueMap(I, DestReg);
1076  return true;
1077 }
1078 
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// The move goes through a stack slot: store from the FPR, reload as
// an integer of type VT.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      unsigned SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return 0;

  // Reload it into a GPR. If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.  A null class lets
  // PPCEmitLoad choose a default for VT.
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  // !IsSigned selects the zero-extending load form for narrow VT.
  unsigned ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return 0;

  return ResultReg;
}
1115 
1116 // Attempt to fast-select a floating-point-to-integer conversion.
1117 // FIXME: Once fast-isel has better support for VSX, conversions using
1118 // direct moves should be implemented.
1119 bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
1120  MVT DstVT, SrcVT;
1121  Type *DstTy = I->getType();
1122  if (!isTypeLegal(DstTy, DstVT))
1123  return false;
1124 
1125  if (DstVT != MVT::i32 && DstVT != MVT::i64)
1126  return false;
1127 
1128  // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
1129  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
1130  return false;
1131 
1132  Value *Src = I->getOperand(0);
1133  Type *SrcTy = Src->getType();
1134  if (!isTypeLegal(SrcTy, SrcVT))
1135  return false;
1136 
1137  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
1138  return false;
1139 
1140  unsigned SrcReg = getRegForValue(Src);
1141  if (SrcReg == 0)
1142  return false;
1143 
1144  // Convert f32 to f64 if necessary. This is just a meaningless copy
1145  // to get the register class right.
1146  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
1147  if (InRC == &PPC::F4RCRegClass) {
1148  unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
1149  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1150  TII.get(TargetOpcode::COPY), TmpReg)
1151  .addReg(SrcReg);
1152  SrcReg = TmpReg;
1153  }
1154 
1155  // Determine the opcode for the conversion, which takes place
1156  // entirely within FPRs.
1157  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
1158  unsigned Opc;
1159 
1160  if (DstVT == MVT::i32)
1161  if (IsSigned)
1162  Opc = PPC::FCTIWZ;
1163  else
1164  Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
1165  else
1166  Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
1167 
1168  // Generate the convert.
1169  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1170  .addReg(SrcReg);
1171 
1172  // Now move the integer value from a float register to an integer register.
1173  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
1174  if (IntReg == 0)
1175  return false;
1176 
1177  updateValueMap(I, IntReg);
1178  return true;
1179 }
1180 
// Attempt to fast-select a binary integer operation (ADD/OR/SUB) that
// isn't already handled automatically.  Folds a small constant RHS into
// an immediate-form instruction where possible, otherwise emits the
// register-register form.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the 32-bit or 64-bit opcode variant based on the register class.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  // NOTE(review): RC is always non-null here (the ternary above supplies a
  // default), so the "RC ? RC :" guard appears redundant but is harmless.
  unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // Handle case of small immediate operand: rewrite Opc to the D-form
  // (immediate) instruction when the RHS fits in a signed 16-bit field.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          // ADDI treats R0 as zero, so keep the base out of R0.
          Opc = PPC::ADDI;
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // x - imm becomes addi x, -imm, except that -32768 cannot be
          // negated within 16 bits; fall back to the reg-reg form then.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  // Reverse operands for subtract-from (SUBF computes RB - RA).
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
1285 
1286 // Handle arguments to a call that we're attempting to fast-select.
1287 // Return false if the arguments are too complex for us at the moment.
1288 bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
1289  SmallVectorImpl<unsigned> &ArgRegs,
1290  SmallVectorImpl<MVT> &ArgVTs,
1292  SmallVectorImpl<unsigned> &RegArgs,
1293  CallingConv::ID CC,
1294  unsigned &NumBytes,
1295  bool IsVarArg) {
1297  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1298 
1299  // Reserve space for the linkage area on the stack.
1300  unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
1301  CCInfo.AllocateStack(LinkageSize, 8);
1302 
1303  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
1304 
1305  // Bail out if we can't handle any of the arguments.
1306  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1307  CCValAssign &VA = ArgLocs[I];
1308  MVT ArgVT = ArgVTs[VA.getValNo()];
1309 
1310  // Skip vector arguments for now, as well as long double and
1311  // uint128_t, and anything that isn't passed in a register.
1312  if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1313  !VA.isRegLoc() || VA.needsCustom())
1314  return false;
1315 
1316  // Skip bit-converted arguments for now.
1317  if (VA.getLocInfo() == CCValAssign::BCvt)
1318  return false;
1319  }
1320 
1321  // Get a count of how many bytes are to be pushed onto the stack.
1322  NumBytes = CCInfo.getNextStackOffset();
1323 
1324  // The prolog code of the callee may store up to 8 GPR argument registers to
1325  // the stack, allowing va_start to index over them in memory if its varargs.
1326  // Because we cannot tell if this is needed on the caller side, we have to
1327  // conservatively assume that it is needed. As such, make sure we have at
1328  // least enough stack space for the caller to store the 8 GPRs.
1329  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1330  NumBytes = std::max(NumBytes, LinkageSize + 64);
1331 
1332  // Issue CALLSEQ_START.
1333  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1334  TII.get(TII.getCallFrameSetupOpcode()))
1335  .addImm(NumBytes).addImm(0);
1336 
1337  // Prepare to assign register arguments. Every argument uses up a
1338  // GPR protocol register even if it's passed in a floating-point
1339  // register (unless we're using the fast calling convention).
1340  unsigned NextGPR = PPC::X3;
1341  unsigned NextFPR = PPC::F1;
1342 
1343  // Process arguments.
1344  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1345  CCValAssign &VA = ArgLocs[I];
1346  unsigned Arg = ArgRegs[VA.getValNo()];
1347  MVT ArgVT = ArgVTs[VA.getValNo()];
1348 
1349  // Handle argument promotion and bitcasts.
1350  switch (VA.getLocInfo()) {
1351  default:
1352  llvm_unreachable("Unknown loc info!");
1353  case CCValAssign::Full:
1354  break;
1355  case CCValAssign::SExt: {
1356  MVT DestVT = VA.getLocVT();
1357  const TargetRegisterClass *RC =
1358  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1359  unsigned TmpReg = createResultReg(RC);
1360  if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1361  llvm_unreachable("Failed to emit a sext!");
1362  ArgVT = DestVT;
1363  Arg = TmpReg;
1364  break;
1365  }
1366  case CCValAssign::AExt:
1367  case CCValAssign::ZExt: {
1368  MVT DestVT = VA.getLocVT();
1369  const TargetRegisterClass *RC =
1370  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1371  unsigned TmpReg = createResultReg(RC);
1372  if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1373  llvm_unreachable("Failed to emit a zext!");
1374  ArgVT = DestVT;
1375  Arg = TmpReg;
1376  break;
1377  }
1378  case CCValAssign::BCvt: {
1379  // FIXME: Not yet handled.
1380  llvm_unreachable("Should have bailed before getting here!");
1381  break;
1382  }
1383  }
1384 
1385  // Copy this argument to the appropriate register.
1386  unsigned ArgReg;
1387  if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1388  ArgReg = NextFPR++;
1389  if (CC != CallingConv::Fast)
1390  ++NextGPR;
1391  } else
1392  ArgReg = NextGPR++;
1393 
1394  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1395  TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1396  RegArgs.push_back(ArgReg);
1397  }
1398 
1399  return true;
1400 }
1401 
1402 // For a call that we've determined we can fast-select, finish the
1403 // call sequence and generate a copy to obtain the return value (if any).
1404 bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1405  CallingConv::ID CC = CLI.CallConv;
1406 
1407  // Issue CallSEQ_END.
1408  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1409  TII.get(TII.getCallFrameDestroyOpcode()))
1410  .addImm(NumBytes).addImm(0);
1411 
1412  // Next, generate a copy to obtain the return value.
1413  // FIXME: No multi-register return values yet, though I don't foresee
1414  // any real difficulties there.
1415  if (RetVT != MVT::isVoid) {
1417  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1418  CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1419  CCValAssign &VA = RVLocs[0];
1420  assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1421  assert(VA.isRegLoc() && "Can only return in registers!");
1422 
1423  MVT DestVT = VA.getValVT();
1424  MVT CopyVT = DestVT;
1425 
1426  // Ints smaller than a register still arrive in a full 64-bit
1427  // register, so make sure we recognize this.
1428  if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1429  CopyVT = MVT::i64;
1430 
1431  unsigned SourcePhysReg = VA.getLocReg();
1432  unsigned ResultReg = 0;
1433 
1434  if (RetVT == CopyVT) {
1435  const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1436  ResultReg = createResultReg(CpyRC);
1437 
1438  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1439  TII.get(TargetOpcode::COPY), ResultReg)
1440  .addReg(SourcePhysReg);
1441 
1442  // If necessary, round the floating result to single precision.
1443  } else if (CopyVT == MVT::f64) {
1444  ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
1446  ResultReg).addReg(SourcePhysReg);
1447 
1448  // If only the low half of a general register is needed, generate
1449  // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1450  // used along the fast-isel path (not lowered), and downstream logic
1451  // also doesn't like a direct subreg copy on a physical reg.)
1452  } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1453  ResultReg = createResultReg(&PPC::GPRCRegClass);
1454  // Convert physical register from G8RC to GPRC.
1455  SourcePhysReg -= PPC::X0 - PPC::R0;
1456  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1457  TII.get(TargetOpcode::COPY), ResultReg)
1458  .addReg(SourcePhysReg);
1459  }
1460 
1461  assert(ResultReg && "ResultReg unset!");
1462  CLI.InRegs.push_back(SourcePhysReg);
1463  CLI.ResultReg = ResultReg;
1464  CLI.NumResultRegs = 1;
1465  }
1466 
1467  return true;
1468 }
1469 
1470 bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1471  CallingConv::ID CC = CLI.CallConv;
1472  bool IsTailCall = CLI.IsTailCall;
1473  bool IsVarArg = CLI.IsVarArg;
1474  const Value *Callee = CLI.Callee;
1475  const MCSymbol *Symbol = CLI.Symbol;
1476 
1477  if (!Callee && !Symbol)
1478  return false;
1479 
1480  // Allow SelectionDAG isel to handle tail calls.
1481  if (IsTailCall)
1482  return false;
1483 
1484  // Let SDISel handle vararg functions.
1485  if (IsVarArg)
1486  return false;
1487 
1488  // Handle simple calls for now, with legal return types and
1489  // those that can be extended.
1490  Type *RetTy = CLI.RetTy;
1491  MVT RetVT;
1492  if (RetTy->isVoidTy())
1493  RetVT = MVT::isVoid;
1494  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1495  RetVT != MVT::i8)
1496  return false;
1497  else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
1498  // We can't handle boolean returns when CR bits are in use.
1499  return false;
1500 
1501  // FIXME: No multi-register return values yet.
1502  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1503  RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1504  RetVT != MVT::f64) {
1506  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1507  CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1508  if (RVLocs.size() > 1)
1509  return false;
1510  }
1511 
1512  // Bail early if more than 8 arguments, as we only currently
1513  // handle arguments passed in registers.
1514  unsigned NumArgs = CLI.OutVals.size();
1515  if (NumArgs > 8)
1516  return false;
1517 
1518  // Set up the argument vectors.
1520  SmallVector<unsigned, 8> ArgRegs;
1521  SmallVector<MVT, 8> ArgVTs;
1523 
1524  Args.reserve(NumArgs);
1525  ArgRegs.reserve(NumArgs);
1526  ArgVTs.reserve(NumArgs);
1527  ArgFlags.reserve(NumArgs);
1528 
1529  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1530  // Only handle easy calls for now. It would be reasonably easy
1531  // to handle <= 8-byte structures passed ByVal in registers, but we
1532  // have to ensure they are right-justified in the register.
1533  ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1534  if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1535  return false;
1536 
1537  Value *ArgValue = CLI.OutVals[i];
1538  Type *ArgTy = ArgValue->getType();
1539  MVT ArgVT;
1540  if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1541  return false;
1542 
1543  if (ArgVT.isVector())
1544  return false;
1545 
1546  unsigned Arg = getRegForValue(ArgValue);
1547  if (Arg == 0)
1548  return false;
1549 
1550  Args.push_back(ArgValue);
1551  ArgRegs.push_back(Arg);
1552  ArgVTs.push_back(ArgVT);
1553  ArgFlags.push_back(Flags);
1554  }
1555 
1556  // Process the arguments.
1557  SmallVector<unsigned, 8> RegArgs;
1558  unsigned NumBytes;
1559 
1560  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1561  RegArgs, CC, NumBytes, IsVarArg))
1562  return false;
1563 
1564  MachineInstrBuilder MIB;
1565  // FIXME: No handling for function pointers yet. This requires
1566  // implementing the function descriptor (OPD) setup.
1567  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1568  if (!GV) {
1569  // patchpoints are a special case; they always dispatch to a pointer value.
1570  // However, we don't actually want to generate the indirect call sequence
1571  // here (that will be generated, as necessary, during asm printing), and
1572  // the call we generate here will be erased by FastISel::selectPatchpoint,
1573  // so don't try very hard...
1574  if (CLI.IsPatchPoint)
1575  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
1576  else
1577  return false;
1578  } else {
1579  // Build direct call with NOP for TOC restore.
1580  // FIXME: We can and should optimize away the NOP for local calls.
1581  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1582  TII.get(PPC::BL8_NOP));
1583  // Add callee.
1584  MIB.addGlobalAddress(GV);
1585  }
1586 
1587  // Add implicit physical register uses to the call.
1588  for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
1589  MIB.addReg(RegArgs[II], RegState::Implicit);
1590 
1591  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1592  // into the call.
1593  PPCFuncInfo->setUsesTOCBasePtr();
1594  MIB.addReg(PPC::X2, RegState::Implicit);
1595 
1596  // Add a register mask with the call-preserved registers. Proper
1597  // defs for return values will be added by setPhysRegsDeadExcept().
1598  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1599 
1600  CLI.Call = MIB;
1601 
1602  // Finish off the call including any return values.
1603  return finishCall(RetVT, CLI, NumBytes);
1604 }
1605 
1606 // Attempt to fast-select a return instruction.
1607 bool PPCFastISel::SelectRet(const Instruction *I) {
1608 
1609  if (!FuncInfo.CanLowerReturn)
1610  return false;
1611 
1612  if (TLI.supportSplitCSR(FuncInfo.MF))
1613  return false;
1614 
1615  const ReturnInst *Ret = cast<ReturnInst>(I);
1616  const Function &F = *I->getParent()->getParent();
1617 
1618  // Build a list of return value registers.
1619  SmallVector<unsigned, 4> RetRegs;
1620  CallingConv::ID CC = F.getCallingConv();
1621 
1622  if (Ret->getNumOperands() > 0) {
1624  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1625 
1626  // Analyze operands of the call, assigning locations to each operand.
1628  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1629  CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1630  const Value *RV = Ret->getOperand(0);
1631 
1632  // FIXME: Only one output register for now.
1633  if (ValLocs.size() > 1)
1634  return false;
1635 
1636  // Special case for returning a constant integer of any size - materialize
1637  // the constant as an i64 and copy it to the return register.
1638  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
1639  CCValAssign &VA = ValLocs[0];
1640 
1641  unsigned RetReg = VA.getLocReg();
1642  // We still need to worry about properly extending the sign. For example,
1643  // we could have only a single bit or a constant that needs zero
1644  // extension rather than sign extension. Make sure we pass the return
1645  // value extension property to integer materialization.
1646  unsigned SrcReg =
1647  PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1648 
1649  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1650  TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1651 
1652  RetRegs.push_back(RetReg);
1653 
1654  } else {
1655  unsigned Reg = getRegForValue(RV);
1656 
1657  if (Reg == 0)
1658  return false;
1659 
1660  // Copy the result values into the output registers.
1661  for (unsigned i = 0; i < ValLocs.size(); ++i) {
1662 
1663  CCValAssign &VA = ValLocs[i];
1664  assert(VA.isRegLoc() && "Can only return in registers!");
1665  RetRegs.push_back(VA.getLocReg());
1666  unsigned SrcReg = Reg + VA.getValNo();
1667 
1668  EVT RVEVT = TLI.getValueType(DL, RV->getType());
1669  if (!RVEVT.isSimple())
1670  return false;
1671  MVT RVVT = RVEVT.getSimpleVT();
1672  MVT DestVT = VA.getLocVT();
1673 
1674  if (RVVT != DestVT && RVVT != MVT::i8 &&
1675  RVVT != MVT::i16 && RVVT != MVT::i32)
1676  return false;
1677 
1678  if (RVVT != DestVT) {
1679  switch (VA.getLocInfo()) {
1680  default:
1681  llvm_unreachable("Unknown loc info!");
1682  case CCValAssign::Full:
1683  llvm_unreachable("Full value assign but types don't match?");
1684  case CCValAssign::AExt:
1685  case CCValAssign::ZExt: {
1686  const TargetRegisterClass *RC =
1687  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1688  unsigned TmpReg = createResultReg(RC);
1689  if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1690  return false;
1691  SrcReg = TmpReg;
1692  break;
1693  }
1694  case CCValAssign::SExt: {
1695  const TargetRegisterClass *RC =
1696  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1697  unsigned TmpReg = createResultReg(RC);
1698  if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1699  return false;
1700  SrcReg = TmpReg;
1701  break;
1702  }
1703  }
1704  }
1705 
1706  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1707  TII.get(TargetOpcode::COPY), RetRegs[i])
1708  .addReg(SrcReg);
1709  }
1710  }
1711  }
1712 
1713  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1714  TII.get(PPC::BLR8));
1715 
1716  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1717  MIB.addReg(RetRegs[i], RegState::Implicit);
1718 
1719  return true;
1720 }
1721 
1722 // Attempt to emit an integer extend of SrcReg into DestReg. Both
1723 // signed and zero extensions are supported. Return false if we
1724 // can't handle it.
1725 bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
1726  unsigned DestReg, bool IsZExt) {
1727  if (DestVT != MVT::i32 && DestVT != MVT::i64)
1728  return false;
1729  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1730  return false;
1731 
1732  // Signed extensions use EXTSB, EXTSH, EXTSW.
1733  if (!IsZExt) {
1734  unsigned Opc;
1735  if (SrcVT == MVT::i8)
1736  Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1737  else if (SrcVT == MVT::i16)
1738  Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1739  else {
1740  assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1741  Opc = PPC::EXTSW_32_64;
1742  }
1743  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1744  .addReg(SrcReg);
1745 
1746  // Unsigned 32-bit extensions use RLWINM.
1747  } else if (DestVT == MVT::i32) {
1748  unsigned MB;
1749  if (SrcVT == MVT::i8)
1750  MB = 24;
1751  else {
1752  assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1753  MB = 16;
1754  }
1755  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
1756  DestReg)
1757  .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
1758 
1759  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1760  } else {
1761  unsigned MB;
1762  if (SrcVT == MVT::i8)
1763  MB = 56;
1764  else if (SrcVT == MVT::i16)
1765  MB = 48;
1766  else
1767  MB = 32;
1768  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1769  TII.get(PPC::RLDICL_32_64), DestReg)
1770  .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
1771  }
1772 
1773  return true;
1774 }
1775 
1776 // Attempt to fast-select an indirect branch instruction.
1777 bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1778  unsigned AddrReg = getRegForValue(I->getOperand(0));
1779  if (AddrReg == 0)
1780  return false;
1781 
1782  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
1783  .addReg(AddrReg);
1784  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
1785 
1786  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1787  for (const BasicBlock *SuccBB : IB->successors())
1788  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
1789 
1790  return true;
1791 }
1792 
1793 // Attempt to fast-select an integer truncate instruction.
1794 bool PPCFastISel::SelectTrunc(const Instruction *I) {
1795  Value *Src = I->getOperand(0);
1796  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1797  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1798 
1799  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1800  return false;
1801 
1802  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1803  return false;
1804 
1805  unsigned SrcReg = getRegForValue(Src);
1806  if (!SrcReg)
1807  return false;
1808 
1809  // The only interesting case is when we need to switch register classes.
1810  if (SrcVT == MVT::i64) {
1811  unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
1812  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1813  TII.get(TargetOpcode::COPY),
1814  ResultReg).addReg(SrcReg, 0, PPC::sub_32);
1815  SrcReg = ResultReg;
1816  }
1817 
1818  updateValueMap(I, SrcReg);
1819  return true;
1820 }
1821 
1822 // Attempt to fast-select an integer extend instruction.
1823 bool PPCFastISel::SelectIntExt(const Instruction *I) {
1824  Type *DestTy = I->getType();
1825  Value *Src = I->getOperand(0);
1826  Type *SrcTy = Src->getType();
1827 
1828  bool IsZExt = isa<ZExtInst>(I);
1829  unsigned SrcReg = getRegForValue(Src);
1830  if (!SrcReg) return false;
1831 
1832  EVT SrcEVT, DestEVT;
1833  SrcEVT = TLI.getValueType(DL, SrcTy, true);
1834  DestEVT = TLI.getValueType(DL, DestTy, true);
1835  if (!SrcEVT.isSimple())
1836  return false;
1837  if (!DestEVT.isSimple())
1838  return false;
1839 
1840  MVT SrcVT = SrcEVT.getSimpleVT();
1841  MVT DestVT = DestEVT.getSimpleVT();
1842 
1843  // If we know the register class needed for the result of this
1844  // instruction, use it. Otherwise pick the register class of the
1845  // correct size that does not contain X0/R0, since we don't know
1846  // whether downstream uses permit that assignment.
1847  unsigned AssignedReg = FuncInfo.ValueMap[I];
1848  const TargetRegisterClass *RC =
1849  (AssignedReg ? MRI.getRegClass(AssignedReg) :
1850  (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1851  &PPC::GPRC_and_GPRC_NOR0RegClass));
1852  unsigned ResultReg = createResultReg(RC);
1853 
1854  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1855  return false;
1856 
1857  updateValueMap(I, ResultReg);
1858  return true;
1859 }
1860 
1861 // Attempt to fast-select an instruction that wasn't handled by
1862 // the table-generated machinery.
1863 bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1864 
1865  switch (I->getOpcode()) {
1866  case Instruction::Load:
1867  return SelectLoad(I);
1868  case Instruction::Store:
1869  return SelectStore(I);
1870  case Instruction::Br:
1871  return SelectBranch(I);
1872  case Instruction::IndirectBr:
1873  return SelectIndirectBr(I);
1874  case Instruction::FPExt:
1875  return SelectFPExt(I);
1876  case Instruction::FPTrunc:
1877  return SelectFPTrunc(I);
1878  case Instruction::SIToFP:
1879  return SelectIToFP(I, /*IsSigned*/ true);
1880  case Instruction::UIToFP:
1881  return SelectIToFP(I, /*IsSigned*/ false);
1882  case Instruction::FPToSI:
1883  return SelectFPToI(I, /*IsSigned*/ true);
1884  case Instruction::FPToUI:
1885  return SelectFPToI(I, /*IsSigned*/ false);
1886  case Instruction::Add:
1887  return SelectBinaryIntOp(I, ISD::ADD);
1888  case Instruction::Or:
1889  return SelectBinaryIntOp(I, ISD::OR);
1890  case Instruction::Sub:
1891  return SelectBinaryIntOp(I, ISD::SUB);
1892  case Instruction::Call:
1893  return selectCall(I);
1894  case Instruction::Ret:
1895  return SelectRet(I);
1896  case Instruction::Trunc:
1897  return SelectTrunc(I);
1898  case Instruction::ZExt:
1899  case Instruction::SExt:
1900  return SelectIntExt(I);
1901  // Here add other flavors of Instruction::XXX that automated
1902  // cases don't catch. For example, switches are terminators
1903  // that aren't yet handled.
1904  default:
1905  break;
1906  }
1907  return false;
1908 }
1909 
1910 // Materialize a floating-point constant into a register, and return
1911 // the register number (or zero if we failed to handle it).
1912 unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1913  // No plans to handle long double here.
1914  if (VT != MVT::f32 && VT != MVT::f64)
1915  return 0;
1916 
1917  // All FP constants are loaded from the constant pool.
1918  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
1919  assert(Align > 0 && "Unexpectedly missing alignment information!");
1920  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
1921  const TargetRegisterClass *RC =
1922  (VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass;
1923  unsigned DestReg = createResultReg(RC);
1924  CodeModel::Model CModel = TM.getCodeModel();
1925 
1926  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
1928  MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
1929 
1930  unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
1931  unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1932 
1933  PPCFuncInfo->setUsesTOCBasePtr();
1934  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
1935  if (CModel == CodeModel::Small) {
1936  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
1937  TmpReg)
1938  .addConstantPoolIndex(Idx).addReg(PPC::X2);
1939  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1940  .addImm(0).addReg(TmpReg).addMemOperand(MMO);
1941  } else {
1942  // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
1943  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
1944  TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
1945  // But for large code model, we must generate a LDtocL followed
1946  // by the LF[SD].
1947  if (CModel == CodeModel::Large) {
1948  unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1949  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
1950  TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
1951  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1952  .addImm(0)
1953  .addReg(TmpReg2);
1954  } else
1955  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1956  .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
1957  .addReg(TmpReg)
1958  .addMemOperand(MMO);
1959  }
1960 
1961  return DestReg;
1962 }
1963 
1964 // Materialize the address of a global value into a register, and return
1965 // the register number (or zero if we failed to handle it).
1966 unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
1967  assert(VT == MVT::i64 && "Non-address!");
1968  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
1969  unsigned DestReg = createResultReg(RC);
1970 
1971  // Global values may be plain old object addresses, TLS object
1972  // addresses, constant pool entries, or jump tables. How we generate
1973  // code for these may depend on small, medium, or large code model.
1974  CodeModel::Model CModel = TM.getCodeModel();
1975 
1976  // FIXME: Jump tables are not yet required because fast-isel doesn't
1977  // handle switches; if that changes, we need them as well. For now,
1978  // what follows assumes everything's a generic (or TLS) global address.
1979 
1980  // FIXME: We don't yet handle the complexity of TLS.
1981  if (GV->isThreadLocal())
1982  return 0;
1983 
1984  PPCFuncInfo->setUsesTOCBasePtr();
1985  // For small code model, generate a simple TOC load.
1986  if (CModel == CodeModel::Small)
1987  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
1988  DestReg)
1989  .addGlobalAddress(GV)
1990  .addReg(PPC::X2);
1991  else {
1992  // If the address is an externally defined symbol, a symbol with common
1993  // or externally available linkage, a non-local function address, or a
1994  // jump table address (not yet needed), or if we are generating code
1995  // for large code model, we generate:
1996  // LDtocL(GV, ADDIStocHA(%x2, GV))
1997  // Otherwise we generate:
1998  // ADDItocL(ADDIStocHA(%x2, GV), GV)
1999  // Either way, start with the ADDIStocHA:
2000  unsigned HighPartReg = createResultReg(RC);
2001  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
2002  HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
2003 
2004  unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
2005  if (GVFlags & PPCII::MO_NLP_FLAG) {
2006  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
2007  DestReg).addGlobalAddress(GV).addReg(HighPartReg);
2008  } else {
2009  // Otherwise generate the ADDItocL.
2010  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
2011  DestReg).addReg(HighPartReg).addGlobalAddress(GV);
2012  }
2013  }
2014 
2015  return DestReg;
2016 }
2017 
2018 // Materialize a 32-bit integer constant into a register, and return
2019 // the register number (or zero if we failed to handle it).
2020 unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2021  const TargetRegisterClass *RC) {
2022  unsigned Lo = Imm & 0xFFFF;
2023  unsigned Hi = (Imm >> 16) & 0xFFFF;
2024 
2025  unsigned ResultReg = createResultReg(RC);
2026  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2027 
2028  if (isInt<16>(Imm))
2029  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2030  TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2031  .addImm(Imm);
2032  else if (Lo) {
2033  // Both Lo and Hi have nonzero bits.
2034  unsigned TmpReg = createResultReg(RC);
2035  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2036  TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2037  .addImm(Hi);
2038  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2039  TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2040  .addReg(TmpReg).addImm(Lo);
2041  } else
2042  // Just Hi bits.
2043  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2044  TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2045  .addImm(Hi);
2046 
2047  return ResultReg;
2048 }
2049 
2050 // Materialize a 64-bit integer constant into a register, and return
2051 // the register number (or zero if we failed to handle it).
2052 unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
2053  const TargetRegisterClass *RC) {
2054  unsigned Remainder = 0;
2055  unsigned Shift = 0;
2056 
2057  // If the value doesn't fit in 32 bits, see if we can shift it
2058  // so that it fits in 32 bits.
2059  if (!isInt<32>(Imm)) {
2060  Shift = countTrailingZeros<uint64_t>(Imm);
2061  int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
2062 
2063  if (isInt<32>(ImmSh))
2064  Imm = ImmSh;
2065  else {
2066  Remainder = Imm;
2067  Shift = 32;
2068  Imm >>= 32;
2069  }
2070  }
2071 
2072  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
2073  // (if not shifted).
2074  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
2075  if (!Shift)
2076  return TmpReg1;
2077 
2078  // If upper 32 bits were not zero, we've built them and need to shift
2079  // them into place.
2080  unsigned TmpReg2;
2081  if (Imm) {
2082  TmpReg2 = createResultReg(RC);
2083  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
2084  TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
2085  } else
2086  TmpReg2 = TmpReg1;
2087 
2088  unsigned TmpReg3, Hi, Lo;
2089  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
2090  TmpReg3 = createResultReg(RC);
2091  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
2092  TmpReg3).addReg(TmpReg2).addImm(Hi);
2093  } else
2094  TmpReg3 = TmpReg2;
2095 
2096  if ((Lo = Remainder & 0xFFFF)) {
2097  unsigned ResultReg = createResultReg(RC);
2098  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
2099  ResultReg).addReg(TmpReg3).addImm(Lo);
2100  return ResultReg;
2101  }
2102 
2103  return TmpReg3;
2104 }
2105 
2106 // Materialize an integer constant into a register, and return
2107 // the register number (or zero if we failed to handle it).
2108 unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2109  bool UseSExt) {
2110  // If we're using CR bit registers for i1 values, handle that as a special
2111  // case first.
2112  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
2113  unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2114  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2115  TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2116  return ImmReg;
2117  }
2118 
2119  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2120  VT != MVT::i1)
2121  return 0;
2122 
2123  const TargetRegisterClass *RC =
2124  ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2125  int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2126 
2127  // If the constant is in range, use a load-immediate.
2128  // Since LI will sign extend the constant we need to make sure that for
2129  // our zeroext constants that the sign extended constant fits into 16-bits -
2130  // a range of 0..0x7fff.
2131  if (isInt<16>(Imm)) {
2132  unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2133  unsigned ImmReg = createResultReg(RC);
2134  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
2135  .addImm(Imm);
2136  return ImmReg;
2137  }
2138 
2139  // Construct the constant piecewise.
2140  if (VT == MVT::i64)
2141  return PPCMaterialize64BitInt(Imm, RC);
2142  else if (VT == MVT::i32)
2143  return PPCMaterialize32BitInt(Imm, RC);
2144 
2145  return 0;
2146 }
2147 
2148 // Materialize a constant into a register, and return the register
2149 // number (or zero if we failed to handle it).
2150 unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
2151  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2152 
2153  // Only handle simple types.
2154  if (!CEVT.isSimple()) return 0;
2155  MVT VT = CEVT.getSimpleVT();
2156 
2157  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2158  return PPCMaterializeFP(CFP, VT);
2159  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2160  return PPCMaterializeGV(GV, VT);
2161  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2162  // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2163  // assumes that constant PHI operands will be zero extended, and failure to
2164  // match that assumption will cause problems if we sign extend here but
2165  // some user of a PHI is in a block for which we fall back to full SDAG
2166  // instruction selection.
2167  return PPCMaterializeInt(CI, VT, false);
2168 
2169  return 0;
2170 }
2171 
2172 // Materialize the address created by an alloca into a register, and
2173 // return the register number (or zero if we failed to handle it).
2174 unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2175  // Don't handle dynamic allocas.
2176  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
2177 
2178  MVT VT;
2179  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
2180 
2182  FuncInfo.StaticAllocaMap.find(AI);
2183 
2184  if (SI != FuncInfo.StaticAllocaMap.end()) {
2185  unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2186  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
2187  ResultReg).addFrameIndex(SI->second).addImm(0);
2188  return ResultReg;
2189  }
2190 
2191  return 0;
2192 }
2193 
// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load. The folding only picks up one. Extend this
// to check subsequent instructions for the same pattern and remove
// them. Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  // (OpNo is part of the FastISel folding interface but unused here.)
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend. The switch decides
  // whether MI is an extend that a widening load can subsume.
  bool IsZExt = false;
  switch(MI->getOpcode()) {
  default:
    return false;

  case PPC::RLDICL:
  case PPC::RLDICL_32_64: {
    // 64-bit rotate-and-clear: acts as a zero-extend when it clears no
    // more than the bits a zero-extending load of width VT would clear.
    IsZExt = true;
    unsigned MB = MI->getOperand(3).getImm();
    if ((VT == MVT::i8 && MB <= 56) ||
        (VT == MVT::i16 && MB <= 48) ||
        (VT == MVT::i32 && MB <= 32))
      break;
    return false;
  }

  case PPC::RLWINM:
  case PPC::RLWINM8: {
    // 32-bit rotate-and-mask: same idea for i8/i16 within 32 bits.
    IsZExt = true;
    unsigned MB = MI->getOperand(3).getImm();
    if ((VT == MVT::i8 && MB <= 24) ||
        (VT == MVT::i16 && MB <= 16))
      break;
    return false;
  }

  case PPC::EXTSB:
  case PPC::EXTSB8:
  case PPC::EXTSB8_32_64:
    /* There is no sign-extending load-byte instruction. */
    return false;

  case PPC::EXTSH:
  case PPC::EXTSH8:
  case PPC::EXTSH8_32_64: {
    // Sign-extend halfword: foldable for i16 (lha) or narrower loads.
    if (VT != MVT::i16 && VT != MVT::i8)
      return false;
    break;
  }

  case PPC::EXTSW:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64: {
    // Sign-extend word: foldable for i32 (lwa) or narrower loads.
    if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
      return false;
    break;
  }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(LI->getOperand(0), Addr))
    return false;

  // Re-emit the load directly into the extend's destination register,
  // with the extension folded in, then delete the extend.
  unsigned ResultReg = MI->getOperand(0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
    return false;

  MI->eraseFromParent();
  return true;
}
2272 
// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.
// Returning false defers to the generic FastISel/SDAG argument lowering.
bool PPCFastISel::fastLowerArguments() {
  // Defer to normal argument lowering for now. It's reasonably
  // efficient. Consider doing something like ARM to handle the
  // case where all args fit in registers, no varargs, no float
  // or vector args.
  return false;
}
2282 
2283 // Handle materializing integer constants into a register. This is not
2284 // automatically generated for PowerPC, so must be explicitly created here.
2285 unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2286 
2287  if (Opc != ISD::Constant)
2288  return 0;
2289 
2290  // If we're using CR bit registers for i1 values, handle that as a special
2291  // case first.
2292  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
2293  unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2294  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2295  TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2296  return ImmReg;
2297  }
2298 
2299  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2300  VT != MVT::i1)
2301  return 0;
2302 
2303  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2304  &PPC::GPRCRegClass);
2305  if (VT == MVT::i64)
2306  return PPCMaterialize64BitInt(Imm, RC);
2307  else
2308  return PPCMaterialize32BitInt(Imm, RC);
2309 }
2310 
2311 // Override for ADDI and ADDI8 to set the correct register class
2312 // on RHS operand 0. The automatic infrastructure naively assumes
2313 // GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2314 // for these cases. At the moment, none of the other automatically
2315 // generated RI instructions require special treatment. However, once
2316 // SelectSelect is implemented, "isel" requires similar handling.
2317 //
2318 // Also be conservative about the output register class. Avoid
2319 // assigning R0 or X0 to the output register for GPRC and G8RC
2320 // register classes, as any such result could be used in ADDI, etc.,
2321 // where those regs have another meaning.
2322 unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2323  const TargetRegisterClass *RC,
2324  unsigned Op0, bool Op0IsKill,
2325  uint64_t Imm) {
2326  if (MachineInstOpcode == PPC::ADDI)
2327  MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2328  else if (MachineInstOpcode == PPC::ADDI8)
2329  MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2330 
2331  const TargetRegisterClass *UseRC =
2332  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2333  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2334 
2335  return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
2336  Op0, Op0IsKill, Imm);
2337 }
2338 
2339 // Override for instructions with one register operand to avoid use of
2340 // R0/X0. The automatic infrastructure isn't aware of the context so
2341 // we must be conservative.
2342 unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2343  const TargetRegisterClass* RC,
2344  unsigned Op0, bool Op0IsKill) {
2345  const TargetRegisterClass *UseRC =
2346  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2347  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2348 
2349  return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
2350 }
2351 
2352 // Override for instructions with two register operands to avoid use
2353 // of R0/X0. The automatic infrastructure isn't aware of the context
2354 // so we must be conservative.
2355 unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2356  const TargetRegisterClass* RC,
2357  unsigned Op0, bool Op0IsKill,
2358  unsigned Op1, bool Op1IsKill) {
2359  const TargetRegisterClass *UseRC =
2360  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2361  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2362 
2363  return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
2364  Op1, Op1IsKill);
2365 }
2366 
2367 namespace llvm {
2368  // Create the fast instruction selector for PowerPC64 ELF.
2370  const TargetLibraryInfo *LibInfo) {
2371  // Only available on 64-bit ELF for now.
2372  const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2373  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
2374  return new PPCFastISel(FuncInfo, LibInfo);
2375  return nullptr;
2376  }
2377 }
uint64_t CallInst * C
Return a value (possibly void), from a function.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
void push_back(const T &Elt)
Definition: SmallVector.h:212
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:843
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1542
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
bool isVector() const
Return true if this is a vector value type.
unsigned getReg() const
getReg - Returns the register number.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
unsigned less or equal
Definition: InstrTypes.h:879
unsigned less than
Definition: InstrTypes.h:878
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:859
unsigned getValNo() const
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:869
BasicBlock * getSuccessor(unsigned i) const
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:298
F(f)
An instruction for reading from memory.
Definition: Instructions.h:164
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Value * getCondition() const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
void reserve(size_type N)
Definition: SmallVector.h:378
op_iterator op_begin()
Definition: User.h:214
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
bool needsCustom() const
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:864
unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm)
Emit a MachineInstr with a register operand, an immediate, and a result register in the given registe...
Definition: FastISel.cpp:1899
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:511
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:863
A description of a memory reference used in the backend.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const HexagonInstrInfo * TII
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:97
Class to represent struct types.
Definition: DerivedTypes.h:201
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill)
Emit a MachineInstr with one register operand and a result register in the given register class...
Definition: FastISel.cpp:1826
Reg
All possible values of the reg field in the ModR/M byte.
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:860
SimpleValueType SimpleTy
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:293
LocInfo getLocInfo() const
unsigned getID() const
Return the register class ID number.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getSizeInBits() const
This is a fast-path instruction selection class that generates poor code and doesn&#39;t support illegal ...
Definition: FastISel.h:67
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:867
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1554
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:179
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
amdgpu Simplify well known AMD library false Value * Callee
Value * getOperand(unsigned i) const
Definition: User.h:154
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:121
TargetInstrInfo - Interface to description of machine instruction set.
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:141
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
unsigned const MachineRegisterInfo * MRI
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Conditional or Unconditional Branch instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
MO_NLP_FLAG - If this bit is set, the symbol reference is actually to the non_lazy_ptr for the global...
Definition: PPC.h:87
Indirect Branch Instruction.
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
op_iterator op_end()
Definition: User.h:216
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:144
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:180
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:853
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:862
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
Class to represent integer types.
Definition: DerivedTypes.h:40
const MachineInstrBuilder & addFrameIndex(int Idx) const
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:870
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:194
Extended Value Type.
Definition: ValueTypes.h:34
static bool isAtomic(Instruction *I)
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:868
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
signed greater than
Definition: InstrTypes.h:880
The memory access writes data.
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:857
unsigned getNumOperands() const
Definition: User.h:176
CCState - This class holds information needed while lowering arguments and return values...
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:301
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:179
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:867
Provides information about what library functions are available for the current target.
CCValAssign - Represent assignment of one arg/retval to a location.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
signed less than
Definition: InstrTypes.h:882
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned char TargetFlags=0) const
unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill)
Emit a MachineInstr with two register operands and a result register in the given register class...
Definition: FastISel.cpp:1847
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:599
int64_t getImm() const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
signed less or equal
Definition: InstrTypes.h:883
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:69
This file defines the FastISel class.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
amdgpu Simplify well known AMD library false Value Value * Arg
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Representation of each machine instruction.
Definition: MachineInstr.h:60
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:533
unsigned greater or equal
Definition: InstrTypes.h:877
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:193
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:861
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:865
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:338
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
bool isRegLoc() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void GetReturnInfo(Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags...
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:856
LLVM Value Representation.
Definition: Value.h:73
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:866
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
bool isThreadLocal() const
If the value is "Thread Local", its value isn&#39;t shared by the threads.
Definition: GlobalValue.h:246
IRTranslator LLVM IR MI
unsigned greater than
Definition: InstrTypes.h:876
bool isSVR4ABI() const
Definition: PPCSubtarget.h:305
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:157
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
unsigned getLocReg() const
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
static Optional< PPC::Predicate > getComparePred(CmpInst::Predicate Pred)
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:298
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:858
unsigned AllocateStack(unsigned Size, unsigned Align)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:855
signed greater or equal
Definition: InstrTypes.h:881
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
an instruction to allocate memory on the stack
Definition: Instructions.h:60
gep_type_iterator gep_type_begin(const User *GEP)