LLVM  6.0.0svn
PPCFastISel.cpp
Go to the documentation of this file.
1 //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the PowerPC-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // PPCGenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
41 
42 //===----------------------------------------------------------------------===//
43 //
44 // TBD:
45 // fastLowerArguments: Handle simple cases.
46 // PPCMaterializeGV: Handle TLS.
47 // SelectCall: Handle function pointers.
48 // SelectCall: Handle multi-register return values.
49 // SelectCall: Optimize away nops for local calls.
50 // processCallArgs: Handle bit-converted arguments.
51 // finishCall: Handle multi-register return values.
52 // PPCComputeAddress: Handle parameter references as FrameIndex's.
53 // PPCEmitCmp: Handle immediate as operand 1.
54 // SelectCall: Handle small byval arguments.
55 // SelectIntrinsicCall: Implement.
56 // SelectSelect: Implement.
57 // Consider factoring isTypeLegal into the base class.
58 // Implement switches and jump tables.
59 //
60 //===----------------------------------------------------------------------===//
61 using namespace llvm;
62 
63 #define DEBUG_TYPE "ppcfastisel"
64 
65 namespace {
66 
// A memory address in one of two forms: a base register plus a byte
// offset, or a frame index plus a byte offset. Defaults to an empty
// register-based address (register 0, offset 0).
struct Address {
  // Discriminator for the Base union below.
  enum { RegBase, FrameIndexBase } BaseType = RegBase;

  // Either a virtual base register or a stack frame index, selected
  // by BaseType.
  union {
    unsigned Reg;
    int FI;
  } Base;

  long Offset = 0;

  Address() { Base.Reg = 0; }
};
86 
87 class PPCFastISel final : public FastISel {
88 
89  const TargetMachine &TM;
90  const PPCSubtarget *PPCSubTarget;
91  PPCFunctionInfo *PPCFuncInfo;
92  const TargetInstrInfo &TII;
93  const TargetLowering &TLI;
95 
96  public:
97  explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
98  const TargetLibraryInfo *LibInfo)
99  : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
100  PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
101  PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
102  TII(*PPCSubTarget->getInstrInfo()),
103  TLI(*PPCSubTarget->getTargetLowering()),
104  Context(&FuncInfo.Fn->getContext()) {}
105 
106  // Backend specific FastISel code.
107  private:
108  bool fastSelectInstruction(const Instruction *I) override;
109  unsigned fastMaterializeConstant(const Constant *C) override;
110  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
111  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
112  const LoadInst *LI) override;
113  bool fastLowerArguments() override;
114  unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
115  unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
116  const TargetRegisterClass *RC,
117  unsigned Op0, bool Op0IsKill,
118  uint64_t Imm);
119  unsigned fastEmitInst_r(unsigned MachineInstOpcode,
120  const TargetRegisterClass *RC,
121  unsigned Op0, bool Op0IsKill);
122  unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
123  const TargetRegisterClass *RC,
124  unsigned Op0, bool Op0IsKill,
125  unsigned Op1, bool Op1IsKill);
126 
127  bool fastLowerCall(CallLoweringInfo &CLI) override;
128 
129  // Instruction selection routines.
130  private:
131  bool SelectLoad(const Instruction *I);
132  bool SelectStore(const Instruction *I);
133  bool SelectBranch(const Instruction *I);
134  bool SelectIndirectBr(const Instruction *I);
135  bool SelectFPExt(const Instruction *I);
136  bool SelectFPTrunc(const Instruction *I);
137  bool SelectIToFP(const Instruction *I, bool IsSigned);
138  bool SelectFPToI(const Instruction *I, bool IsSigned);
139  bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
140  bool SelectRet(const Instruction *I);
141  bool SelectTrunc(const Instruction *I);
142  bool SelectIntExt(const Instruction *I);
143 
144  // Utility routines.
145  private:
146  bool isTypeLegal(Type *Ty, MVT &VT);
147  bool isLoadTypeLegal(Type *Ty, MVT &VT);
148  bool isValueAvailable(const Value *V) const;
149  bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
150  return RC->getID() == PPC::VSFRCRegClassID;
151  }
152  bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
153  return RC->getID() == PPC::VSSRCRegClassID;
154  }
155  bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
156  bool isZExt, unsigned DestReg);
157  bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
158  const TargetRegisterClass *RC, bool IsZExt = true,
159  unsigned FP64LoadOpc = PPC::LFD);
160  bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
161  bool PPCComputeAddress(const Value *Obj, Address &Addr);
162  void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
163  unsigned &IndexReg);
164  bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
165  unsigned DestReg, bool IsZExt);
166  unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
167  unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
168  unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
169  bool UseSExt = true);
170  unsigned PPCMaterialize32BitInt(int64_t Imm,
171  const TargetRegisterClass *RC);
172  unsigned PPCMaterialize64BitInt(int64_t Imm,
173  const TargetRegisterClass *RC);
174  unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
175  unsigned SrcReg, bool IsSigned);
176  unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
177 
178  // Call handling routines.
179  private:
180  bool processCallArgs(SmallVectorImpl<Value*> &Args,
181  SmallVectorImpl<unsigned> &ArgRegs,
182  SmallVectorImpl<MVT> &ArgVTs,
184  SmallVectorImpl<unsigned> &RegArgs,
185  CallingConv::ID CC,
186  unsigned &NumBytes,
187  bool IsVarArg);
188  bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
189  LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag);
190 
191  private:
192  #include "PPCGenFastISel.inc"
193 
194 };
195 
196 } // end anonymous namespace
197 
198 #include "PPCGenCallingConv.inc"
199 
200 // Function whose sole purpose is to kill compiler warnings
201 // stemming from unused functions included from PPCGenCallingConv.inc.
202 CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
203  if (Flag == 1)
204  return CC_PPC32_SVR4;
205  else if (Flag == 2)
206  return CC_PPC32_SVR4_ByVal;
207  else if (Flag == 3)
208  return CC_PPC32_SVR4_VarArg;
209  else
210  return RetCC_PPC;
211 }
212 
214  switch (Pred) {
215  // These are not representable with any single compare.
216  case CmpInst::FCMP_FALSE:
217  case CmpInst::FCMP_TRUE:
218  // Major concern about the following 6 cases is NaN result. The comparison
219  // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
220  // only one of which will be set. The result is generated by fcmpu
221  // instruction. However, bc instruction only inspects one of the first 3
222  // bits, so when un is set, bc instruction may jump to to an undesired
223  // place.
224  //
225  // More specifically, if we expect an unordered comparison and un is set, we
226  // expect to always go to true branch; in such case UEQ, UGT and ULT still
227  // give false, which are undesired; but UNE, UGE, ULE happen to give true,
228  // since they are tested by inspecting !eq, !lt, !gt, respectively.
229  //
230  // Similarly, for ordered comparison, when un is set, we always expect the
231  // result to be false. In such case OGT, OLT and OEQ is good, since they are
232  // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
233  // and ONE are tested through !lt, !gt and !eq, and these are true.
234  case CmpInst::FCMP_UEQ:
235  case CmpInst::FCMP_UGT:
236  case CmpInst::FCMP_ULT:
237  case CmpInst::FCMP_OGE:
238  case CmpInst::FCMP_OLE:
239  case CmpInst::FCMP_ONE:
240  default:
241  return Optional<PPC::Predicate>();
242 
243  case CmpInst::FCMP_OEQ:
244  case CmpInst::ICMP_EQ:
245  return PPC::PRED_EQ;
246 
247  case CmpInst::FCMP_OGT:
248  case CmpInst::ICMP_UGT:
249  case CmpInst::ICMP_SGT:
250  return PPC::PRED_GT;
251 
252  case CmpInst::FCMP_UGE:
253  case CmpInst::ICMP_UGE:
254  case CmpInst::ICMP_SGE:
255  return PPC::PRED_GE;
256 
257  case CmpInst::FCMP_OLT:
258  case CmpInst::ICMP_ULT:
259  case CmpInst::ICMP_SLT:
260  return PPC::PRED_LT;
261 
262  case CmpInst::FCMP_ULE:
263  case CmpInst::ICMP_ULE:
264  case CmpInst::ICMP_SLE:
265  return PPC::PRED_LE;
266 
267  case CmpInst::FCMP_UNE:
268  case CmpInst::ICMP_NE:
269  return PPC::PRED_NE;
270 
271  case CmpInst::FCMP_ORD:
272  return PPC::PRED_NU;
273 
274  case CmpInst::FCMP_UNO:
275  return PPC::PRED_UN;
276  }
277 }
278 
279 // Determine whether the type Ty is simple enough to be handled by
280 // fast-isel, and return its equivalent machine type in VT.
281 // FIXME: Copied directly from ARM -- factor into base class?
282 bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
283  EVT Evt = TLI.getValueType(DL, Ty, true);
284 
285  // Only handle simple types.
286  if (Evt == MVT::Other || !Evt.isSimple()) return false;
287  VT = Evt.getSimpleVT();
288 
289  // Handle all legal types, i.e. a register that will directly hold this
290  // value.
291  return TLI.isTypeLegal(VT);
292 }
293 
294 // Determine whether the type Ty is simple enough to be handled by
295 // fast-isel as a load target, and return its equivalent machine type in VT.
296 bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
297  if (isTypeLegal(Ty, VT)) return true;
298 
299  // If this is a type than can be sign or zero-extended to a basic operation
300  // go ahead and accept it now.
301  if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
302  return true;
303  }
304 
305  return false;
306 }
307 
308 bool PPCFastISel::isValueAvailable(const Value *V) const {
309  if (!isa<Instruction>(V))
310  return true;
311 
312  const auto *I = cast<Instruction>(V);
313  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
314 }
315 
316 // Given a value Obj, create an Address object Addr that represents its
317 // address. Return false if we can't handle it.
318 bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
319  const User *U = nullptr;
320  unsigned Opcode = Instruction::UserOp1;
321  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
322  // Don't walk into other basic blocks unless the object is an alloca from
323  // another block, otherwise it may not have a virtual register assigned.
324  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
325  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
326  Opcode = I->getOpcode();
327  U = I;
328  }
329  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
330  Opcode = C->getOpcode();
331  U = C;
332  }
333 
334  switch (Opcode) {
335  default:
336  break;
337  case Instruction::BitCast:
338  // Look through bitcasts.
339  return PPCComputeAddress(U->getOperand(0), Addr);
340  case Instruction::IntToPtr:
341  // Look past no-op inttoptrs.
342  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
343  TLI.getPointerTy(DL))
344  return PPCComputeAddress(U->getOperand(0), Addr);
345  break;
346  case Instruction::PtrToInt:
347  // Look past no-op ptrtoints.
348  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
349  return PPCComputeAddress(U->getOperand(0), Addr);
350  break;
351  case Instruction::GetElementPtr: {
352  Address SavedAddr = Addr;
353  long TmpOffset = Addr.Offset;
354 
355  // Iterate through the GEP folding the constants into offsets where
356  // we can.
358  for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
359  II != IE; ++II, ++GTI) {
360  const Value *Op = *II;
361  if (StructType *STy = GTI.getStructTypeOrNull()) {
362  const StructLayout *SL = DL.getStructLayout(STy);
363  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
364  TmpOffset += SL->getElementOffset(Idx);
365  } else {
366  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
367  for (;;) {
368  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
369  // Constant-offset addressing.
370  TmpOffset += CI->getSExtValue() * S;
371  break;
372  }
373  if (canFoldAddIntoGEP(U, Op)) {
374  // A compatible add with a constant operand. Fold the constant.
375  ConstantInt *CI =
376  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
377  TmpOffset += CI->getSExtValue() * S;
378  // Iterate on the other operand.
379  Op = cast<AddOperator>(Op)->getOperand(0);
380  continue;
381  }
382  // Unsupported
383  goto unsupported_gep;
384  }
385  }
386  }
387 
388  // Try to grab the base operand now.
389  Addr.Offset = TmpOffset;
390  if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
391 
392  // We failed, restore everything and try the other options.
393  Addr = SavedAddr;
394 
395  unsupported_gep:
396  break;
397  }
398  case Instruction::Alloca: {
399  const AllocaInst *AI = cast<AllocaInst>(Obj);
401  FuncInfo.StaticAllocaMap.find(AI);
402  if (SI != FuncInfo.StaticAllocaMap.end()) {
403  Addr.BaseType = Address::FrameIndexBase;
404  Addr.Base.FI = SI->second;
405  return true;
406  }
407  break;
408  }
409  }
410 
411  // FIXME: References to parameters fall through to the behavior
412  // below. They should be able to reference a frame index since
413  // they are stored to the stack, so we can get "ld rx, offset(r1)"
414  // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
415  // just contain the parameter. Try to handle this with a FI.
416 
417  // Try to get this in a register if nothing else has worked.
418  if (Addr.Base.Reg == 0)
419  Addr.Base.Reg = getRegForValue(Obj);
420 
421  // Prevent assignment of base register to X0, which is inappropriate
422  // for loads and stores alike.
423  if (Addr.Base.Reg != 0)
424  MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
425 
426  return Addr.Base.Reg != 0;
427 }
428 
429 // Fix up some addresses that can't be used directly. For example, if
430 // an offset won't fit in an instruction field, we may need to move it
431 // into an index register.
432 void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
433  unsigned &IndexReg) {
434 
435  // Check whether the offset fits in the instruction field.
436  if (!isInt<16>(Addr.Offset))
437  UseOffset = false;
438 
439  // If this is a stack pointer and the offset needs to be simplified then
440  // put the alloca address into a register, set the base type back to
441  // register and continue. This should almost never happen.
442  if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
443  unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
444  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
445  ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
446  Addr.Base.Reg = ResultReg;
447  Addr.BaseType = Address::RegBase;
448  }
449 
450  if (!UseOffset) {
451  IntegerType *OffsetTy = Type::getInt64Ty(*Context);
452  const ConstantInt *Offset =
453  ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
454  IndexReg = PPCMaterializeInt(Offset, MVT::i64);
455  assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
456  }
457 }
458 
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined.
bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                              const TargetRegisterClass *RC,
                              bool IsZExt, unsigned FP64LoadOpc) {
  unsigned Opc;
  bool UseOffset = true;

  // If ResultReg is given, it determines the register class of the load.
  // Otherwise, RC is the register class to use. If the result of the
  // load isn't anticipated in this block, both may be zero, in which
  // case we must make a conservative guess. In particular, don't assign
  // R0 or X0 to the result register, as the result may be used in a load,
  // store, add-immediate, or isel that won't permit this. (Though
  // perhaps the spill and reload of live-exit values would handle this?)
  const TargetRegisterClass *UseRC =
    (ResultReg ? MRI.getRegClass(ResultReg) :
     (RC ? RC :
      (VT == MVT::f64 ? &PPC::F8RCRegClass :
       (VT == MVT::f32 ? &PPC::F4RCRegClass :
        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
         &PPC::GPRC_and_GPRC_NOR0RegClass)))));

  // 32-bit GPR destination selects the 32-bit opcode variants below.
  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the load opcode for the value type; also decide whether the
  // immediate-offset form is even encodable for this opcode.
  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
      break;
    case MVT::i16:
      Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
                    : (Is32BitInt ? PPC::LHA : PPC::LHA8));
      break;
    case MVT::i32:
      Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
                    : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
      // LWA/LWA_32 cannot encode an offset that is not a multiple of 4;
      // fall back to the indexed form in that case.
      if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
        UseOffset = false;
      break;
    case MVT::i64:
      Opc = PPC::LD;
      assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
             "64-bit load with 32-bit target??");
      // LD likewise only encodes offsets that are a multiple of 4.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = PPC::LFS;
      break;
    case MVT::f64:
      Opc = FP64LoadOpc;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
  // be used.
  bool IsVSSRC = isVSSRCRegClass(UseRC);
  bool IsVSFRC = isVSFRCRegClass(UseRC);
  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
  bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
  if ((Is32VSXLoad || Is64VSXLoad) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  if (ResultReg == 0)
    ResultReg = createResultReg(UseRC);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlignment(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::LBZ: Opc = PPC::LBZX; break;
      case PPC::LBZ8: Opc = PPC::LBZX8; break;
      case PPC::LHZ: Opc = PPC::LHZX; break;
      case PPC::LHZ8: Opc = PPC::LHZX8; break;
      case PPC::LHA: Opc = PPC::LHAX; break;
      case PPC::LHA8: Opc = PPC::LHAX8; break;
      case PPC::LWZ: Opc = PPC::LWZX; break;
      case PPC::LWZ8: Opc = PPC::LWZX8; break;
      case PPC::LWA: Opc = PPC::LWAX; break;
      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
      case PPC::LD: Opc = PPC::LDX; break;
      case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
      case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                       ResultReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
596 
597 // Attempt to fast-select a load instruction.
598 bool PPCFastISel::SelectLoad(const Instruction *I) {
599  // FIXME: No atomic loads are supported.
600  if (cast<LoadInst>(I)->isAtomic())
601  return false;
602 
603  // Verify we have a legal type before going any further.
604  MVT VT;
605  if (!isLoadTypeLegal(I->getType(), VT))
606  return false;
607 
608  // See if we can handle this address.
609  Address Addr;
610  if (!PPCComputeAddress(I->getOperand(0), Addr))
611  return false;
612 
613  // Look at the currently assigned register for this instruction
614  // to determine the required register class. This is necessary
615  // to constrain RA from using R0/X0 when this is not legal.
616  unsigned AssignedReg = FuncInfo.ValueMap[I];
617  const TargetRegisterClass *RC =
618  AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
619 
620  unsigned ResultReg = 0;
621  if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
622  return false;
623  updateValueMap(I, ResultReg);
624  return true;
625 }
626 
// Emit a store instruction to store SrcReg at Addr.
bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // The source register's class selects between the 32-bit and 64-bit
  // opcode variants below.
  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      // STD only encodes offsets that are a multiple of 4; otherwise
      // fall back to the indexed form.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = PPC::STFS;
      break;
    case MVT::f64:
      Opc = PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlignment(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::STB: Opc = PPC::STBX; break;
      case PPC::STH : Opc = PPC::STHX; break;
      case PPC::STW : Opc = PPC::STWX; break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD: Opc = PPC::STDX; break;
      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
739 
740 // Attempt to fast-select a store instruction.
741 bool PPCFastISel::SelectStore(const Instruction *I) {
742  Value *Op0 = I->getOperand(0);
743  unsigned SrcReg = 0;
744 
745  // FIXME: No atomics loads are supported.
746  if (cast<StoreInst>(I)->isAtomic())
747  return false;
748 
749  // Verify we have a legal type before going any further.
750  MVT VT;
751  if (!isLoadTypeLegal(Op0->getType(), VT))
752  return false;
753 
754  // Get the value to be stored into a register.
755  SrcReg = getRegForValue(Op0);
756  if (SrcReg == 0)
757  return false;
758 
759  // See if we can handle this address.
760  Address Addr;
761  if (!PPCComputeAddress(I->getOperand(1), Addr))
762  return false;
763 
764  if (!PPCEmitStore(VT, SrcReg, Addr))
765  return false;
766 
767  return true;
768 }
769 
// Attempt to fast-select a branch instruction.
bool PPCFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *BrBB = FuncInfo.MBB;
  // True and false successors of the IR branch.
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // For now, just try the simplest case where it's fed by a compare.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    // The compare must be defined in this block (see isValueAvailable).
    if (isValueAvailable(CI)) {
      // Map the IR predicate to a PPC predicate; bail out if no single
      // compare can represent it.
      Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
      if (!OptPPCPred)
        return false;

      PPC::Predicate PPCPred = OptPPCPred.getValue();

      // Take advantage of fall-through opportunities: if the true block
      // immediately follows, branch on the inverted condition instead.
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        PPCPred = PPC::InvertPredicate(PPCPred);
      }

      unsigned CondReg = createResultReg(&PPC::CRRCRegClass);

      if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
                      CondReg))
        return false;

      BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
        .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const ConstantInt *CI =
                 dyn_cast<ConstantInt>(BI->getCondition())) {
    // Constant condition: emit an unconditional branch to the taken side.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(Target, DbgLoc);
    return true;
  }

  // FIXME: ARM looks for a case where the block containing the compare
  // has been split from the block containing the branch. If this happens,
  // there is a vreg available containing the result of the compare. I'm
  // not sure we can do much, as we've lost the predicate information with
  // the compare instruction -- we have a 4-bit CR but don't know which bit
  // to test here.
  return false;
}
819 
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, unsigned DestReg) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // When the subtarget keeps i1 values in condition-register bits, this
  // path doesn't apply.
  if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  long Imm = 0;
  bool UseImm = false;

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  // Select the compare opcode from the value type, signedness, and
  // whether an immediate form can be used.
  unsigned CmpOpc;
  bool NeedsExt = false;
  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      CmpOpc = PPC::FCMPUS;
      break;
    case MVT::f64:
      CmpOpc = PPC::FCMPUD;
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      // Sub-word operands must first be extended to 32 bits.
      NeedsExt = true;
      // Intentional fall-through.
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  unsigned SrcReg1 = getRegForValue(SrcValue1);
  if (SrcReg1 == 0)
    return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (SrcReg2 == 0)
      return false;
  }

  // Extend sub-word operands (both of them, if no immediate is used).
  if (NeedsExt) {
    unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addImm(Imm);

  return true;
}
915 
916 // Attempt to fast-select a floating-point extend instruction.
917 bool PPCFastISel::SelectFPExt(const Instruction *I) {
918  Value *Src = I->getOperand(0);
919  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
920  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
921 
922  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
923  return false;
924 
925  unsigned SrcReg = getRegForValue(Src);
926  if (!SrcReg)
927  return false;
928 
929  // No code is generated for a FP extend.
930  updateValueMap(I, SrcReg);
931  return true;
932 }
933 
934 // Attempt to fast-select a floating-point truncate instruction.
935 bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
936  Value *Src = I->getOperand(0);
937  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
938  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
939 
940  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
941  return false;
942 
943  unsigned SrcReg = getRegForValue(Src);
944  if (!SrcReg)
945  return false;
946 
947  // Round the result to single precision.
948  unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
949  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
950  .addReg(SrcReg);
951 
952  updateValueMap(I, DestReg);
953  return true;
954 }
955 
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// Returns the FPR holding the (bitwise) integer value, or 0 on failure.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit.  Note that SrcVT is
  // deliberately NOT updated here: the load-opcode selection below
  // still needs to know the value started life as an i32.
  if (SrcVT == MVT::i32) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return 0;
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return 0;

  // Load the integer value into an FPR.  The kind of load used depends
  // on a number of conditions: plain LFD for 64-bit values, and the
  // 32-bit integer loads (LFIWZX/LFIWAX) for i32 sources when available.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      // On big-endian targets the low 32 bits live 4 bytes into the slot.
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    } else if (PPCSubTarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned ResultReg = 0;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return 0;

  return ResultReg;
}
1004 
// Attempt to fast-select an integer-to-floating-point conversion.
// Returns false (punting to SelectionDAG) for any case we can't handle.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  // Only scalar integer sources up to 64 bits are handled.
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !PPCSubTarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
    return false;

  // Extend the input if necessary.  The i8/i16 cases are widened all the
  // way to i64 so a single code path handles the GPR-to-FPR move.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR (via a stack slot; see PPCMoveToFPReg).
  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (FPReg == 0)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned DestReg = createResultReg(RC);
  unsigned Opc;

  // FCFIDS/FCFIDUS convert to single precision; FCFID/FCFIDU to double.
  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}
1076 
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// The move is done with a store to a stack slot followed by a reload.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      unsigned SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return 0;

  // Reload it into a GPR.  If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.  A null RC lets
  // PPCEmitLoad pick a conservative default.
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  unsigned ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return 0;

  return ResultReg;
}
1113 
// Attempt to fast-select a floating-point-to-integer conversion.
// Returns false (punting to SelectionDAG) for any case we can't handle.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // Convert f32 to f64 if necessary.  This is just a meaningless copy
  // to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass) {
    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), TmpReg)
      .addReg(SrcReg);
    SrcReg = TmpReg;
  }

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs.  Note the nested if/else below: the final
  // "else" pairs with the OUTER "if (DstVT == MVT::i32)" because the
  // inner if already has its own else.
  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
  unsigned Opc;

  if (DstVT == MVT::i32)
    if (IsSigned)
      Opc = PPC::FCTIWZ;
    else
      // Without FPCVT there is no FCTIWUZ; fall back to FCTIDZ, which is
      // safe here since the i64/unsigned combination was rejected above.
      Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
  else
    Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
  if (IntReg == 0)
    return false;

  updateValueMap(I, IntReg);
  return true;
}
1178 
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.  Only ADD/OR/SUB on the non-legal i8/i16 types
// reach here; everything else is left to the generic selector.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the 32-bit or 64-bit form of the opcode to match the
  // register class chosen above.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // Handle case of small immediate operand.  Where the register form
  // requires a no-R0/X0 class (ADDI/ADDI8), constrain SrcReg1's class
  // accordingly before emitting.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // sub x, -32768 can't become addi x, 32768 (out of simm16 range),
          // so fall through to the reg-reg form in that one case.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  // Reverse operands for subtract-from: SUBF computes RB - RA.
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
1283 
1284 // Handle arguments to a call that we're attempting to fast-select.
1285 // Return false if the arguments are too complex for us at the moment.
1286 bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
1287  SmallVectorImpl<unsigned> &ArgRegs,
1288  SmallVectorImpl<MVT> &ArgVTs,
1290  SmallVectorImpl<unsigned> &RegArgs,
1291  CallingConv::ID CC,
1292  unsigned &NumBytes,
1293  bool IsVarArg) {
1295  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1296 
1297  // Reserve space for the linkage area on the stack.
1298  unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
1299  CCInfo.AllocateStack(LinkageSize, 8);
1300 
1301  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
1302 
1303  // Bail out if we can't handle any of the arguments.
1304  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1305  CCValAssign &VA = ArgLocs[I];
1306  MVT ArgVT = ArgVTs[VA.getValNo()];
1307 
1308  // Skip vector arguments for now, as well as long double and
1309  // uint128_t, and anything that isn't passed in a register.
1310  if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1311  !VA.isRegLoc() || VA.needsCustom())
1312  return false;
1313 
1314  // Skip bit-converted arguments for now.
1315  if (VA.getLocInfo() == CCValAssign::BCvt)
1316  return false;
1317  }
1318 
1319  // Get a count of how many bytes are to be pushed onto the stack.
1320  NumBytes = CCInfo.getNextStackOffset();
1321 
1322  // The prolog code of the callee may store up to 8 GPR argument registers to
1323  // the stack, allowing va_start to index over them in memory if its varargs.
1324  // Because we cannot tell if this is needed on the caller side, we have to
1325  // conservatively assume that it is needed. As such, make sure we have at
1326  // least enough stack space for the caller to store the 8 GPRs.
1327  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1328  NumBytes = std::max(NumBytes, LinkageSize + 64);
1329 
1330  // Issue CALLSEQ_START.
1331  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1332  TII.get(TII.getCallFrameSetupOpcode()))
1333  .addImm(NumBytes).addImm(0);
1334 
1335  // Prepare to assign register arguments. Every argument uses up a
1336  // GPR protocol register even if it's passed in a floating-point
1337  // register (unless we're using the fast calling convention).
1338  unsigned NextGPR = PPC::X3;
1339  unsigned NextFPR = PPC::F1;
1340 
1341  // Process arguments.
1342  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1343  CCValAssign &VA = ArgLocs[I];
1344  unsigned Arg = ArgRegs[VA.getValNo()];
1345  MVT ArgVT = ArgVTs[VA.getValNo()];
1346 
1347  // Handle argument promotion and bitcasts.
1348  switch (VA.getLocInfo()) {
1349  default:
1350  llvm_unreachable("Unknown loc info!");
1351  case CCValAssign::Full:
1352  break;
1353  case CCValAssign::SExt: {
1354  MVT DestVT = VA.getLocVT();
1355  const TargetRegisterClass *RC =
1356  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1357  unsigned TmpReg = createResultReg(RC);
1358  if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1359  llvm_unreachable("Failed to emit a sext!");
1360  ArgVT = DestVT;
1361  Arg = TmpReg;
1362  break;
1363  }
1364  case CCValAssign::AExt:
1365  case CCValAssign::ZExt: {
1366  MVT DestVT = VA.getLocVT();
1367  const TargetRegisterClass *RC =
1368  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1369  unsigned TmpReg = createResultReg(RC);
1370  if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1371  llvm_unreachable("Failed to emit a zext!");
1372  ArgVT = DestVT;
1373  Arg = TmpReg;
1374  break;
1375  }
1376  case CCValAssign::BCvt: {
1377  // FIXME: Not yet handled.
1378  llvm_unreachable("Should have bailed before getting here!");
1379  break;
1380  }
1381  }
1382 
1383  // Copy this argument to the appropriate register.
1384  unsigned ArgReg;
1385  if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1386  ArgReg = NextFPR++;
1387  if (CC != CallingConv::Fast)
1388  ++NextGPR;
1389  } else
1390  ArgReg = NextGPR++;
1391 
1392  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1393  TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1394  RegArgs.push_back(ArgReg);
1395  }
1396 
1397  return true;
1398 }
1399 
1400 // For a call that we've determined we can fast-select, finish the
1401 // call sequence and generate a copy to obtain the return value (if any).
1402 bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1403  CallingConv::ID CC = CLI.CallConv;
1404 
1405  // Issue CallSEQ_END.
1406  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1407  TII.get(TII.getCallFrameDestroyOpcode()))
1408  .addImm(NumBytes).addImm(0);
1409 
1410  // Next, generate a copy to obtain the return value.
1411  // FIXME: No multi-register return values yet, though I don't foresee
1412  // any real difficulties there.
1413  if (RetVT != MVT::isVoid) {
1415  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1416  CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1417  CCValAssign &VA = RVLocs[0];
1418  assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1419  assert(VA.isRegLoc() && "Can only return in registers!");
1420 
1421  MVT DestVT = VA.getValVT();
1422  MVT CopyVT = DestVT;
1423 
1424  // Ints smaller than a register still arrive in a full 64-bit
1425  // register, so make sure we recognize this.
1426  if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1427  CopyVT = MVT::i64;
1428 
1429  unsigned SourcePhysReg = VA.getLocReg();
1430  unsigned ResultReg = 0;
1431 
1432  if (RetVT == CopyVT) {
1433  const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1434  ResultReg = createResultReg(CpyRC);
1435 
1436  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1437  TII.get(TargetOpcode::COPY), ResultReg)
1438  .addReg(SourcePhysReg);
1439 
1440  // If necessary, round the floating result to single precision.
1441  } else if (CopyVT == MVT::f64) {
1442  ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1443  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
1444  ResultReg).addReg(SourcePhysReg);
1445 
1446  // If only the low half of a general register is needed, generate
1447  // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1448  // used along the fast-isel path (not lowered), and downstream logic
1449  // also doesn't like a direct subreg copy on a physical reg.)
1450  } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1451  ResultReg = createResultReg(&PPC::GPRCRegClass);
1452  // Convert physical register from G8RC to GPRC.
1453  SourcePhysReg -= PPC::X0 - PPC::R0;
1454  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1455  TII.get(TargetOpcode::COPY), ResultReg)
1456  .addReg(SourcePhysReg);
1457  }
1458 
1459  assert(ResultReg && "ResultReg unset!");
1460  CLI.InRegs.push_back(SourcePhysReg);
1461  CLI.ResultReg = ResultReg;
1462  CLI.NumResultRegs = 1;
1463  }
1464 
1465  return true;
1466 }
1467 
1468 bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1469  CallingConv::ID CC = CLI.CallConv;
1470  bool IsTailCall = CLI.IsTailCall;
1471  bool IsVarArg = CLI.IsVarArg;
1472  const Value *Callee = CLI.Callee;
1473  const MCSymbol *Symbol = CLI.Symbol;
1474 
1475  if (!Callee && !Symbol)
1476  return false;
1477 
1478  // Allow SelectionDAG isel to handle tail calls.
1479  if (IsTailCall)
1480  return false;
1481 
1482  // Let SDISel handle vararg functions.
1483  if (IsVarArg)
1484  return false;
1485 
1486  // Handle simple calls for now, with legal return types and
1487  // those that can be extended.
1488  Type *RetTy = CLI.RetTy;
1489  MVT RetVT;
1490  if (RetTy->isVoidTy())
1491  RetVT = MVT::isVoid;
1492  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1493  RetVT != MVT::i8)
1494  return false;
1495  else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
1496  // We can't handle boolean returns when CR bits are in use.
1497  return false;
1498 
1499  // FIXME: No multi-register return values yet.
1500  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1501  RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1502  RetVT != MVT::f64) {
1504  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1505  CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1506  if (RVLocs.size() > 1)
1507  return false;
1508  }
1509 
1510  // Bail early if more than 8 arguments, as we only currently
1511  // handle arguments passed in registers.
1512  unsigned NumArgs = CLI.OutVals.size();
1513  if (NumArgs > 8)
1514  return false;
1515 
1516  // Set up the argument vectors.
1518  SmallVector<unsigned, 8> ArgRegs;
1519  SmallVector<MVT, 8> ArgVTs;
1521 
1522  Args.reserve(NumArgs);
1523  ArgRegs.reserve(NumArgs);
1524  ArgVTs.reserve(NumArgs);
1525  ArgFlags.reserve(NumArgs);
1526 
1527  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1528  // Only handle easy calls for now. It would be reasonably easy
1529  // to handle <= 8-byte structures passed ByVal in registers, but we
1530  // have to ensure they are right-justified in the register.
1531  ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1532  if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1533  return false;
1534 
1535  Value *ArgValue = CLI.OutVals[i];
1536  Type *ArgTy = ArgValue->getType();
1537  MVT ArgVT;
1538  if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1539  return false;
1540 
1541  if (ArgVT.isVector())
1542  return false;
1543 
1544  unsigned Arg = getRegForValue(ArgValue);
1545  if (Arg == 0)
1546  return false;
1547 
1548  Args.push_back(ArgValue);
1549  ArgRegs.push_back(Arg);
1550  ArgVTs.push_back(ArgVT);
1551  ArgFlags.push_back(Flags);
1552  }
1553 
1554  // Process the arguments.
1555  SmallVector<unsigned, 8> RegArgs;
1556  unsigned NumBytes;
1557 
1558  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1559  RegArgs, CC, NumBytes, IsVarArg))
1560  return false;
1561 
1562  MachineInstrBuilder MIB;
1563  // FIXME: No handling for function pointers yet. This requires
1564  // implementing the function descriptor (OPD) setup.
1565  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1566  if (!GV) {
1567  // patchpoints are a special case; they always dispatch to a pointer value.
1568  // However, we don't actually want to generate the indirect call sequence
1569  // here (that will be generated, as necessary, during asm printing), and
1570  // the call we generate here will be erased by FastISel::selectPatchpoint,
1571  // so don't try very hard...
1572  if (CLI.IsPatchPoint)
1573  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
1574  else
1575  return false;
1576  } else {
1577  // Build direct call with NOP for TOC restore.
1578  // FIXME: We can and should optimize away the NOP for local calls.
1579  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1580  TII.get(PPC::BL8_NOP));
1581  // Add callee.
1582  MIB.addGlobalAddress(GV);
1583  }
1584 
1585  // Add implicit physical register uses to the call.
1586  for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
1587  MIB.addReg(RegArgs[II], RegState::Implicit);
1588 
1589  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1590  // into the call.
1591  PPCFuncInfo->setUsesTOCBasePtr();
1592  MIB.addReg(PPC::X2, RegState::Implicit);
1593 
1594  // Add a register mask with the call-preserved registers. Proper
1595  // defs for return values will be added by setPhysRegsDeadExcept().
1596  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1597 
1598  CLI.Call = MIB;
1599 
1600  // Finish off the call including any return values.
1601  return finishCall(RetVT, CLI, NumBytes);
1602 }
1603 
1604 // Attempt to fast-select a return instruction.
1605 bool PPCFastISel::SelectRet(const Instruction *I) {
1606 
1607  if (!FuncInfo.CanLowerReturn)
1608  return false;
1609 
1610  if (TLI.supportSplitCSR(FuncInfo.MF))
1611  return false;
1612 
1613  const ReturnInst *Ret = cast<ReturnInst>(I);
1614  const Function &F = *I->getParent()->getParent();
1615 
1616  // Build a list of return value registers.
1617  SmallVector<unsigned, 4> RetRegs;
1618  CallingConv::ID CC = F.getCallingConv();
1619 
1620  if (Ret->getNumOperands() > 0) {
1622  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1623 
1624  // Analyze operands of the call, assigning locations to each operand.
1626  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1627  CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1628  const Value *RV = Ret->getOperand(0);
1629 
1630  // FIXME: Only one output register for now.
1631  if (ValLocs.size() > 1)
1632  return false;
1633 
1634  // Special case for returning a constant integer of any size - materialize
1635  // the constant as an i64 and copy it to the return register.
1636  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
1637  CCValAssign &VA = ValLocs[0];
1638 
1639  unsigned RetReg = VA.getLocReg();
1640  // We still need to worry about properly extending the sign. For example,
1641  // we could have only a single bit or a constant that needs zero
1642  // extension rather than sign extension. Make sure we pass the return
1643  // value extension property to integer materialization.
1644  unsigned SrcReg =
1645  PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1646 
1647  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1648  TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1649 
1650  RetRegs.push_back(RetReg);
1651 
1652  } else {
1653  unsigned Reg = getRegForValue(RV);
1654 
1655  if (Reg == 0)
1656  return false;
1657 
1658  // Copy the result values into the output registers.
1659  for (unsigned i = 0; i < ValLocs.size(); ++i) {
1660 
1661  CCValAssign &VA = ValLocs[i];
1662  assert(VA.isRegLoc() && "Can only return in registers!");
1663  RetRegs.push_back(VA.getLocReg());
1664  unsigned SrcReg = Reg + VA.getValNo();
1665 
1666  EVT RVEVT = TLI.getValueType(DL, RV->getType());
1667  if (!RVEVT.isSimple())
1668  return false;
1669  MVT RVVT = RVEVT.getSimpleVT();
1670  MVT DestVT = VA.getLocVT();
1671 
1672  if (RVVT != DestVT && RVVT != MVT::i8 &&
1673  RVVT != MVT::i16 && RVVT != MVT::i32)
1674  return false;
1675 
1676  if (RVVT != DestVT) {
1677  switch (VA.getLocInfo()) {
1678  default:
1679  llvm_unreachable("Unknown loc info!");
1680  case CCValAssign::Full:
1681  llvm_unreachable("Full value assign but types don't match?");
1682  case CCValAssign::AExt:
1683  case CCValAssign::ZExt: {
1684  const TargetRegisterClass *RC =
1685  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1686  unsigned TmpReg = createResultReg(RC);
1687  if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1688  return false;
1689  SrcReg = TmpReg;
1690  break;
1691  }
1692  case CCValAssign::SExt: {
1693  const TargetRegisterClass *RC =
1694  (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1695  unsigned TmpReg = createResultReg(RC);
1696  if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1697  return false;
1698  SrcReg = TmpReg;
1699  break;
1700  }
1701  }
1702  }
1703 
1704  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1705  TII.get(TargetOpcode::COPY), RetRegs[i])
1706  .addReg(SrcReg);
1707  }
1708  }
1709  }
1710 
1711  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1712  TII.get(PPC::BLR8));
1713 
1714  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1715  MIB.addReg(RetRegs[i], RegState::Implicit);
1716 
1717  return true;
1718 }
1719 
// Attempt to emit an integer extend of SrcReg into DestReg.  Both
// signed and zero extensions are supported.  Return false if we
// can't handle it.  Supported combinations: i8/i16/i32 sources into
// i32/i64 destinations.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                unsigned DestReg, bool IsZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i64)
    return false;
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
    return false;

  // Signed extensions use EXTSB, EXTSH, EXTSW.
  if (!IsZExt) {
    unsigned Opc;
    if (SrcVT == MVT::i8)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
    else if (SrcVT == MVT::i16)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
    else {
      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
      Opc = PPC::EXTSW_32_64;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
      .addReg(SrcReg);

  // Unsigned 32-bit extensions use RLWINM.
  } else if (DestVT == MVT::i32) {
    unsigned MB;
    // MB is the first bit kept: clear the top 24 (i8) or 16 (i16) bits.
    if (SrcVT == MVT::i8)
      MB = 24;
    else {
      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
      MB = 16;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
            DestReg)
      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);

  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
  } else {
    unsigned MB;
    // MB clears the top 56/48/32 bits for i8/i16/i32 respectively.
    if (SrcVT == MVT::i8)
      MB = 56;
    else if (SrcVT == MVT::i16)
      MB = 48;
    else
      MB = 32;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(PPC::RLDICL_32_64), DestReg)
      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
  }

  return true;
}
1773 
1774 // Attempt to fast-select an indirect branch instruction.
1775 bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1776  unsigned AddrReg = getRegForValue(I->getOperand(0));
1777  if (AddrReg == 0)
1778  return false;
1779 
1780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
1781  .addReg(AddrReg);
1782  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
1783 
1784  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1785  for (const BasicBlock *SuccBB : IB->successors())
1786  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
1787 
1788  return true;
1789 }
1790 
1791 // Attempt to fast-select an integer truncate instruction.
1792 bool PPCFastISel::SelectTrunc(const Instruction *I) {
1793  Value *Src = I->getOperand(0);
1794  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1795  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1796 
1797  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1798  return false;
1799 
1800  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1801  return false;
1802 
1803  unsigned SrcReg = getRegForValue(Src);
1804  if (!SrcReg)
1805  return false;
1806 
1807  // The only interesting case is when we need to switch register classes.
1808  if (SrcVT == MVT::i64) {
1809  unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
1810  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1811  TII.get(TargetOpcode::COPY),
1812  ResultReg).addReg(SrcReg, 0, PPC::sub_32);
1813  SrcReg = ResultReg;
1814  }
1815 
1816  updateValueMap(I, SrcReg);
1817  return true;
1818 }
1819 
1820 // Attempt to fast-select an integer extend instruction.
1821 bool PPCFastISel::SelectIntExt(const Instruction *I) {
1822  Type *DestTy = I->getType();
1823  Value *Src = I->getOperand(0);
1824  Type *SrcTy = Src->getType();
1825 
1826  bool IsZExt = isa<ZExtInst>(I);
1827  unsigned SrcReg = getRegForValue(Src);
1828  if (!SrcReg) return false;
1829 
1830  EVT SrcEVT, DestEVT;
1831  SrcEVT = TLI.getValueType(DL, SrcTy, true);
1832  DestEVT = TLI.getValueType(DL, DestTy, true);
1833  if (!SrcEVT.isSimple())
1834  return false;
1835  if (!DestEVT.isSimple())
1836  return false;
1837 
1838  MVT SrcVT = SrcEVT.getSimpleVT();
1839  MVT DestVT = DestEVT.getSimpleVT();
1840 
1841  // If we know the register class needed for the result of this
1842  // instruction, use it. Otherwise pick the register class of the
1843  // correct size that does not contain X0/R0, since we don't know
1844  // whether downstream uses permit that assignment.
1845  unsigned AssignedReg = FuncInfo.ValueMap[I];
1846  const TargetRegisterClass *RC =
1847  (AssignedReg ? MRI.getRegClass(AssignedReg) :
1848  (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1849  &PPC::GPRC_and_GPRC_NOR0RegClass));
1850  unsigned ResultReg = createResultReg(RC);
1851 
1852  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1853  return false;
1854 
1855  updateValueMap(I, ResultReg);
1856  return true;
1857 }
1858 
1859 // Attempt to fast-select an instruction that wasn't handled by
1860 // the table-generated machinery.
1861 bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1862 
1863  switch (I->getOpcode()) {
1864  case Instruction::Load:
1865  return SelectLoad(I);
1866  case Instruction::Store:
1867  return SelectStore(I);
1868  case Instruction::Br:
1869  return SelectBranch(I);
1870  case Instruction::IndirectBr:
1871  return SelectIndirectBr(I);
1872  case Instruction::FPExt:
1873  return SelectFPExt(I);
1874  case Instruction::FPTrunc:
1875  return SelectFPTrunc(I);
1876  case Instruction::SIToFP:
1877  return SelectIToFP(I, /*IsSigned*/ true);
1878  case Instruction::UIToFP:
1879  return SelectIToFP(I, /*IsSigned*/ false);
1880  case Instruction::FPToSI:
1881  return SelectFPToI(I, /*IsSigned*/ true);
1882  case Instruction::FPToUI:
1883  return SelectFPToI(I, /*IsSigned*/ false);
1884  case Instruction::Add:
1885  return SelectBinaryIntOp(I, ISD::ADD);
1886  case Instruction::Or:
1887  return SelectBinaryIntOp(I, ISD::OR);
1888  case Instruction::Sub:
1889  return SelectBinaryIntOp(I, ISD::SUB);
1890  case Instruction::Call:
1891  return selectCall(I);
1892  case Instruction::Ret:
1893  return SelectRet(I);
1894  case Instruction::Trunc:
1895  return SelectTrunc(I);
1896  case Instruction::ZExt:
1897  case Instruction::SExt:
1898  return SelectIntExt(I);
1899  // Here add other flavors of Instruction::XXX that automated
1900  // cases don't catch. For example, switches are terminators
1901  // that aren't yet handled.
1902  default:
1903  break;
1904  }
1905  return false;
1906 }
1907 
1908 // Materialize a floating-point constant into a register, and return
1909 // the register number (or zero if we failed to handle it).
1910 unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1911  // No plans to handle long double here.
1912  if (VT != MVT::f32 && VT != MVT::f64)
1913  return 0;
1914 
1915  // All FP constants are loaded from the constant pool.
1916  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
1917  assert(Align > 0 && "Unexpectedly missing alignment information!");
1918  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
1919  const TargetRegisterClass *RC =
1920  (VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass;
1921  unsigned DestReg = createResultReg(RC);
1922  CodeModel::Model CModel = TM.getCodeModel();
1923 
1924  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
1926  MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
1927 
1928  unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
1929  unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1930 
1931  PPCFuncInfo->setUsesTOCBasePtr();
1932  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
1933  if (CModel == CodeModel::Small) {
1934  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
1935  TmpReg)
1936  .addConstantPoolIndex(Idx).addReg(PPC::X2);
1937  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1938  .addImm(0).addReg(TmpReg).addMemOperand(MMO);
1939  } else {
1940  // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
1941  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
1942  TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
1943  // But for large code model, we must generate a LDtocL followed
1944  // by the LF[SD].
1945  if (CModel == CodeModel::Large) {
1946  unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1947  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
1948  TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
1949  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1950  .addImm(0)
1951  .addReg(TmpReg2);
1952  } else
1953  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1954  .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
1955  .addReg(TmpReg)
1956  .addMemOperand(MMO);
1957  }
1958 
1959  return DestReg;
1960 }
1961 
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).  The result
// is always a 64-bit address computed through the TOC (X2); exactly
// which instruction sequence is used depends on the code model.
unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
  assert(VT == MVT::i64 && "Non-address!");
  // Keep the result out of X0: downstream users may place it where X0
  // has a special meaning.
  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
  unsigned DestReg = createResultReg(RC);

  // Global values may be plain old object addresses, TLS object
  // addresses, constant pool entries, or jump tables.  How we generate
  // code for these may depend on small, medium, or large code model.
  CodeModel::Model CModel = TM.getCodeModel();

  // FIXME: Jump tables are not yet required because fast-isel doesn't
  // handle switches; if that changes, we need them as well.  For now,
  // what follows assumes everything's a generic (or TLS) global address.

  // FIXME: We don't yet handle the complexity of TLS.
  if (GV->isThreadLocal())
    return 0;

  PPCFuncInfo->setUsesTOCBasePtr();
  // For small code model, generate a simple TOC load.
  if (CModel == CodeModel::Small)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
            DestReg)
        .addGlobalAddress(GV)
        .addReg(PPC::X2);
  else {
    // If the address is an externally defined symbol, a symbol with common
    // or externally available linkage, a non-local function address, or a
    // jump table address (not yet needed), or if we are generating code
    // for large code model, we generate:
    //       LDtocL(GV, ADDIStocHA(%X2, GV))
    // Otherwise we generate:
    //       ADDItocL(ADDIStocHA(%X2, GV), GV)
    // Either way, start with the ADDIStocHA:
    unsigned HighPartReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
            HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);

    // MO_NLP_FLAG distinguishes the symbols that must be reached through
    // a TOC entry (the LDtocL load) from those whose address can be
    // formed directly with ADDItocL.
    unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
    if (GVFlags & PPCII::MO_NLP_FLAG) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
              DestReg).addGlobalAddress(GV).addReg(HighPartReg);
    } else {
      // Otherwise generate the ADDItocL.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
              DestReg).addReg(HighPartReg).addGlobalAddress(GV);
    }
  }

  return DestReg;
}
2015 
2016 // Materialize a 32-bit integer constant into a register, and return
2017 // the register number (or zero if we failed to handle it).
2018 unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2019  const TargetRegisterClass *RC) {
2020  unsigned Lo = Imm & 0xFFFF;
2021  unsigned Hi = (Imm >> 16) & 0xFFFF;
2022 
2023  unsigned ResultReg = createResultReg(RC);
2024  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2025 
2026  if (isInt<16>(Imm))
2027  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2028  TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2029  .addImm(Imm);
2030  else if (Lo) {
2031  // Both Lo and Hi have nonzero bits.
2032  unsigned TmpReg = createResultReg(RC);
2033  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2034  TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2035  .addImm(Hi);
2036  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2037  TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2038  .addReg(TmpReg).addImm(Lo);
2039  } else
2040  // Just Hi bits.
2041  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2042  TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2043  .addImm(Hi);
2044 
2045  return ResultReg;
2046 }
2047 
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
// Strategy: reduce to a 32-bit materialization, then shift and OR in
// any remaining low-order bits.
unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      Imm = ImmSh;
    else {
      // Shifting out trailing zeros isn't enough: split the constant
      // into the high 32 bits (materialized below and shifted back
      // into place) and the low 32 bits (kept in Remainder and OR'ed
      // in afterwards).
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.  RLDICL(x, Shift, 63-Shift) computes x << Shift.
  unsigned TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in the remaining low 32 bits, a 16-bit half at a time; skip
  // halves that are zero.
  unsigned TmpReg3, Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  if ((Lo = Remainder & 0xFFFF)) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}
2103 
2104 // Materialize an integer constant into a register, and return
2105 // the register number (or zero if we failed to handle it).
2106 unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2107  bool UseSExt) {
2108  // If we're using CR bit registers for i1 values, handle that as a special
2109  // case first.
2110  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
2111  unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2112  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2113  TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2114  return ImmReg;
2115  }
2116 
2117  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2118  VT != MVT::i1)
2119  return 0;
2120 
2121  const TargetRegisterClass *RC =
2122  ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2123  int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2124 
2125  // If the constant is in range, use a load-immediate.
2126  // Since LI will sign extend the constant we need to make sure that for
2127  // our zeroext constants that the sign extended constant fits into 16-bits -
2128  // a range of 0..0x7fff.
2129  if (isInt<16>(Imm)) {
2130  unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2131  unsigned ImmReg = createResultReg(RC);
2132  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
2133  .addImm(Imm);
2134  return ImmReg;
2135  }
2136 
2137  // Construct the constant piecewise.
2138  if (VT == MVT::i64)
2139  return PPCMaterialize64BitInt(Imm, RC);
2140  else if (VT == MVT::i32)
2141  return PPCMaterialize32BitInt(Imm, RC);
2142 
2143  return 0;
2144 }
2145 
2146 // Materialize a constant into a register, and return the register
2147 // number (or zero if we failed to handle it).
2148 unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
2149  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2150 
2151  // Only handle simple types.
2152  if (!CEVT.isSimple()) return 0;
2153  MVT VT = CEVT.getSimpleVT();
2154 
2155  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2156  return PPCMaterializeFP(CFP, VT);
2157  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2158  return PPCMaterializeGV(GV, VT);
2159  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2160  // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2161  // assumes that constant PHI operands will be zero extended, and failure to
2162  // match that assumption will cause problems if we sign extend here but
2163  // some user of a PHI is in a block for which we fall back to full SDAG
2164  // instruction selection.
2165  return PPCMaterializeInt(CI, VT, false);
2166 
2167  return 0;
2168 }
2169 
2170 // Materialize the address created by an alloca into a register, and
2171 // return the register number (or zero if we failed to handle it).
2172 unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2173  // Don't handle dynamic allocas.
2174  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
2175 
2176  MVT VT;
2177  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
2178 
2180  FuncInfo.StaticAllocaMap.find(AI);
2181 
2182  if (SI != FuncInfo.StaticAllocaMap.end()) {
2183  unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2184  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
2185  ResultReg).addFrameIndex(SI->second).addImm(0);
2186  return ResultReg;
2187  }
2188 
2189  return 0;
2190 }
2191 
// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load.  The folding only picks up one.  Extend this
// to check subsequent instructions for the same pattern and remove
// them.  Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent.  Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.  The extend may be
  // folded only when its mask/width keeps at least as many low bits as
  // the (extending) load of this width produces.
  bool IsZExt = false;
  switch(MI->getOpcode()) {
    default:
      return false;

    case PPC::RLDICL:
    case PPC::RLDICL_32_64: {
      IsZExt = true;
      // MB is the first mask bit: the rotate-and-clear keeps bits
      // MB..63, so MB <= (64 - load width) means the zero-extending
      // load already produces the masked result.
      unsigned MB = MI->getOperand(3).getImm();
      if ((VT == MVT::i8 && MB <= 56) ||
          (VT == MVT::i16 && MB <= 48) ||
          (VT == MVT::i32 && MB <= 32))
        break;
      return false;
    }

    case PPC::RLWINM:
    case PPC::RLWINM8: {
      IsZExt = true;
      // 32-bit rotate-and-mask: same idea with a 32-bit mask start.
      unsigned MB = MI->getOperand(3).getImm();
      if ((VT == MVT::i8 && MB <= 24) ||
          (VT == MVT::i16 && MB <= 16))
        break;
      return false;
    }

    case PPC::EXTSB:
    case PPC::EXTSB8:
    case PPC::EXTSB8_32_64:
      /* There is no sign-extending load-byte instruction. */
      return false;

    case PPC::EXTSH:
    case PPC::EXTSH8:
    case PPC::EXTSH8_32_64: {
      // Sign-extend from halfword: foldable for i16 (and i8, which the
      // narrower load subsumes).
      if (VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }

    case PPC::EXTSW:
    case PPC::EXTSW_32:
    case PPC::EXTSW_32_64: {
      // Sign-extend from word: foldable for i32 and narrower.
      if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(LI->getOperand(0), Addr))
    return false;

  // Re-emit the load directly into the extend's result register, then
  // delete the now-redundant extend.
  unsigned ResultReg = MI->getOperand(0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
    return false;

  MI->eraseFromParent();
  return true;
}
2270 
2271 // Attempt to lower call arguments in a faster way than done by
2272 // the selection DAG code.
2273 bool PPCFastISel::fastLowerArguments() {
2274  // Defer to normal argument lowering for now. It's reasonably
2275  // efficient. Consider doing something like ARM to handle the
2276  // case where all args fit in registers, no varargs, no float
2277  // or vector args.
2278  return false;
2279 }
2280 
2281 // Handle materializing integer constants into a register. This is not
2282 // automatically generated for PowerPC, so must be explicitly created here.
2283 unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2284 
2285  if (Opc != ISD::Constant)
2286  return 0;
2287 
2288  // If we're using CR bit registers for i1 values, handle that as a special
2289  // case first.
2290  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
2291  unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2292  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2293  TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2294  return ImmReg;
2295  }
2296 
2297  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2298  VT != MVT::i1)
2299  return 0;
2300 
2301  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2302  &PPC::GPRCRegClass);
2303  if (VT == MVT::i64)
2304  return PPCMaterialize64BitInt(Imm, RC);
2305  else
2306  return PPCMaterialize32BitInt(Imm, RC);
2307 }
2308 
2309 // Override for ADDI and ADDI8 to set the correct register class
2310 // on RHS operand 0. The automatic infrastructure naively assumes
2311 // GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2312 // for these cases. At the moment, none of the other automatically
2313 // generated RI instructions require special treatment. However, once
2314 // SelectSelect is implemented, "isel" requires similar handling.
2315 //
2316 // Also be conservative about the output register class. Avoid
2317 // assigning R0 or X0 to the output register for GPRC and G8RC
2318 // register classes, as any such result could be used in ADDI, etc.,
2319 // where those regs have another meaning.
2320 unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2321  const TargetRegisterClass *RC,
2322  unsigned Op0, bool Op0IsKill,
2323  uint64_t Imm) {
2324  if (MachineInstOpcode == PPC::ADDI)
2325  MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2326  else if (MachineInstOpcode == PPC::ADDI8)
2327  MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2328 
2329  const TargetRegisterClass *UseRC =
2330  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2331  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2332 
2333  return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
2334  Op0, Op0IsKill, Imm);
2335 }
2336 
2337 // Override for instructions with one register operand to avoid use of
2338 // R0/X0. The automatic infrastructure isn't aware of the context so
2339 // we must be conservative.
2340 unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2341  const TargetRegisterClass* RC,
2342  unsigned Op0, bool Op0IsKill) {
2343  const TargetRegisterClass *UseRC =
2344  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2345  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2346 
2347  return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
2348 }
2349 
2350 // Override for instructions with two register operands to avoid use
2351 // of R0/X0. The automatic infrastructure isn't aware of the context
2352 // so we must be conservative.
2353 unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2354  const TargetRegisterClass* RC,
2355  unsigned Op0, bool Op0IsKill,
2356  unsigned Op1, bool Op1IsKill) {
2357  const TargetRegisterClass *UseRC =
2358  (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2359  (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2360 
2361  return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
2362  Op1, Op1IsKill);
2363 }
2364 
2365 namespace llvm {
2366  // Create the fast instruction selector for PowerPC64 ELF.
2368  const TargetLibraryInfo *LibInfo) {
2369  // Only available on 64-bit ELF for now.
2370  const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2371  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
2372  return new PPCFastISel(FuncInfo, LibInfo);
2373  return nullptr;
2374  }
2375 }
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
uint64_t CallInst * C
Return a value (possibly void), from a function.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
void push_back(const T &Elt)
Definition: SmallVector.h:212
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:843
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1542
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
bool isVector() const
Return true if this is a vector value type.
unsigned getReg() const
getReg - Returns the register number.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
unsigned less or equal
Definition: InstrTypes.h:879
unsigned less than
Definition: InstrTypes.h:878
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:859
unsigned getValNo() const
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:869
BasicBlock * getSuccessor(unsigned i) const
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:298
F(f)
An instruction for reading from memory.
Definition: Instructions.h:164
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Value * getCondition() const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
void reserve(size_type N)
Definition: SmallVector.h:380
op_iterator op_begin()
Definition: User.h:214
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
bool needsCustom() const
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:864
unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm)
Emit a MachineInstr with a register operand, an immediate, and a result register in the given registe...
Definition: FastISel.cpp:1898
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:491
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:863
A description of a memory reference used in the backend.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const HexagonInstrInfo * TII
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:97
Class to represent struct types.
Definition: DerivedTypes.h:201
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill)
Emit a MachineInstr with one register operand and a result register in the given register class...
Definition: FastISel.cpp:1825
Reg
All possible values of the reg field in the ModR/M byte.
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:860
SimpleValueType SimpleTy
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
LocInfo getLocInfo() const
unsigned getID() const
Return the register class ID number.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getSizeInBits() const
This is a fast-path instruction selection class that generates poor code and doesn&#39;t support illegal ...
Definition: FastISel.h:67
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:862
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1554
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:127
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
amdgpu Simplify well known AMD library false Value * Callee
Value * getOperand(unsigned i) const
Definition: User.h:154
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:121
TargetInstrInfo - Interface to description of machine instruction set.
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:141
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
unsigned const MachineRegisterInfo * MRI
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Conditional or Unconditional Branch instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
MO_NLP_FLAG - If this bit is set, the symbol reference is actually to the non_lazy_ptr for the global...
Definition: PPC.h:83
Indirect Branch Instruction.
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
op_iterator op_end()
Definition: User.h:216
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:144
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:180
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:853
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:862
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
Class to represent integer types.
Definition: DerivedTypes.h:40
const MachineInstrBuilder & addFrameIndex(int Idx) const
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:870
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:194
Extended Value Type.
Definition: ValueTypes.h:34
static bool isAtomic(Instruction *I)
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:868
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
signed greater than
Definition: InstrTypes.h:880
The memory access writes data.
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:857
unsigned getNumOperands() const
Definition: User.h:176
CCState - This class holds information needed while lowering arguments and return values...
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:301
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:179
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:867
Provides information about what library functions are available for the current target.
CCValAssign - Represent assignment of one arg/retval to a location.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
signed less than
Definition: InstrTypes.h:882
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned char TargetFlags=0) const
unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill)
Emit a MachineInstr with two register operands and a result register in the given register class...
Definition: FastISel.cpp:1846
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:574
int64_t getImm() const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
signed less or equal
Definition: InstrTypes.h:883
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:69
This file defines the FastISel class.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
amdgpu Simplify well known AMD library false Value Value * Arg
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Representation of each machine instruction.
Definition: MachineInstr.h:59
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:513
unsigned greater or equal
Definition: InstrTypes.h:877
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:193
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:861
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:865
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:338
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
bool isRegLoc() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void GetReturnInfo(Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags...
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:856
LLVM Value Representation.
Definition: Value.h:73
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:866
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:57
bool isThreadLocal() const
If the value is "Thread Local", its value isn&#39;t shared by the threads.
Definition: GlobalValue.h:238
IRTranslator LLVM IR MI
unsigned greater than
Definition: InstrTypes.h:876
bool isSVR4ABI() const
Definition: PPCSubtarget.h:305
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:157
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
unsigned getLocReg() const
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
static Optional< PPC::Predicate > getComparePred(CmpInst::Predicate Pred)
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:858
unsigned AllocateStack(unsigned Size, unsigned Align)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:855
signed greater or equal
Definition: InstrTypes.h:881
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:66
an instruction to allocate memory on the stack
Definition: Instructions.h:60
gep_type_iterator gep_type_begin(const User *GEP)