LLVM  4.0.0
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a pattern matching instruction selector for PowerPC,
11 // converting from a legalized dag to a PPC dag.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "PPC.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCTargetMachine.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/GlobalAlias.h"
29 #include "llvm/IR/GlobalValue.h"
30 #include "llvm/IR/GlobalVariable.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/Module.h"
34 #include "llvm/Support/Debug.h"
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "ppc-codegen"
42 
43 // FIXME: Remove this once the bug has been fixed!
// Hidden boolean command-line option "expose-ppc-andi-glue-bug". It is
// declared without `static`, so the variable is visible to other translation
// units. No initial value is given here.
44 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
45 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
46 
// Hidden, file-local boolean option "ppc-use-bit-perm-rewriter"; initialised
// to true.
47 static cl::opt<bool>
48  UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
49  cl::desc("use aggressive ppc isel for bit permutations"),
50  cl::Hidden);
52  "ppc-bit-perm-rewriter-stress-rotates",
53  cl::desc("stress rotate selection in aggressive ppc isel for "
54  "bit permutations"),
55  cl::Hidden);
56 
58  "ppc-use-branch-hint", cl::init(true),
59  cl::desc("Enable static hinting of branches on ppc"),
60  cl::Hidden);
61 
62 namespace {
63  //===--------------------------------------------------------------------===//
64  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
65  /// instructions for SelectionDAG operations.
66  ///
67  class PPCDAGToDAGISel : public SelectionDAGISel {
 // Target machine passed to the constructor.
68  const PPCTargetMachine &TM;
 // Subtarget of the function being selected. Not set by the constructor;
 // assigned at the start of runOnMachineFunction.
69  const PPCSubtarget *PPCSubTarget;
 // Target lowering object taken from PPCSubTarget in runOnMachineFunction.
70  const PPCTargetLowering *PPCLowering;
 // Register holding the global base address for the current function.
 // Zero means "not materialized yet"; reset in runOnMachineFunction.
71  unsigned GlobalBaseReg;
72  public:
 // Only the base class and TM are initialised here; the remaining members
 // are assigned per function in runOnMachineFunction.
73  explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
74  : SelectionDAGISel(tm), TM(tm) {}
75 
 // Resets the per-function state, then inserts VRSAVE handling when the
 // subtarget does not use the SVR4 ABI. Always returns true.
76  bool runOnMachineFunction(MachineFunction &MF) override {
77  // Make sure we re-emit a set of the global base reg if necessary
78  GlobalBaseReg = 0;
79  PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
80  PPCLowering = PPCSubTarget->getTargetLowering();
 // NOTE(review): the embedded listing numbers skip 81 here, so one source
 // line appears to be missing from this view -- confirm against upstream.
82 
83  if (!PPCSubTarget->isSVR4ABI())
84  InsertVRSaveCode(MF);
85 
86  return true;
87  }
88 
89  void PreprocessISelDAG() override;
90  void PostprocessISelDAG() override;
91 
92  /// getI32Imm - Return a target constant with the specified value, of type
93  /// i32.
94  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
95  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
96  }
97 
98  /// getI64Imm - Return a target constant with the specified value, of type
99  /// i64.
100  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
101  return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
102  }
103 
104  /// getSmallIPtrImm - Return a target constant of pointer type.
 /// The pointer type is queried from PPCLowering using the DAG's data layout.
105  inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
106  return CurDAG->getTargetConstant(
107  Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
108  }
109 
110  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
111  /// rotate and mask opcode and mask operation.
 /// On success SH, MB and ME receive the rotate amount and the mask bounds.
112  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
113  unsigned &SH, unsigned &MB, unsigned &ME);
114 
115  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
116  /// base register. Return the virtual register that holds this value.
117  SDNode *getGlobalBaseReg();
118 
 // Selects frame-index node N on behalf of SN as an add-immediate of the
 // target frame index and Offset (Offset defaults to 0).
119  void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
120 
121  // Select - Convert the specified operand from a target-independent to a
122  // target-specific node if it hasn't already been changed.
123  void Select(SDNode *N) override;
124 
 // Both helpers return true when they have selected N themselves.
 // NOTE(review): tryBitPermutation's definition is outside this view;
 // the return convention is assumed to match tryBitfieldInsert's.
125  bool tryBitfieldInsert(SDNode *N);
126  bool tryBitPermutation(SDNode *N);
127 
128  /// SelectCC - Select a comparison of the specified values with the
129  /// specified condition code, returning the CR# of the expression.
130  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
131  const SDLoc &dl);
132 
133  /// SelectAddrImm - Returns true if the address N can be represented by
134  /// a base register plus a signed 16-bit displacement [r+imm].
 /// Forwards to PPCLowering->SelectAddressRegImm with the final flag false.
135  bool SelectAddrImm(SDValue N, SDValue &Disp,
136  SDValue &Base) {
137  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
138  }
139 
140  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
141  /// immediate field. Note that the operand at this point is already the
142  /// result of a prior SelectAddressRegImm call.
143  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
144  if (N.getOpcode() == ISD::TargetConstant ||
 // NOTE(review): listing line 145 (the second operand of `||` and the
 // opening brace of this `if`) is missing from this view -- restore it
 // from the upstream file before building.
146  Out = N;
147  return true;
148  }
149 
150  return false;
151  }
152 
153  /// SelectAddrIdx - Given the specified addressed, check to see if it can be
154  /// represented as an indexed [r+r] operation. Returns false if it can
155  /// be represented by [r+imm], which are preferred.
156  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
157  return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
158  }
159 
160  /// SelectAddrIdxOnly - Given the specified addressed, force it to be
161  /// represented as an indexed [r+r] operation.
162  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
163  return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
164  }
165 
166  /// SelectAddrImmX4 - Returns true if the address N can be represented by
167  /// a base register plus a signed 16-bit displacement that is a multiple of 4.
168  /// Suitable for use by STD and friends.
 /// Same call as SelectAddrImm, but with the final flag set to true.
169  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
170  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
171  }
172 
173  // Select an address into a single register.
 // Always succeeds: the address itself is used as the base.
174  bool SelectAddr(SDValue N, SDValue &Base) {
175  Base = N;
176  return true;
177  }
178 
179  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
180  /// inline asm expressions. It is always correct to compute the value into
181  /// a register. The case of adding a (possibly relocatable) constant to a
182  /// register can be improved, but it is wrong to substitute Reg+Reg for
183  /// Reg in an asm, because the load or store opcode would have to change.
 /// Returns false once the operand has been appended to OutOps.
184  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
185  unsigned ConstraintID,
186  std::vector<SDValue> &OutOps) override {
187 
188  switch(ConstraintID) {
189  default:
190  errs() << "ConstraintID: " << ConstraintID << "\n";
191  llvm_unreachable("Unexpected asm memory constraint");
 // NOTE(review): listing lines 192-198 are missing from this view;
 // presumably they hold the `case` labels for the accepted constraint
 // IDs -- restore them from the upstream file before building.
199  // We need to make sure that this one operand does not end up in r0
200  // (because we might end up lowering this as 0(%op)).
201  const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
202  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
203  SDLoc dl(Op);
204  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
 // Copy the operand into the pointer register class chosen above.
205  SDValue NewOp =
206  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
207  dl, Op.getValueType(),
208  Op, RC), 0);
209 
210  OutOps.push_back(NewOp);
211  return false;
212  }
213  return true;
214  }
215 
 // Adds VRSAVE save/restore code to MF when it uses vector registers.
216  void InsertVRSaveCode(MachineFunction &MF);
217 
218  StringRef getPassName() const override {
219  return "PowerPC DAG->DAG Pattern Instruction Selection";
220  }
221 
222 // Include the pieces autogenerated from the target description.
223 #include "PPCGenDAGISel.inc"
224 
225 private:
226  bool trySETCC(SDNode *N);
227 
228  void PeepholePPC64();
229  void PeepholePPC64ZExt();
230  void PeepholeCROps();
231 
232  SDValue combineToCMPB(SDNode *N);
233  void foldBoolExts(SDValue &Res, SDNode *&N);
234 
235  bool AllUsersSelectZero(SDNode *N);
236  void SwapAllSelectUsers(SDNode *N);
237 
238  void transferMemOperands(SDNode *N, SDNode *Result);
239  };
240 }
241 
242 /// InsertVRSaveCode - Once the entire function has been instruction selected,
243 /// all virtual registers are created and all machine instructions are built,
244 /// check to see if we need to save/restore VRSAVE. If so, do it.
245 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
246  // Check to see if this function uses vector registers, which means we have to
247  // save and restore the VRSAVE register and update it with the regs we use.
248  //
249  // In this case, there will be virtual registers of vector type created
250  // by the scheduler. Detect them now.
251  bool HasVectorVReg = false;
252  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
254  if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
255  HasVectorVReg = true;
256  break;
257  }
258  }
259  if (!HasVectorVReg) return; // nothing to do.
260 
261  // If we have a vector register, we want to emit code into the entry and exit
262  // blocks to save and restore the VRSAVE register. We do this here (instead
263  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
264  //
265  // 1. This (trivially) reduces the load on the register allocator, by not
266  // having to represent the live range of the VRSAVE register.
267  // 2. This (more significantly) allows us to create a temporary virtual
268  // register to hold the saved VRSAVE value, allowing this temporary to be
269  // register allocated, instead of forcing it to be spilled to the stack.
270 
271  // Create two vregs - one to hold the VRSAVE register that is live-in to the
272  // function and one for the value after having bits or'd into it.
273  unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
274  unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
275 
276  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
277  MachineBasicBlock &EntryBB = *Fn.begin();
278  DebugLoc dl;
279  // Emit the following code into the entry block:
280  // InVRSAVE = MFVRSAVE
281  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
282  // MTVRSAVE UpdatedVRSAVE
283  MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
284  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
285  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
286  UpdatedVRSAVE).addReg(InVRSAVE);
287  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
288 
289  // Find all return blocks, outputting a restore in each epilog.
290  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
291  if (BB->isReturnBlock()) {
292  IP = BB->end(); --IP;
293 
294  // Skip over all terminator instructions, which are part of the return
295  // sequence.
297  while (I2 != BB->begin() && (--I2)->isTerminator())
298  IP = I2;
299 
300  // Emit: MTVRSAVE InVRSave
301  BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
302  }
303  }
304 }
305 
306 
307 /// getGlobalBaseReg - Output the instructions required to put the
308 /// base address to use for accessing globals into a register.
309 ///
310 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
311  if (!GlobalBaseReg) {
312  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
313  // Insert the set of GlobalBaseReg into the first MBB of the function
314  MachineBasicBlock &FirstMBB = MF->front();
315  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
316  const Module *M = MF->getFunction()->getParent();
317  DebugLoc dl;
318 
319  if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
320  if (PPCSubTarget->isTargetELF()) {
321  GlobalBaseReg = PPC::R30;
322  if (M->getPICLevel() == PICLevel::SmallPIC) {
323  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
324  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
325  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
326  } else {
327  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
328  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
329  unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
330  BuildMI(FirstMBB, MBBI, dl,
331  TII.get(PPC::UpdateGBR), GlobalBaseReg)
332  .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
333  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
334  }
335  } else {
336  GlobalBaseReg =
337  RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
338  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
339  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
340  }
341  } else {
342  GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
343  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
344  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
345  }
346  }
347  return CurDAG->getRegister(GlobalBaseReg,
348  PPCLowering->getPointerTy(CurDAG->getDataLayout()))
349  .getNode();
350 }
351 
352 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
353 /// or 64-bit immediate, and if the value can be accurately represented as a
354 /// sign extension from a 16-bit value. If so, this returns true and the
355 /// immediate.
356 static bool isIntS16Immediate(SDNode *N, short &Imm) {
357  if (N->getOpcode() != ISD::Constant)
358  return false;
359 
360  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
361  if (N->getValueType(0) == MVT::i32)
362  return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
363  else
364  return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
365 }
366 
367 static bool isIntS16Immediate(SDValue Op, short &Imm) {
368  return isIntS16Immediate(Op.getNode(), Imm);
369 }
370 
371 
372 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
373 /// operand. If so Imm will receive the 32-bit value.
374 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
375  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
376  Imm = cast<ConstantSDNode>(N)->getZExtValue();
377  return true;
378  }
379  return false;
380 }
381 
382 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
383 /// operand. If so Imm will receive the 64-bit value.
384 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
385  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
386  Imm = cast<ConstantSDNode>(N)->getZExtValue();
387  return true;
388  }
389  return false;
390 }
391 
392 // isInt32Immediate - This method tests to see if a constant operand.
393 // If so Imm will receive the 32 bit value.
394 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
395  return isInt32Immediate(N.getNode(), Imm);
396 }
397 
398 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
399  const SDValue &DestMBB) {
400  assert(isa<BasicBlockSDNode>(DestMBB));
401 
402  if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
403 
404  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
405  const TerminatorInst *BBTerm = BB->getTerminator();
406 
407  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
408 
409  const BasicBlock *TBB = BBTerm->getSuccessor(0);
410  const BasicBlock *FBB = BBTerm->getSuccessor(1);
411 
412  auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
413  auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
414 
415  // We only want to handle cases which are easy to predict at static time, e.g.
416  // C++ throw statement, that is very likely not taken, or calling never
417  // returned function, e.g. stdlib exit(). So we set Threshold to filter
418  // unwanted cases.
419  //
420  // Below is LLVM branch weight table, we only want to handle case 1, 2
421  //
422  // Case Taken:Nontaken Example
423  // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
424  // 2. Invoke-terminating 1:1048575
425  // 3. Coldblock 4:64 __builtin_expect
426  // 4. Loop Branch 124:4 For loop
427  // 5. PH/ZH/FPH 20:12
428  const uint32_t Threshold = 10000;
429 
430  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
431  return PPC::BR_NO_HINT;
432 
433  DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
434  << BB->getName() << "'\n"
435  << " -> " << TBB->getName() << ": " << TProb << "\n"
436  << " -> " << FBB->getName() << ": " << FProb << "\n");
437 
438  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
439 
440  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
441  // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
442  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
443  std::swap(TProb, FProb);
444 
445  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
446 }
447 
448 // isOpcWithIntImmediate - This method tests to see if the node is a specific
449 // opcode and that it has a immediate integer right operand.
450 // If so Imm will receive the 32 bit value.
451 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
452  return N->getOpcode() == Opc
453  && isInt32Immediate(N->getOperand(1).getNode(), Imm);
454 }
455 
456 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
457  SDLoc dl(SN);
458  int FI = cast<FrameIndexSDNode>(N)->getIndex();
459  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
460  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
461  if (SN->hasOneUse())
462  CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
463  getSmallIPtrImm(Offset, dl));
464  else
465  ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
466  getSmallIPtrImm(Offset, dl)));
467 }
468 
469 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
470  bool isShiftMask, unsigned &SH,
471  unsigned &MB, unsigned &ME) {
472  // Don't even go down this path for i64, since different logic will be
473  // necessary for rldicl/rldicr/rldimi.
474  if (N->getValueType(0) != MVT::i32)
475  return false;
476 
477  unsigned Shift = 32;
478  unsigned Indeterminant = ~0; // bit mask marking indeterminant results
479  unsigned Opcode = N->getOpcode();
480  if (N->getNumOperands() != 2 ||
481  !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
482  return false;
483 
484  if (Opcode == ISD::SHL) {
485  // apply shift left to mask if it comes first
486  if (isShiftMask) Mask = Mask << Shift;
487  // determine which bits are made indeterminant by shift
488  Indeterminant = ~(0xFFFFFFFFu << Shift);
489  } else if (Opcode == ISD::SRL) {
490  // apply shift right to mask if it comes first
491  if (isShiftMask) Mask = Mask >> Shift;
492  // determine which bits are made indeterminant by shift
493  Indeterminant = ~(0xFFFFFFFFu >> Shift);
494  // adjust for the left rotate
495  Shift = 32 - Shift;
496  } else if (Opcode == ISD::ROTL) {
497  Indeterminant = 0;
498  } else {
499  return false;
500  }
501 
502  // if the mask doesn't intersect any Indeterminant bits
503  if (Mask && !(Mask & Indeterminant)) {
504  SH = Shift & 31;
505  // make sure the mask is still a mask (wrap arounds may not be)
506  return isRunOfOnes(Mask, MB, ME);
507  }
508  return false;
509 }
510 
511 /// Turn an or of two masked values into the rotate left word immediate then
512 /// mask insert (rlwimi) instruction.
/// N's two operands are analysed with computeKnownBits. Returns true if N was
/// replaced by an RLWIMI machine node, false if the pattern did not match
/// (N is then left untouched).
513 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
514  SDValue Op0 = N->getOperand(0);
515  SDValue Op1 = N->getOperand(1);
516  SDLoc dl(N);
517 
 // L* describe operand 0 and R* operand 1; only the first APInt of each pair
 // is read below (the "KZ" names suggest known-zero bits -- confirm against
 // the computeKnownBits API).
518  APInt LKZ, LKO, RKZ, RKO;
519  CurDAG->computeKnownBits(Op0, LKZ, LKO);
520  CurDAG->computeKnownBits(Op1, RKZ, RKO);
521 
522  unsigned TargetMask = LKZ.getZExtValue();
523  unsigned InsertMask = RKZ.getZExtValue();
524 
 // Proceed only if every one of the 32 bit positions is covered by at least
 // one of the two masks.
525  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
526  unsigned Op0Opc = Op0.getOpcode();
527  unsigned Op1Opc = Op1.getOpcode();
528  unsigned Value, SH = 0;
 // From here on the masks are inverted.
529  TargetMask = ~TargetMask;
530  InsertMask = ~InsertMask;
531 
532  // If the LHS has a foldable shift and the RHS does not, then swap it to the
533  // RHS so that we can fold the shift into the insert.
534  if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
535  if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
536  Op0.getOperand(0).getOpcode() == ISD::SRL) {
537  if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
538  Op1.getOperand(0).getOpcode() != ISD::SRL) {
539  std::swap(Op0, Op1);
540  std::swap(Op0Opc, Op1Opc);
541  std::swap(TargetMask, InsertMask);
542  }
543  }
544  } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
545  if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
546  Op1.getOperand(0).getOpcode() != ISD::SRL) {
547  std::swap(Op0, Op1);
548  std::swap(Op0Opc, Op1Opc);
549  std::swap(TargetMask, InsertMask);
550  }
551  }
552 
 // The (possibly swapped) insert mask must be accepted by isRunOfOnes,
 // which also yields the MB/ME operands used below.
553  unsigned MB, ME;
554  if (isRunOfOnes(InsertMask, MB, ME)) {
555  SDValue Tmp1, Tmp2;
556 
 // Op1 is a bare constant shift: fold it into the rotate amount. A right
 // shift by Value becomes a left rotate by 32 - Value.
557  if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
558  isInt32Immediate(Op1.getOperand(1), Value)) {
559  Op1 = Op1.getOperand(0);
560  SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
561  }
562  if (Op1Opc == ISD::AND) {
563  // The AND mask might not be a constant, and we need to make sure that
564  // if we're going to fold the masking with the insert, all bits not
565  // known to be zero in the mask are known to be one.
566  APInt MKZ, MKO;
567  CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
568  bool CanFoldMask = InsertMask == MKO.getZExtValue();
569 
570  unsigned SHOpc = Op1.getOperand(0).getOpcode();
571  if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
572  isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
573  // Note that Value must be in range here (less than 32) because
574  // otherwise there would not be any bits set in InsertMask.
575  Op1 = Op1.getOperand(0).getOperand(0);
576  SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
577  }
578  }
579 
 // Reduce modulo 32: the SRL case above yields 32 when Value is 0.
580  SH &= 31;
581  SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
582  getI32Imm(ME, dl) };
583  ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
584  return true;
585  }
586  }
587  return false;
588 }
589 
590 // Predict the number of instructions that would be generated by calling
591 // getInt64(N).
592 static unsigned getInt64CountDirect(int64_t Imm) {
593  // Assume no remaining bits.
594  unsigned Remainder = 0;
595  // Assume no shift required.
596  unsigned Shift = 0;
597 
598  // If it can't be represented as a 32 bit value.
599  if (!isInt<32>(Imm)) {
600  Shift = countTrailingZeros<uint64_t>(Imm);
601  int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
602 
603  // If the shifted value fits 32 bits.
604  if (isInt<32>(ImmSh)) {
605  // Go with the shifted value.
606  Imm = ImmSh;
607  } else {
608  // Still stuck with a 64 bit value.
609  Remainder = Imm;
610  Shift = 32;
611  Imm >>= 32;
612  }
613  }
614 
615  // Intermediate operand.
616  unsigned Result = 0;
617 
618  // Handle first 32 bits.
619  unsigned Lo = Imm & 0xFFFF;
620 
621  // Simple value.
622  if (isInt<16>(Imm)) {
623  // Just the Lo bits.
624  ++Result;
625  } else if (Lo) {
626  // Handle the Hi bits and Lo bits.
627  Result += 2;
628  } else {
629  // Just the Hi bits.
630  ++Result;
631  }
632 
633  // If no shift, we're done.
634  if (!Shift) return Result;
635 
636  // If Hi word == Lo word,
637  // we can use rldimi to insert the Lo word into Hi word.
638  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
639  ++Result;
640  return Result;
641  }
642 
643  // Shift for next step if the upper 32-bits were not zero.
644  if (Imm)
645  ++Result;
646 
647  // Add in the last bits as required.
648  if ((Remainder >> 16) & 0xFFFF)
649  ++Result;
650  if (Remainder & 0xFFFF)
651  ++Result;
652 
653  return Result;
654 }
655 
// Rotate the 64-bit value Imm left by R bits and return the result.
// R is reduced modulo 64, so every rotation amount is well defined.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  // A zero rotation must not reach the expression below: it would shift a
  // 64-bit value right by 64, which is undefined behaviour.
  if (R == 0)
    return Imm;
  return (Imm << R) | (Imm >> (64 - R));
}
659 
660 static unsigned getInt64Count(int64_t Imm) {
661  unsigned Count = getInt64CountDirect(Imm);
662  if (Count == 1)
663  return Count;
664 
665  for (unsigned r = 1; r < 63; ++r) {
666  uint64_t RImm = Rot64(Imm, r);
667  unsigned RCount = getInt64CountDirect(RImm) + 1;
668  Count = std::min(Count, RCount);
669 
670  // See comments in getInt64 for an explanation of the logic below.
671  unsigned LS = findLastSet(RImm);
672  if (LS != r-1)
673  continue;
674 
675  uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
676  uint64_t RImmWithOnes = RImm | OnesMask;
677 
678  RCount = getInt64CountDirect(RImmWithOnes) + 1;
679  Count = std::min(Count, RCount);
680  }
681 
682  return Count;
683 }
684 
685 // Select a 64-bit constant. For cost-modeling purposes, getInt64Count
686 // (above) needs to be kept in sync with this function.
687 static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl,
688  int64_t Imm) {
689  // Assume no remaining bits.
690  unsigned Remainder = 0;
691  // Assume no shift required.
692  unsigned Shift = 0;
693 
694  // If it can't be represented as a 32 bit value.
695  if (!isInt<32>(Imm)) {
696  Shift = countTrailingZeros<uint64_t>(Imm);
697  int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
698 
699  // If the shifted value fits 32 bits.
700  if (isInt<32>(ImmSh)) {
701  // Go with the shifted value.
702  Imm = ImmSh;
703  } else {
704  // Still stuck with a 64 bit value.
705  Remainder = Imm;
706  Shift = 32;
707  Imm >>= 32;
708  }
709  }
710 
711  // Intermediate operand.
712  SDNode *Result;
713 
714  // Handle first 32 bits.
715  unsigned Lo = Imm & 0xFFFF;
716  unsigned Hi = (Imm >> 16) & 0xFFFF;
717 
718  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
719  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
720  };
721 
722  // Simple value.
723  if (isInt<16>(Imm)) {
724  // Just the Lo bits.
725  Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
726  } else if (Lo) {
727  // Handle the Hi bits.
728  unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
729  Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
730  // And Lo bits.
731  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
732  SDValue(Result, 0), getI32Imm(Lo));
733  } else {
734  // Just the Hi bits.
735  Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
736  }
737 
738  // If no shift, we're done.
739  if (!Shift) return Result;
740 
741  // If Hi word == Lo word,
742  // we can use rldimi to insert the Lo word into Hi word.
743  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
744  SDValue Ops[] =
745  { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
746  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
747  }
748 
749  // Shift for next step if the upper 32-bits were not zero.
750  if (Imm) {
751  Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
752  SDValue(Result, 0),
753  getI32Imm(Shift),
754  getI32Imm(63 - Shift));
755  }
756 
757  // Add in the last bits as required.
758  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
759  Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
760  SDValue(Result, 0), getI32Imm(Hi));
761  }
762  if ((Lo = Remainder & 0xFFFF)) {
763  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
764  SDValue(Result, 0), getI32Imm(Lo));
765  }
766 
767  return Result;
768 }
769 
770 static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) {
771  unsigned Count = getInt64CountDirect(Imm);
772  if (Count == 1)
773  return getInt64Direct(CurDAG, dl, Imm);
774 
775  unsigned RMin = 0;
776 
777  int64_t MatImm;
778  unsigned MaskEnd;
779 
780  for (unsigned r = 1; r < 63; ++r) {
781  uint64_t RImm = Rot64(Imm, r);
782  unsigned RCount = getInt64CountDirect(RImm) + 1;
783  if (RCount < Count) {
784  Count = RCount;
785  RMin = r;
786  MatImm = RImm;
787  MaskEnd = 63;
788  }
789 
790  // If the immediate to generate has many trailing zeros, it might be
791  // worthwhile to generate a rotated value with too many leading ones
792  // (because that's free with li/lis's sign-extension semantics), and then
793  // mask them off after rotation.
794 
795  unsigned LS = findLastSet(RImm);
796  // We're adding (63-LS) higher-order ones, and we expect to mask them off
797  // after performing the inverse rotation by (64-r). So we need that:
798  // 63-LS == 64-r => LS == r-1
799  if (LS != r-1)
800  continue;
801 
802  uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
803  uint64_t RImmWithOnes = RImm | OnesMask;
804 
805  RCount = getInt64CountDirect(RImmWithOnes) + 1;
806  if (RCount < Count) {
807  Count = RCount;
808  RMin = r;
809  MatImm = RImmWithOnes;
810  MaskEnd = LS;
811  }
812  }
813 
814  if (!RMin)
815  return getInt64Direct(CurDAG, dl, Imm);
816 
817  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
818  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
819  };
820 
821  SDValue Val = SDValue(getInt64Direct(CurDAG, dl, MatImm), 0);
822  return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
823  getI32Imm(64 - RMin), getI32Imm(MaskEnd));
824 }
825 
826 // Select a 64-bit constant.
827 static SDNode *getInt64(SelectionDAG *CurDAG, SDNode *N) {
828  SDLoc dl(N);
829 
830  // Get 64 bit value.
831  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
832  return getInt64(CurDAG, dl, Imm);
833 }
834 
835 namespace {
836 class BitPermutationSelector {
837  struct ValueBit {
838  SDValue V;
839 
840  // The bit number in the value, using a convention where bit 0 is the
841  // lowest-order bit.
842  unsigned Idx;
843 
844  enum Kind {
845  ConstZero,
846  Variable
847  } K;
848 
849  ValueBit(SDValue V, unsigned I, Kind K = Variable)
850  : V(V), Idx(I), K(K) {}
851  ValueBit(Kind K = Variable)
852  : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
853 
854  bool isZero() const {
855  return K == ConstZero;
856  }
857 
858  bool hasValue() const {
859  return K == Variable;
860  }
861 
862  SDValue getValue() const {
863  assert(hasValue() && "Cannot get the value of a constant bit");
864  return V;
865  }
866 
867  unsigned getValueBitIndex() const {
868  assert(hasValue() && "Cannot get the value bit index of a constant bit");
869  return Idx;
870  }
871  };
872 
873  // A bit group has the same underlying value and the same rotate factor.
874  struct BitGroup {
875  SDValue V;
876  unsigned RLAmt;
877  unsigned StartIdx, EndIdx;
878 
879  // This rotation amount assumes that the lower 32 bits of the quantity are
880  // replicated in the high 32 bits by the rotation operator (which is done
881  // by rlwinm and friends in 64-bit mode).
882  bool Repl32;
883  // Did converting to Repl32 == true change the rotation factor? If it did,
884  // it decreased it by 32.
885  bool Repl32CR;
886  // Was this group coalesced after setting Repl32 to true?
887  bool Repl32Coalesced;
888 
889  BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
890  : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
891  Repl32Coalesced(false) {
892  DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
893  " [" << S << ", " << E << "]\n");
894  }
895  };
896 
897  // Information on each (Value, RLAmt) pair (like the number of groups
898  // associated with each) used to choose the lowering method.
899  struct ValueRotInfo {
900  SDValue V;
901  unsigned RLAmt;
902  unsigned NumGroups;
903  unsigned FirstGroupStartIdx;
904  bool Repl32;
905 
906  ValueRotInfo()
907  : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX),
908  Repl32(false) {}
909 
910  // For sorting (in reverse order) by NumGroups, and then by
911  // FirstGroupStartIdx.
912  bool operator < (const ValueRotInfo &Other) const {
913  // We need to sort so that the non-Repl32 come first because, when we're
914  // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
915  // masking operation.
916  if (Repl32 < Other.Repl32)
917  return true;
918  else if (Repl32 > Other.Repl32)
919  return false;
920  else if (NumGroups > Other.NumGroups)
921  return true;
922  else if (NumGroups < Other.NumGroups)
923  return false;
924  else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
925  return true;
926  return false;
927  }
928  };
929 
930  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
931  using ValueBitsMemoizer =
933  ValueBitsMemoizer Memoizer;
934 
935  // Return a pair of bool and a SmallVector pointer to a memoization entry.
936  // The bool is true if something interesting was deduced, otherwise if we're
937  // providing only a generic representation of V (or something else likewise
938  // uninteresting for instruction selection) through the SmallVector.
939  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
940  unsigned NumBits) {
941  auto &ValueEntry = Memoizer[V];
942  if (ValueEntry)
943  return std::make_pair(ValueEntry->first, &ValueEntry->second);
944  ValueEntry.reset(new ValueBitsMemoizedValue());
945  bool &Interesting = ValueEntry->first;
946  SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
947  Bits.resize(NumBits);
948 
949  switch (V.getOpcode()) {
950  default: break;
951  case ISD::ROTL:
952  if (isa<ConstantSDNode>(V.getOperand(1))) {
953  unsigned RotAmt = V.getConstantOperandVal(1);
954 
955  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
956 
957  for (unsigned i = 0; i < NumBits; ++i)
958  Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
959 
960  return std::make_pair(Interesting = true, &Bits);
961  }
962  break;
963  case ISD::SHL:
964  if (isa<ConstantSDNode>(V.getOperand(1))) {
965  unsigned ShiftAmt = V.getConstantOperandVal(1);
966 
967  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
968 
969  for (unsigned i = ShiftAmt; i < NumBits; ++i)
970  Bits[i] = LHSBits[i - ShiftAmt];
971 
972  for (unsigned i = 0; i < ShiftAmt; ++i)
973  Bits[i] = ValueBit(ValueBit::ConstZero);
974 
975  return std::make_pair(Interesting = true, &Bits);
976  }
977  break;
978  case ISD::SRL:
979  if (isa<ConstantSDNode>(V.getOperand(1))) {
980  unsigned ShiftAmt = V.getConstantOperandVal(1);
981 
982  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
983 
984  for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
985  Bits[i] = LHSBits[i + ShiftAmt];
986 
987  for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
988  Bits[i] = ValueBit(ValueBit::ConstZero);
989 
990  return std::make_pair(Interesting = true, &Bits);
991  }
992  break;
993  case ISD::AND:
994  if (isa<ConstantSDNode>(V.getOperand(1))) {
995  uint64_t Mask = V.getConstantOperandVal(1);
996 
997  const SmallVector<ValueBit, 64> *LHSBits;
998  // Mark this as interesting, only if the LHS was also interesting. This
999  // prevents the overall procedure from matching a single immediate 'and'
1000  // (which is non-optimal because such an and might be folded with other
1001  // things if we don't select it here).
1002  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1003 
1004  for (unsigned i = 0; i < NumBits; ++i)
1005  if (((Mask >> i) & 1) == 1)
1006  Bits[i] = (*LHSBits)[i];
1007  else
1008  Bits[i] = ValueBit(ValueBit::ConstZero);
1009 
1010  return std::make_pair(Interesting, &Bits);
1011  }
1012  break;
1013  case ISD::OR: {
1014  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1015  const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1016 
1017  bool AllDisjoint = true;
1018  for (unsigned i = 0; i < NumBits; ++i)
1019  if (LHSBits[i].isZero())
1020  Bits[i] = RHSBits[i];
1021  else if (RHSBits[i].isZero())
1022  Bits[i] = LHSBits[i];
1023  else {
1024  AllDisjoint = false;
1025  break;
1026  }
1027 
1028  if (!AllDisjoint)
1029  break;
1030 
1031  return std::make_pair(Interesting = true, &Bits);
1032  }
1033  }
1034 
1035  for (unsigned i = 0; i < NumBits; ++i)
1036  Bits[i] = ValueBit(V, i);
1037 
1038  return std::make_pair(Interesting = false, &Bits);
1039  }
1040 
1041  // For each value (except the constant ones), compute the left-rotate amount
1042  // to get it from its original to final position.
1043  void computeRotationAmounts() {
1044  HasZeros = false;
1045  RLAmt.resize(Bits.size());
1046  for (unsigned i = 0; i < Bits.size(); ++i)
1047  if (Bits[i].hasValue()) {
1048  unsigned VBI = Bits[i].getValueBitIndex();
1049  if (i >= VBI)
1050  RLAmt[i] = i - VBI;
1051  else
1052  RLAmt[i] = Bits.size() - (VBI - i);
1053  } else if (Bits[i].isZero()) {
1054  HasZeros = true;
1055  RLAmt[i] = UINT32_MAX;
1056  } else {
1057  llvm_unreachable("Unknown value bit type");
1058  }
1059  }
1060 
1061  // Collect groups of consecutive bits with the same underlying value and
1062  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1063  // they break up groups.
1064  void collectBitGroups(bool LateMask) {
1065  BitGroups.clear();
1066 
1067  unsigned LastRLAmt = RLAmt[0];
1068  SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1069  unsigned LastGroupStartIdx = 0;
1070  for (unsigned i = 1; i < Bits.size(); ++i) {
1071  unsigned ThisRLAmt = RLAmt[i];
1072  SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1073  if (LateMask && !ThisValue) {
1074  ThisValue = LastValue;
1075  ThisRLAmt = LastRLAmt;
1076  // If we're doing late masking, then the first bit group always starts
1077  // at zero (even if the first bits were zero).
1078  if (BitGroups.empty())
1079  LastGroupStartIdx = 0;
1080  }
1081 
1082  // If this bit has the same underlying value and the same rotate factor as
1083  // the last one, then they're part of the same group.
1084  if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1085  continue;
1086 
1087  if (LastValue.getNode())
1088  BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1089  i-1));
1090  LastRLAmt = ThisRLAmt;
1091  LastValue = ThisValue;
1092  LastGroupStartIdx = i;
1093  }
1094  if (LastValue.getNode())
1095  BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1096  Bits.size()-1));
1097 
1098  if (BitGroups.empty())
1099  return;
1100 
1101  // We might be able to combine the first and last groups.
1102  if (BitGroups.size() > 1) {
1103  // If the first and last groups are the same, then remove the first group
1104  // in favor of the last group, making the ending index of the last group
1105  // equal to the ending index of the to-be-removed first group.
1106  if (BitGroups[0].StartIdx == 0 &&
1107  BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1108  BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1109  BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1110  DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1111  BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1112  BitGroups.erase(BitGroups.begin());
1113  }
1114  }
1115  }
1116 
1117  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1118  // associated with each. If there is a degeneracy, pick the one that occurs
1119  // first (in the final value).
1120  void collectValueRotInfo() {
1121  ValueRots.clear();
1122 
1123  for (auto &BG : BitGroups) {
1124  unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1125  ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1126  VRI.V = BG.V;
1127  VRI.RLAmt = BG.RLAmt;
1128  VRI.Repl32 = BG.Repl32;
1129  VRI.NumGroups += 1;
1130  VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1131  }
1132 
1133  // Now that we've collected the various ValueRotInfo instances, we need to
1134  // sort them.
1135  ValueRotsVec.clear();
1136  for (auto &I : ValueRots) {
1137  ValueRotsVec.push_back(I.second);
1138  }
1139  std::sort(ValueRotsVec.begin(), ValueRotsVec.end());
1140  }
1141 
1142  // In 64-bit mode, rlwinm and friends have a rotation operator that
1143  // replicates the low-order 32 bits into the high-order 32-bits. The mask
1144  // indices of these instructions can only be in the lower 32 bits, so they
1145  // can only represent some 64-bit bit groups. However, when they can be used,
1146  // the 32-bit replication can be used to represent, as a single bit group,
1147  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1148  // groups when possible. Returns true if any of the bit groups were
1149  // converted.
1150  void assignRepl32BitGroups() {
1151  // If we have bits like this:
1152  //
1153  // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1154  // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1155  // Groups: | RLAmt = 8 | RLAmt = 40 |
1156  //
1157  // But, making use of a 32-bit operation that replicates the low-order 32
1158  // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1159  // of 8.
1160 
1161  auto IsAllLow32 = [this](BitGroup & BG) {
1162  if (BG.StartIdx <= BG.EndIdx) {
1163  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1164  if (!Bits[i].hasValue())
1165  continue;
1166  if (Bits[i].getValueBitIndex() >= 32)
1167  return false;
1168  }
1169  } else {
1170  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1171  if (!Bits[i].hasValue())
1172  continue;
1173  if (Bits[i].getValueBitIndex() >= 32)
1174  return false;
1175  }
1176  for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1177  if (!Bits[i].hasValue())
1178  continue;
1179  if (Bits[i].getValueBitIndex() >= 32)
1180  return false;
1181  }
1182  }
1183 
1184  return true;
1185  };
1186 
1187  for (auto &BG : BitGroups) {
1188  if (BG.StartIdx < 32 && BG.EndIdx < 32) {
1189  if (IsAllLow32(BG)) {
1190  if (BG.RLAmt >= 32) {
1191  BG.RLAmt -= 32;
1192  BG.Repl32CR = true;
1193  }
1194 
1195  BG.Repl32 = true;
1196 
1197  DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
1198  BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
1199  " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
1200  }
1201  }
1202  }
1203 
1204  // Now walk through the bit groups, consolidating where possible.
1205  for (auto I = BitGroups.begin(); I != BitGroups.end();) {
1206  // We might want to remove this bit group by merging it with the previous
1207  // group (which might be the ending group).
1208  auto IP = (I == BitGroups.begin()) ?
1209  std::prev(BitGroups.end()) : std::prev(I);
1210  if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
1211  I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
1212 
1213  DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
1214  I->V.getNode() << " RLAmt = " << I->RLAmt <<
1215  " [" << I->StartIdx << ", " << I->EndIdx <<
1216  "] with group with range [" <<
1217  IP->StartIdx << ", " << IP->EndIdx << "]\n");
1218 
1219  IP->EndIdx = I->EndIdx;
1220  IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
1221  IP->Repl32Coalesced = true;
1222  I = BitGroups.erase(I);
1223  continue;
1224  } else {
1225  // There is a special case worth handling: If there is a single group
1226  // covering the entire upper 32 bits, and it can be merged with both
1227  // the next and previous groups (which might be the same group), then
1228  // do so. If it is the same group (so there will be only one group in
1229  // total), then we need to reverse the order of the range so that it
1230  // covers the entire 64 bits.
1231  if (I->StartIdx == 32 && I->EndIdx == 63) {
1232  assert(std::next(I) == BitGroups.end() &&
1233  "bit group ends at index 63 but there is another?");
1234  auto IN = BitGroups.begin();
1235 
1236  if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
1237  (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
1238  IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
1239  IsAllLow32(*I)) {
1240 
1241  DEBUG(dbgs() << "\tcombining bit group for " <<
1242  I->V.getNode() << " RLAmt = " << I->RLAmt <<
1243  " [" << I->StartIdx << ", " << I->EndIdx <<
1244  "] with 32-bit replicated groups with ranges [" <<
1245  IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
1246  IN->StartIdx << ", " << IN->EndIdx << "]\n");
1247 
1248  if (IP == IN) {
1249  // There is only one other group; change it to cover the whole
1250  // range (backward, so that it can still be Repl32 but cover the
1251  // whole 64-bit range).
1252  IP->StartIdx = 31;
1253  IP->EndIdx = 30;
1254  IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
1255  IP->Repl32Coalesced = true;
1256  I = BitGroups.erase(I);
1257  } else {
1258  // There are two separate groups, one before this group and one
1259  // after us (at the beginning). We're going to remove this group,
1260  // but also the group at the very beginning.
1261  IP->EndIdx = IN->EndIdx;
1262  IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
1263  IP->Repl32Coalesced = true;
1264  I = BitGroups.erase(I);
1265  BitGroups.erase(BitGroups.begin());
1266  }
1267 
1268  // This must be the last group in the vector (and we might have
1269  // just invalidated the iterator above), so break here.
1270  break;
1271  }
1272  }
1273  }
1274 
1275  ++I;
1276  }
1277  }
1278 
1279  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
1280  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1281  }
1282 
1283  uint64_t getZerosMask() {
1284  uint64_t Mask = 0;
1285  for (unsigned i = 0; i < Bits.size(); ++i) {
1286  if (Bits[i].hasValue())
1287  continue;
1288  Mask |= (UINT64_C(1) << i);
1289  }
1290 
1291  return ~Mask;
1292  }
1293 
1294  // Depending on the number of groups for a particular value, it might be
1295  // better to rotate, mask explicitly (using andi/andis), and then or the
1296  // result. Select this part of the result first.
1297  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1299  return;
1300 
1301  for (ValueRotInfo &VRI : ValueRotsVec) {
1302  unsigned Mask = 0;
1303  for (unsigned i = 0; i < Bits.size(); ++i) {
1304  if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1305  continue;
1306  if (RLAmt[i] != VRI.RLAmt)
1307  continue;
1308  Mask |= (1u << i);
1309  }
1310 
1311  // Compute the masks for andi/andis that would be necessary.
1312  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1313  assert((ANDIMask != 0 || ANDISMask != 0) &&
1314  "No set bits in mask for value bit groups");
1315  bool NeedsRotate = VRI.RLAmt != 0;
1316 
1317  // We're trying to minimize the number of instructions. If we have one
1318  // group, using one of andi/andis can break even. If we have three
1319  // groups, we can use both andi and andis and break even (to use both
1320  // andi and andis we also need to or the results together). We need four
1321  // groups if we also need to rotate. To use andi/andis we need to do more
1322  // than break even because rotate-and-mask instructions tend to be easier
1323  // to schedule.
1324 
1325  // FIXME: We've biased here against using andi/andis, which is right for
1326  // POWER cores, but not optimal everywhere. For example, on the A2,
1327  // andi/andis have single-cycle latency whereas the rotate-and-mask
1328  // instructions take two cycles, and it would be better to bias toward
1329  // andi/andis in break-even cases.
1330 
1331  unsigned NumAndInsts = (unsigned) NeedsRotate +
1332  (unsigned) (ANDIMask != 0) +
1333  (unsigned) (ANDISMask != 0) +
1334  (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
1335  (unsigned) (bool) Res;
1336 
1337  DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1338  " RL: " << VRI.RLAmt << ":" <<
1339  "\n\t\t\tisel using masking: " << NumAndInsts <<
1340  " using rotates: " << VRI.NumGroups << "\n");
1341 
1342  if (NumAndInsts >= VRI.NumGroups)
1343  continue;
1344 
1345  DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1346 
1347  if (InstCnt) *InstCnt += NumAndInsts;
1348 
1349  SDValue VRot;
1350  if (VRI.RLAmt) {
1351  SDValue Ops[] =
1352  { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1353  getI32Imm(31, dl) };
1354  VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
1355  Ops), 0);
1356  } else {
1357  VRot = VRI.V;
1358  }
1359 
1360  SDValue ANDIVal, ANDISVal;
1361  if (ANDIMask != 0)
1362  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1363  VRot, getI32Imm(ANDIMask, dl)), 0);
1364  if (ANDISMask != 0)
1365  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1366  VRot, getI32Imm(ANDISMask, dl)), 0);
1367 
1368  SDValue TotalVal;
1369  if (!ANDIVal)
1370  TotalVal = ANDISVal;
1371  else if (!ANDISVal)
1372  TotalVal = ANDIVal;
1373  else
1374  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1375  ANDIVal, ANDISVal), 0);
1376 
1377  if (!Res)
1378  Res = TotalVal;
1379  else
1380  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1381  Res, TotalVal), 0);
1382 
1383  // Now, remove all groups with this underlying value and rotation
1384  // factor.
1385  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1386  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1387  });
1388  }
1389  }
1390 
1391  // Instruction selection for the 32-bit case.
1392  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
1393  SDLoc dl(N);
1394  SDValue Res;
1395 
1396  if (InstCnt) *InstCnt = 0;
1397 
1398  // Take care of cases that should use andi/andis first.
1399  SelectAndParts32(dl, Res, InstCnt);
1400 
1401  // If we've not yet selected a 'starting' instruction, and we have no zeros
1402  // to fill in, select the (Value, RLAmt) with the highest priority (largest
1403  // number of groups), and start with this rotated value.
1404  if ((!HasZeros || LateMask) && !Res) {
1405  ValueRotInfo &VRI = ValueRotsVec[0];
1406  if (VRI.RLAmt) {
1407  if (InstCnt) *InstCnt += 1;
1408  SDValue Ops[] =
1409  { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1410  getI32Imm(31, dl) };
1411  Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
1412  0);
1413  } else {
1414  Res = VRI.V;
1415  }
1416 
1417  // Now, remove all groups with this underlying value and rotation factor.
1418  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1419  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1420  });
1421  }
1422 
1423  if (InstCnt) *InstCnt += BitGroups.size();
1424 
1425  // Insert the other groups (one at a time).
1426  for (auto &BG : BitGroups) {
1427  if (!Res) {
1428  SDValue Ops[] =
1429  { BG.V, getI32Imm(BG.RLAmt, dl),
1430  getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1431  getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1432  Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
1433  } else {
1434  SDValue Ops[] =
1435  { Res, BG.V, getI32Imm(BG.RLAmt, dl),
1436  getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1437  getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1438  Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
1439  }
1440  }
1441 
1442  if (LateMask) {
1443  unsigned Mask = (unsigned) getZerosMask();
1444 
1445  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1446  assert((ANDIMask != 0 || ANDISMask != 0) &&
1447  "No set bits in zeros mask?");
1448 
1449  if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1450  (unsigned) (ANDISMask != 0) +
1451  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1452 
1453  SDValue ANDIVal, ANDISVal;
1454  if (ANDIMask != 0)
1455  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1456  Res, getI32Imm(ANDIMask, dl)), 0);
1457  if (ANDISMask != 0)
1458  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1459  Res, getI32Imm(ANDISMask, dl)), 0);
1460 
1461  if (!ANDIVal)
1462  Res = ANDISVal;
1463  else if (!ANDISVal)
1464  Res = ANDIVal;
1465  else
1466  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1467  ANDIVal, ANDISVal), 0);
1468  }
1469 
1470  return Res.getNode();
1471  }
1472 
// Number of instructions (1 or 2) that SelectRotMask64 (IsIns == false) or
// SelectRotMaskIns64 (IsIns == true) needs for the given rotate amount and
// mask range. MaskStart/MaskEnd are result-bit indices counted from the
// low-order end.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // The 32-bit replicating forms always take a single instruction.
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstMaskStart = 64 - MaskEnd - 1;
  const unsigned InstMaskEnd = 64 - MaskStart - 1;

  // A plain rotate-and-mask also has single-instruction forms when the mask
  // reaches either end of the register; inserts do not.
  const bool MaskTouchesAnEnd = InstMaskEnd == 63 || InstMaskStart == 0;
  // Both kinds have a form whose mask end is tied to the rotate amount.
  const bool MaskEndMatchesRotate = InstMaskEnd == 63 - RLAmt;

  if (MaskEndMatchesRotate || (!IsIns && MaskTouchesAnEnd))
    return 1;
  return 2;
}
1490 
1491  // For 64-bit values, not all combinations of rotates and masks are
1492  // available. Produce one if it is available.
1493  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
1494  bool Repl32, unsigned MaskStart, unsigned MaskEnd,
1495  unsigned *InstCnt = nullptr) {
1496  // In the notation used by the instructions, 'start' and 'end' are reversed
1497  // because bits are counted from high to low order.
1498  unsigned InstMaskStart = 64 - MaskEnd - 1,
1499  InstMaskEnd = 64 - MaskStart - 1;
1500 
1501  if (InstCnt) *InstCnt += 1;
1502 
1503  if (Repl32) {
1504  // This rotation amount assumes that the lower 32 bits of the quantity
1505  // are replicated in the high 32 bits by the rotation operator (which is
1506  // done by rlwinm and friends).
1507  assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1508  assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
1509  SDValue Ops[] =
1510  { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
1511  getI32Imm(InstMaskEnd - 32, dl) };
1512  return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
1513  Ops), 0);
1514  }
1515 
1516  if (InstMaskEnd == 63) {
1517  SDValue Ops[] =
1518  { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
1519  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
1520  }
1521 
1522  if (InstMaskStart == 0) {
1523  SDValue Ops[] =
1524  { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) };
1525  return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
1526  }
1527 
1528  if (InstMaskEnd == 63 - RLAmt) {
1529  SDValue Ops[] =
1530  { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
1531  return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
1532  }
1533 
1534  // We cannot do this with a single instruction, so we'll use two. The
1535  // problem is that we're not free to choose both a rotation amount and mask
1536  // start and end independently. We can choose an arbitrary mask start and
1537  // end, but then the rotation amount is fixed. Rotation, however, can be
1538  // inverted, and so by applying an "inverse" rotation first, we can get the
1539  // desired result.
1540  if (InstCnt) *InstCnt += 1;
1541 
1542  // The rotation mask for the second instruction must be MaskStart.
1543  unsigned RLAmt2 = MaskStart;
1544  // The first instruction must rotate V so that the overall rotation amount
1545  // is RLAmt.
1546  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1547  if (RLAmt1)
1548  V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1549  return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
1550  }
1551 
1552  // For 64-bit values, not all combinations of rotates and masks are
1553  // available. Produce a rotate-mask-and-insert if one is available.
1554  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
1555  unsigned RLAmt, bool Repl32, unsigned MaskStart,
1556  unsigned MaskEnd, unsigned *InstCnt = nullptr) {
1557  // In the notation used by the instructions, 'start' and 'end' are reversed
1558  // because bits are counted from high to low order.
1559  unsigned InstMaskStart = 64 - MaskEnd - 1,
1560  InstMaskEnd = 64 - MaskStart - 1;
1561 
1562  if (InstCnt) *InstCnt += 1;
1563 
1564  if (Repl32) {
1565  // This rotation amount assumes that the lower 32 bits of the quantity
1566  // are replicated in the high 32 bits by the rotation operator (which is
1567  // done by rlwinm and friends).
1568  assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1569  assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
1570  SDValue Ops[] =
1571  { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
1572  getI32Imm(InstMaskEnd - 32, dl) };
1573  return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
1574  Ops), 0);
1575  }
1576 
1577  if (InstMaskEnd == 63 - RLAmt) {
1578  SDValue Ops[] =
1579  { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
1580  return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
1581  }
1582 
1583  // We cannot do this with a single instruction, so we'll use two. The
1584  // problem is that we're not free to choose both a rotation amount and mask
1585  // start and end independently. We can choose an arbitrary mask start and
1586  // end, but then the rotation amount is fixed. Rotation, however, can be
1587  // inverted, and so by applying an "inverse" rotation first, we can get the
1588  // desired result.
1589  if (InstCnt) *InstCnt += 1;
1590 
1591  // The rotation mask for the second instruction must be MaskStart.
1592  unsigned RLAmt2 = MaskStart;
1593  // The first instruction must rotate V so that the overall rotation amount
1594  // is RLAmt.
1595  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1596  if (RLAmt1)
1597  V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1598  return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
1599  }
1600 
1601  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1603  return;
1604 
1605  // The idea here is the same as in the 32-bit version, but with additional
1606  // complications from the fact that Repl32 might be true. Because we
1607  // aggressively convert bit groups to Repl32 form (which, for small
1608  // rotation factors, involves no other change), and then coalesce, it might
1609  // be the case that a single 64-bit masking operation could handle both
1610  // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
1611  // form allowed coalescing, then we must use a 32-bit rotaton in order to
1612  // completely capture the new combined bit group.
1613 
1614  for (ValueRotInfo &VRI : ValueRotsVec) {
1615  uint64_t Mask = 0;
1616 
1617  // We need to add to the mask all bits from the associated bit groups.
1618  // If Repl32 is false, we need to add bits from bit groups that have
1619  // Repl32 true, but are trivially convertable to Repl32 false. Such a
1620  // group is trivially convertable if it overlaps only with the lower 32
1621  // bits, and the group has not been coalesced.
1622  auto MatchingBG = [VRI](const BitGroup &BG) {
1623  if (VRI.V != BG.V)
1624  return false;
1625 
1626  unsigned EffRLAmt = BG.RLAmt;
1627  if (!VRI.Repl32 && BG.Repl32) {
1628  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
1629  !BG.Repl32Coalesced) {
1630  if (BG.Repl32CR)
1631  EffRLAmt += 32;
1632  } else {
1633  return false;
1634  }
1635  } else if (VRI.Repl32 != BG.Repl32) {
1636  return false;
1637  }
1638 
1639  return VRI.RLAmt == EffRLAmt;
1640  };
1641 
1642  for (auto &BG : BitGroups) {
1643  if (!MatchingBG(BG))
1644  continue;
1645 
1646  if (BG.StartIdx <= BG.EndIdx) {
1647  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
1648  Mask |= (UINT64_C(1) << i);
1649  } else {
1650  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
1651  Mask |= (UINT64_C(1) << i);
1652  for (unsigned i = 0; i <= BG.EndIdx; ++i)
1653  Mask |= (UINT64_C(1) << i);
1654  }
1655  }
1656 
1657  // We can use the 32-bit andi/andis technique if the mask does not
1658  // require any higher-order bits. This can save an instruction compared
1659  // to always using the general 64-bit technique.
1660  bool Use32BitInsts = isUInt<32>(Mask);
1661  // Compute the masks for andi/andis that would be necessary.
1662  unsigned ANDIMask = (Mask & UINT16_MAX),
1663  ANDISMask = (Mask >> 16) & UINT16_MAX;
1664 
1665  bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
1666 
1667  unsigned NumAndInsts = (unsigned) NeedsRotate +
1668  (unsigned) (bool) Res;
1669  if (Use32BitInsts)
1670  NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
1671  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1672  else
1673  NumAndInsts += getInt64Count(Mask) + /* and */ 1;
1674 
1675  unsigned NumRLInsts = 0;
1676  bool FirstBG = true;
1677  bool MoreBG = false;
1678  for (auto &BG : BitGroups) {
1679  if (!MatchingBG(BG)) {
1680  MoreBG = true;
1681  continue;
1682  }
1683  NumRLInsts +=
1684  SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
1685  !FirstBG);
1686  FirstBG = false;
1687  }
1688 
1689  DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1690  " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") <<
1691  "\n\t\t\tisel using masking: " << NumAndInsts <<
1692  " using rotates: " << NumRLInsts << "\n");
1693 
1694  // When we'd use andi/andis, we bias toward using the rotates (andi only
1695  // has a record form, and is cracked on POWER cores). However, when using
1696  // general 64-bit constant formation, bias toward the constant form,
1697  // because that exposes more opportunities for CSE.
1698  if (NumAndInsts > NumRLInsts)
1699  continue;
1700  // When merging multiple bit groups, instruction or is used.
1701  // But when rotate is used, rldimi can inert the rotated value into any
1702  // register, so instruction or can be avoided.
1703  if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
1704  continue;
1705 
1706  DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1707 
1708  if (InstCnt) *InstCnt += NumAndInsts;
1709 
1710  SDValue VRot;
1711  // We actually need to generate a rotation if we have a non-zero rotation
1712  // factor or, in the Repl32 case, if we care about any of the
1713  // higher-order replicated bits. In the latter case, we generate a mask
1714  // backward so that it actually includes the entire 64 bits.
1715  if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
1716  VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
1717  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
1718  else
1719  VRot = VRI.V;
1720 
1721  SDValue TotalVal;
1722  if (Use32BitInsts) {
1723  assert((ANDIMask != 0 || ANDISMask != 0) &&
1724  "No set bits in mask when using 32-bit ands for 64-bit value");
1725 
1726  SDValue ANDIVal, ANDISVal;
1727  if (ANDIMask != 0)
1728  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
1729  VRot, getI32Imm(ANDIMask, dl)), 0);
1730  if (ANDISMask != 0)
1731  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
1732  VRot, getI32Imm(ANDISMask, dl)), 0);
1733 
1734  if (!ANDIVal)
1735  TotalVal = ANDISVal;
1736  else if (!ANDISVal)
1737  TotalVal = ANDIVal;
1738  else
1739  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1740  ANDIVal, ANDISVal), 0);
1741  } else {
1742  TotalVal = SDValue(getInt64(CurDAG, dl, Mask), 0);
1743  TotalVal =
1744  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
1745  VRot, TotalVal), 0);
1746  }
1747 
1748  if (!Res)
1749  Res = TotalVal;
1750  else
1751  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1752  Res, TotalVal), 0);
1753 
1754  // Now, remove all groups with this underlying value and rotation
1755  // factor.
1756  eraseMatchingBitGroups(MatchingBG);
1757  }
1758  }
1759 
1760  // Instruction selection for the 64-bit case.
1761  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
1762  SDLoc dl(N);
1763  SDValue Res;
1764 
1765  if (InstCnt) *InstCnt = 0;
1766 
1767  // Take care of cases that should use andi/andis first.
1768  SelectAndParts64(dl, Res, InstCnt);
1769 
1770  // If we've not yet selected a 'starting' instruction, and we have no zeros
1771  // to fill in, select the (Value, RLAmt) with the highest priority (largest
1772  // number of groups), and start with this rotated value.
1773  if ((!HasZeros || LateMask) && !Res) {
1774  // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
1775  // groups will come first, and so the VRI representing the largest number
1776  // of groups might not be first (it might be the first Repl32 groups).
1777  unsigned MaxGroupsIdx = 0;
1778  if (!ValueRotsVec[0].Repl32) {
1779  for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
1780  if (ValueRotsVec[i].Repl32) {
1781  if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
1782  MaxGroupsIdx = i;
1783  break;
1784  }
1785  }
1786 
1787  ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
1788  bool NeedsRotate = false;
1789  if (VRI.RLAmt) {
1790  NeedsRotate = true;
1791  } else if (VRI.Repl32) {
1792  for (auto &BG : BitGroups) {
1793  if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
1794  BG.Repl32 != VRI.Repl32)
1795  continue;
1796 
1797  // We don't need a rotate if the bit group is confined to the lower
1798  // 32 bits.
1799  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
1800  continue;
1801 
1802  NeedsRotate = true;
1803  break;
1804  }
1805  }
1806 
1807  if (NeedsRotate)
1808  Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
1809  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
1810  InstCnt);
1811  else
1812  Res = VRI.V;
1813 
1814  // Now, remove all groups with this underlying value and rotation factor.
1815  if (Res)
1816  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1817  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
1818  BG.Repl32 == VRI.Repl32;
1819  });
1820  }
1821 
1822  // Because 64-bit rotates are more flexible than inserts, we might have a
1823  // preference regarding which one we do first (to save one instruction).
1824  if (!Res)
1825  for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
1826  if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
1827  false) <
1828  SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
1829  true)) {
1830  if (I != BitGroups.begin()) {
1831  BitGroup BG = *I;
1832  BitGroups.erase(I);
1833  BitGroups.insert(BitGroups.begin(), BG);
1834  }
1835 
1836  break;
1837  }
1838  }
1839 
1840  // Insert the other groups (one at a time).
1841  for (auto &BG : BitGroups) {
1842  if (!Res)
1843  Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
1844  BG.EndIdx, InstCnt);
1845  else
1846  Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
1847  BG.StartIdx, BG.EndIdx, InstCnt);
1848  }
1849 
1850  if (LateMask) {
1851  uint64_t Mask = getZerosMask();
1852 
1853  // We can use the 32-bit andi/andis technique if the mask does not
1854  // require any higher-order bits. This can save an instruction compared
1855  // to always using the general 64-bit technique.
1856  bool Use32BitInsts = isUInt<32>(Mask);
1857  // Compute the masks for andi/andis that would be necessary.
1858  unsigned ANDIMask = (Mask & UINT16_MAX),
1859  ANDISMask = (Mask >> 16) & UINT16_MAX;
1860 
1861  if (Use32BitInsts) {
1862  assert((ANDIMask != 0 || ANDISMask != 0) &&
1863  "No set bits in mask when using 32-bit ands for 64-bit value");
1864 
1865  if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1866  (unsigned) (ANDISMask != 0) +
1867  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1868 
1869  SDValue ANDIVal, ANDISVal;
1870  if (ANDIMask != 0)
1871  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
1872  Res, getI32Imm(ANDIMask, dl)), 0);
1873  if (ANDISMask != 0)
1874  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
1875  Res, getI32Imm(ANDISMask, dl)), 0);
1876 
1877  if (!ANDIVal)
1878  Res = ANDISVal;
1879  else if (!ANDISVal)
1880  Res = ANDIVal;
1881  else
1882  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1883  ANDIVal, ANDISVal), 0);
1884  } else {
1885  if (InstCnt) *InstCnt += getInt64Count(Mask) + /* and */ 1;
1886 
1887  SDValue MaskVal = SDValue(getInt64(CurDAG, dl, Mask), 0);
1888  Res =
1889  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
1890  Res, MaskVal), 0);
1891  }
1892  }
1893 
1894  return Res.getNode();
1895  }
1896 
1897  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
1898  // Fill in BitGroups.
1899  collectBitGroups(LateMask);
1900  if (BitGroups.empty())
1901  return nullptr;
1902 
1903  // For 64-bit values, figure out when we can use 32-bit instructions.
1904  if (Bits.size() == 64)
1905  assignRepl32BitGroups();
1906 
1907  // Fill in ValueRotsVec.
1908  collectValueRotInfo();
1909 
1910  if (Bits.size() == 32) {
1911  return Select32(N, LateMask, InstCnt);
1912  } else {
1913  assert(Bits.size() == 64 && "Not 64 bits here?");
1914  return Select64(N, LateMask, InstCnt);
1915  }
1916 
1917  return nullptr;
1918  }
1919 
1920  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
1921  BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
1922  }
1923 
1925 
1926  bool HasZeros;
1928 
1929  SmallVector<BitGroup, 16> BitGroups;
1930 
1931  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
1932  SmallVector<ValueRotInfo, 16> ValueRotsVec;
1933 
1934  SelectionDAG *CurDAG;
1935 
1936 public:
1937  BitPermutationSelector(SelectionDAG *DAG)
1938  : CurDAG(DAG) {}
1939 
1940  // Here we try to match complex bit permutations into a set of
1941  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
1942  // known to produce optimial code for common cases (like i32 byte swapping).
1943  SDNode *Select(SDNode *N) {
1944  Memoizer.clear();
1945  auto Result =
1946  getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
1947  if (!Result.first)
1948  return nullptr;
1949  Bits = std::move(*Result.second);
1950 
1951  DEBUG(dbgs() << "Considering bit-permutation-based instruction"
1952  " selection for: ");
1953  DEBUG(N->dump(CurDAG));
1954 
1955  // Fill it RLAmt and set HasZeros.
1956  computeRotationAmounts();
1957 
1958  if (!HasZeros)
1959  return Select(N, false);
1960 
1961  // We currently have two techniques for handling results with zeros: early
1962  // masking (the default) and late masking. Late masking is sometimes more
1963  // efficient, but because the structure of the bit groups is different, it
1964  // is hard to tell without generating both and comparing the results. With
1965  // late masking, we ignore zeros in the resulting value when inserting each
1966  // set of bit groups, and then mask in the zeros at the end. With early
1967  // masking, we only insert the non-zero parts of the result at every step.
1968 
1969  unsigned InstCnt, InstCntLateMask;
1970  DEBUG(dbgs() << "\tEarly masking:\n");
1971  SDNode *RN = Select(N, false, &InstCnt);
1972  DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
1973 
1974  DEBUG(dbgs() << "\tLate masking:\n");
1975  SDNode *RNLM = Select(N, true, &InstCntLateMask);
1976  DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
1977  " instructions\n");
1978 
1979  if (InstCnt <= InstCntLateMask) {
1980  DEBUG(dbgs() << "\tUsing early-masking for isel\n");
1981  return RN;
1982  }
1983 
1984  DEBUG(dbgs() << "\tUsing late-masking for isel\n");
1985  return RNLM;
1986  }
1987 };
1988 } // anonymous namespace
1989 
1990 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
1991  if (N->getValueType(0) != MVT::i32 &&
1992  N->getValueType(0) != MVT::i64)
1993  return false;
1994 
1995  if (!UseBitPermRewriter)
1996  return false;
1997 
1998  switch (N->getOpcode()) {
1999  default: break;
2000  case ISD::ROTL:
2001  case ISD::SHL:
2002  case ISD::SRL:
2003  case ISD::AND:
2004  case ISD::OR: {
2005  BitPermutationSelector BPS(CurDAG);
2006  if (SDNode *New = BPS.Select(N)) {
2007  ReplaceNode(N, New);
2008  return true;
2009  }
2010  return false;
2011  }
2012  }
2013 
2014  return false;
2015 }
2016 
2017 /// SelectCC - Select a comparison of the specified values with the specified
2018 /// condition code, returning the CR# of the expression.
2019 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2020  const SDLoc &dl) {
2021  // Always select the LHS.
2022  unsigned Opc;
2023 
2024  if (LHS.getValueType() == MVT::i32) {
2025  unsigned Imm;
2026  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2027  if (isInt32Immediate(RHS, Imm)) {
2028  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
2029  if (isUInt<16>(Imm))
2030  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
2031  getI32Imm(Imm & 0xFFFF, dl)),
2032  0);
2033  // If this is a 16-bit signed immediate, fold it.
2034  if (isInt<16>((int)Imm))
2035  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
2036  getI32Imm(Imm & 0xFFFF, dl)),
2037  0);
2038 
2039  // For non-equality comparisons, the default code would materialize the
2040  // constant, then compare against it, like this:
2041  // lis r2, 4660
2042  // ori r2, r2, 22136
2043  // cmpw cr0, r3, r2
2044  // Since we are just comparing for equality, we can emit this instead:
2045  // xoris r0,r3,0x1234
2046  // cmplwi cr0,r0,0x5678
2047  // beq cr0,L6
2048  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
2049  getI32Imm(Imm >> 16, dl)), 0);
2050  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
2051  getI32Imm(Imm & 0xFFFF, dl)), 0);
2052  }
2053  Opc = PPC::CMPLW;
2054  } else if (ISD::isUnsignedIntSetCC(CC)) {
2055  if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
2056  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
2057  getI32Imm(Imm & 0xFFFF, dl)), 0);
2058  Opc = PPC::CMPLW;
2059  } else {
2060  short SImm;
2061  if (isIntS16Immediate(RHS, SImm))
2062  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
2063  getI32Imm((int)SImm & 0xFFFF,
2064  dl)),
2065  0);
2066  Opc = PPC::CMPW;
2067  }
2068  } else if (LHS.getValueType() == MVT::i64) {
2069  uint64_t Imm;
2070  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2071  if (isInt64Immediate(RHS.getNode(), Imm)) {
2072  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
2073  if (isUInt<16>(Imm))
2074  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
2075  getI32Imm(Imm & 0xFFFF, dl)),
2076  0);
2077  // If this is a 16-bit signed immediate, fold it.
2078  if (isInt<16>(Imm))
2079  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
2080  getI32Imm(Imm & 0xFFFF, dl)),
2081  0);
2082 
2083  // For non-equality comparisons, the default code would materialize the
2084  // constant, then compare against it, like this:
2085  // lis r2, 4660
2086  // ori r2, r2, 22136
2087  // cmpd cr0, r3, r2
2088  // Since we are just comparing for equality, we can emit this instead:
2089  // xoris r0,r3,0x1234
2090  // cmpldi cr0,r0,0x5678
2091  // beq cr0,L6
2092  if (isUInt<32>(Imm)) {
2093  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
2094  getI64Imm(Imm >> 16, dl)), 0);
2095  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
2096  getI64Imm(Imm & 0xFFFF, dl)),
2097  0);
2098  }
2099  }
2100  Opc = PPC::CMPLD;
2101  } else if (ISD::isUnsignedIntSetCC(CC)) {
2102  if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
2103  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
2104  getI64Imm(Imm & 0xFFFF, dl)), 0);
2105  Opc = PPC::CMPLD;
2106  } else {
2107  short SImm;
2108  if (isIntS16Immediate(RHS, SImm))
2109  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
2110  getI64Imm(SImm & 0xFFFF, dl)),
2111  0);
2112  Opc = PPC::CMPD;
2113  }
2114  } else if (LHS.getValueType() == MVT::f32) {
2115  Opc = PPC::FCMPUS;
2116  } else {
2117  assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
2118  Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
2119  }
2120  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
2121 }
2122 
// NOTE(review): the line carrying this function's signature is absent from
// this listing (the numbering jumps from 2121 to 2124); confirm the header
// against the real file before editing.
//
// The body maps an ISD condition code CC onto one of the PPC::PRED_*
// predicates. Ordered, unordered and plain integer spellings of the same
// relation share a result; SETUEQ/SETONE/SETOLE/SETOGE are rejected as
// conditions that legalization should already have removed.
2124  switch (CC) {
2125  case ISD::SETUEQ:
2126  case ISD::SETONE:
2127  case ISD::SETOLE:
2128  case ISD::SETOGE:
2129  llvm_unreachable("Should be lowered by legalize!");
2130  default: llvm_unreachable("Unknown condition!");
2131  case ISD::SETOEQ:
2132  case ISD::SETEQ: return PPC::PRED_EQ;
2133  case ISD::SETUNE:
2134  case ISD::SETNE: return PPC::PRED_NE;
2135  case ISD::SETOLT:
2136  case ISD::SETLT: return PPC::PRED_LT;
2137  case ISD::SETULE:
2138  case ISD::SETLE: return PPC::PRED_LE;
2139  case ISD::SETOGT:
2140  case ISD::SETGT: return PPC::PRED_GT;
2141  case ISD::SETUGE:
2142  case ISD::SETGE: return PPC::PRED_GE;
// SETO ("ordered") and SETUO ("unordered") select the not-unordered and
// unordered predicates respectively.
2143  case ISD::SETO: return PPC::PRED_NU;
2144  case ISD::SETUO: return PPC::PRED_UN;
2145  // These two are invalid for floating point. Assume we have int.
2146  case ISD::SETULT: return PPC::PRED_LT;
2147  case ISD::SETUGT: return PPC::PRED_GT;
2148  }
2149 }
2150 
2151 /// getCRIdxForSetCC - Return the index of the condition register field
2152 /// associated with the SetCC condition, and whether or not the field is
2153 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
2154 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
2155  Invert = false;
2156  switch (CC) {
2157  default: llvm_unreachable("Unknown condition!");
2158  case ISD::SETOLT:
2159  case ISD::SETLT: return 0; // Bit #0 = SETOLT
2160  case ISD::SETOGT:
2161  case ISD::SETGT: return 1; // Bit #1 = SETOGT
2162  case ISD::SETOEQ:
2163  case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
2164  case ISD::SETUO: return 3; // Bit #3 = SETUO
2165  case ISD::SETUGE:
2166  case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
2167  case ISD::SETULE:
2168  case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
2169  case ISD::SETUNE:
2170  case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
2171  case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
2172  case ISD::SETUEQ:
2173  case ISD::SETOGE:
2174  case ISD::SETOLE:
2175  case ISD::SETONE:
2176  llvm_unreachable("Invalid branch code: should be expanded by legalize");
2177  // These are invalid for floating point. Assume integer.
2178  case ISD::SETULT: return 0;
2179  case ISD::SETUGT: return 1;
2180  }
2181 }
2182 
2183 // getVCmpInst: return the vector compare instruction for the specified
2184 // vector type and condition code. Since this is for altivec specific code,
2185 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
2186 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
2187  bool HasVSX, bool &Swap, bool &Negate) {
2188  Swap = false;
2189  Negate = false;
2190 
2191  if (VecVT.isFloatingPoint()) {
2192  /* Handle some cases by swapping input operands. */
2193  switch (CC) {
2194  case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
2195  case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
2196  case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
2197  case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
2198  case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
2199  case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
2200  default: break;
2201  }
2202  /* Handle some cases by negating the result. */
2203  switch (CC) {
2204  case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
2205  case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
2206  case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
2207  case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
2208  default: break;
2209  }
2210  /* We have instructions implementing the remaining cases. */
2211  switch (CC) {
2212  case ISD::SETEQ:
2213  case ISD::SETOEQ:
2214  if (VecVT == MVT::v4f32)
2215  return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
2216  else if (VecVT == MVT::v2f64)
2217  return PPC::XVCMPEQDP;
2218  break;
2219  case ISD::SETGT:
2220  case ISD::SETOGT:
2221  if (VecVT == MVT::v4f32)
2222  return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
2223  else if (VecVT == MVT::v2f64)
2224  return PPC::XVCMPGTDP;
2225  break;
2226  case ISD::SETGE:
2227  case ISD::SETOGE:
2228  if (VecVT == MVT::v4f32)
2229  return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
2230  else if (VecVT == MVT::v2f64)
2231  return PPC::XVCMPGEDP;
2232  break;
2233  default:
2234  break;
2235  }
2236  llvm_unreachable("Invalid floating-point vector compare condition");
2237  } else {
2238  /* Handle some cases by swapping input operands. */
2239  switch (CC) {
2240  case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
2241  case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
2242  case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
2243  case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
2244  default: break;
2245  }
2246  /* Handle some cases by negating the result. */
2247  switch (CC) {
2248  case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
2249  case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
2250  case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
2251  case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
2252  default: break;
2253  }
2254  /* We have instructions implementing the remaining cases. */
2255  switch (CC) {
2256  case ISD::SETEQ:
2257  case ISD::SETUEQ:
2258  if (VecVT == MVT::v16i8)
2259  return PPC::VCMPEQUB;
2260  else if (VecVT == MVT::v8i16)
2261  return PPC::VCMPEQUH;
2262  else if (VecVT == MVT::v4i32)
2263  return PPC::VCMPEQUW;
2264  else if (VecVT == MVT::v2i64)
2265  return PPC::VCMPEQUD;
2266  break;
2267  case ISD::SETGT:
2268  if (VecVT == MVT::v16i8)
2269  return PPC::VCMPGTSB;
2270  else if (VecVT == MVT::v8i16)
2271  return PPC::VCMPGTSH;
2272  else if (VecVT == MVT::v4i32)
2273  return PPC::VCMPGTSW;
2274  else if (VecVT == MVT::v2i64)
2275  return PPC::VCMPGTSD;
2276  break;
2277  case ISD::SETUGT:
2278  if (VecVT == MVT::v16i8)
2279  return PPC::VCMPGTUB;
2280  else if (VecVT == MVT::v8i16)
2281  return PPC::VCMPGTUH;
2282  else if (VecVT == MVT::v4i32)
2283  return PPC::VCMPGTUW;
2284  else if (VecVT == MVT::v2i64)
2285  return PPC::VCMPGTUD;
2286  break;
2287  default:
2288  break;
2289  }
2290  llvm_unreachable("Invalid integer vector compare condition");
2291  }
2292 }
2293 
2294 bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
2295  SDLoc dl(N);
2296  unsigned Imm;
2297  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2298  EVT PtrVT =
2299  CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
2300  bool isPPC64 = (PtrVT == MVT::i64);
2301 
2302  if (!PPCSubTarget->useCRBits() &&
2303  isInt32Immediate(N->getOperand(1), Imm)) {
2304  // We can codegen setcc op, imm very efficiently compared to a brcond.
2305  // Check for those cases here.
2306  // setcc op, 0
2307  if (Imm == 0) {
2308  SDValue Op = N->getOperand(0);
2309  switch (CC) {
2310  default: break;
2311  case ISD::SETEQ: {
2312  Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
2313  SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
2314  getI32Imm(31, dl) };
2315  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2316  return true;
2317  }
2318  case ISD::SETNE: {
2319  if (isPPC64) break;
2320  SDValue AD =
2321  SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2322  Op, getI32Imm(~0U, dl)), 0);
2323  CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
2324  return true;
2325  }
2326  case ISD::SETLT: {
2327  SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
2328  getI32Imm(31, dl) };
2329  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2330  return true;
2331  }
2332  case ISD::SETGT: {
2333  SDValue T =
2334  SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
2335  T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
2336  SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
2337  getI32Imm(31, dl) };
2338  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2339  return true;
2340  }
2341  }
2342  } else if (Imm == ~0U) { // setcc op, -1
2343  SDValue Op = N->getOperand(0);
2344  switch (CC) {
2345  default: break;
2346  case ISD::SETEQ:
2347  if (isPPC64) break;
2348  Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2349  Op, getI32Imm(1, dl)), 0);
2350  CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
2351  SDValue(CurDAG->getMachineNode(PPC::LI, dl,
2352  MVT::i32,
2353  getI32Imm(0, dl)),
2354  0), Op.getValue(1));
2355  return true;
2356  case ISD::SETNE: {
2357  if (isPPC64) break;
2358  Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
2359  SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2360  Op, getI32Imm(~0U, dl));
2361  CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
2362  SDValue(AD, 1));
2363  return true;
2364  }
2365  case ISD::SETLT: {
2366  SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
2367  getI32Imm(1, dl)), 0);
2368  SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
2369  Op), 0);
2370  SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
2371  getI32Imm(31, dl) };
2372  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2373  return true;
2374  }
2375  case ISD::SETGT: {
2376  SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
2377  getI32Imm(31, dl) };
2378  Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2379  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
2380  return true;
2381  }
2382  }
2383  }
2384  }
2385 
2386  SDValue LHS = N->getOperand(0);
2387  SDValue RHS = N->getOperand(1);
2388 
2389  // Altivec Vector compare instructions do not set any CR register by default and
2390  // vector compare operations return the same type as the operands.
2391  if (LHS.getValueType().isVector()) {
2392  if (PPCSubTarget->hasQPX())
2393  return false;
2394 
2395  EVT VecVT = LHS.getValueType();
2396  bool Swap, Negate;
2397  unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
2398  PPCSubTarget->hasVSX(), Swap, Negate);
2399  if (Swap)
2400  std::swap(LHS, RHS);
2401 
2402  EVT ResVT = VecVT.changeVectorElementTypeToInteger();
2403  if (Negate) {
2404  SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
2405  CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
2406  ResVT, VCmp, VCmp);
2407  return true;
2408  }
2409 
2410  CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
2411  return true;
2412  }
2413 
2414  if (PPCSubTarget->useCRBits())
2415  return false;
2416 
2417  bool Inv;
2418  unsigned Idx = getCRIdxForSetCC(CC, Inv);
2419  SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
2420  SDValue IntCR;
2421 
2422  // Force the ccreg into CR7.
2423  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
2424 
2425  SDValue InFlag(nullptr, 0); // Null incoming flag value.
2426  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
2427  InFlag).getValue(1);
2428 
2429  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
2430  CCReg), 0);
2431 
2432  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
2433  getI32Imm(31, dl), getI32Imm(31, dl) };
2434  if (!Inv) {
2435  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2436  return true;
2437  }
2438 
2439  // Get the specified bit.
2440  SDValue Tmp =
2441  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2442  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
2443  return true;
2444 }
2445 
// Attach N's memory operand to Result.
// N is cast to MemSDNode and Result to MachineSDNode, so callers must pass
// nodes of those kinds; exactly one memory operand is transferred.
2446 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
2447  // Transfer memoperands.
// NOTE(review): the listing line that declares MemOp (numbered 2448) is
// absent here; confirm its declaration against the real file.
2449  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
// The memory-reference range handed over is [MemOp, MemOp + 1), i.e. one entry.
2450  cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
2451 }
2452 
2453 
2454 // Select - Convert the specified operand from a target-independent to a
2455 // target-specific node if it hasn't already been changed.
2457  SDLoc dl(N);
2458  if (N->isMachineOpcode()) {
2459  N->setNodeId(-1);
2460  return; // Already selected.
2461  }
2462 
2463  // In case any misguided DAG-level optimizations form an ADD with a
2464  // TargetConstant operand, crash here instead of miscompiling (by selecting
2465  // an r+r add instead of some kind of r+i add).
2466  if (N->getOpcode() == ISD::ADD &&
2468  llvm_unreachable("Invalid ADD with TargetConstant operand");
2469 
2470  // Try matching complex bit permutations before doing anything else.
2471  if (tryBitPermutation(N))
2472  return;
2473 
2474  switch (N->getOpcode()) {
2475  default: break;
2476 
2477  case ISD::Constant: {
2478  if (N->getValueType(0) == MVT::i64) {
2479  ReplaceNode(N, getInt64(CurDAG, N));
2480  return;
2481  }
2482  break;
2483  }
2484 
2485  case ISD::SETCC: {
2486  if (trySETCC(N))
2487  return;
2488  break;
2489  }
2490  case PPCISD::GlobalBaseReg:
2491  ReplaceNode(N, getGlobalBaseReg());
2492  return;
2493 
2494  case ISD::FrameIndex:
2495  selectFrameIndex(N, N);
2496  return;
2497 
2498  case PPCISD::MFOCRF: {
2499  SDValue InFlag = N->getOperand(1);
2500  ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
2501  N->getOperand(0), InFlag));
2502  return;
2503  }
2504 
2505  case PPCISD::READ_TIME_BASE: {
2506  ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
2507  MVT::Other, N->getOperand(0)));
2508  return;
2509  }
2510 
2511  case PPCISD::SRA_ADDZE: {
2512  SDValue N0 = N->getOperand(0);
2513  SDValue ShiftAmt =
2514  CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
2515  getConstantIntValue(), dl,
2516  N->getValueType(0));
2517  if (N->getValueType(0) == MVT::i64) {
2518  SDNode *Op =
2519  CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
2520  N0, ShiftAmt);
2521  CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
2522  SDValue(Op, 1));
2523  return;
2524  } else {
2525  assert(N->getValueType(0) == MVT::i32 &&
2526  "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
2527  SDNode *Op =
2528  CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
2529  N0, ShiftAmt);
2530  CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
2531  SDValue(Op, 1));
2532  return;
2533  }
2534  }
2535 
2536  case ISD::LOAD: {
2537  // Handle preincrement loads.
2538  LoadSDNode *LD = cast<LoadSDNode>(N);
2539  EVT LoadedVT = LD->getMemoryVT();
2540 
2541  // Normal loads are handled by code generated from the .td file.
2542  if (LD->getAddressingMode() != ISD::PRE_INC)
2543  break;
2544 
2545  SDValue Offset = LD->getOffset();
2546  if (Offset.getOpcode() == ISD::TargetConstant ||
2547  Offset.getOpcode() == ISD::TargetGlobalAddress) {
2548 
2549  unsigned Opcode;
2550  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
2551  if (LD->getValueType(0) != MVT::i64) {
2552  // Handle PPC32 integer and normal FP loads.
2553  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2554  switch (LoadedVT.getSimpleVT().SimpleTy) {
2555  default: llvm_unreachable("Invalid PPC load type!");
2556  case MVT::f64: Opcode = PPC::LFDU; break;
2557  case MVT::f32: Opcode = PPC::LFSU; break;
2558  case MVT::i32: Opcode = PPC::LWZU; break;
2559  case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
2560  case MVT::i1:
2561  case MVT::i8: Opcode = PPC::LBZU; break;
2562  }
2563  } else {
2564  assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
2565  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2566  switch (LoadedVT.getSimpleVT().SimpleTy) {
2567  default: llvm_unreachable("Invalid PPC load type!");
2568  case MVT::i64: Opcode = PPC::LDU; break;
2569  case MVT::i32: Opcode = PPC::LWZU8; break;
2570  case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
2571  case MVT::i1:
2572  case MVT::i8: Opcode = PPC::LBZU8; break;
2573  }
2574  }
2575 
2576  SDValue Chain = LD->getChain();
2577  SDValue Base = LD->getBasePtr();
2578  SDValue Ops[] = { Offset, Base, Chain };
2579  SDNode *MN = CurDAG->getMachineNode(
2580  Opcode, dl, LD->getValueType(0),
2581  PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
2582  transferMemOperands(N, MN);
2583  ReplaceNode(N, MN);
2584  return;
2585  } else {
2586  unsigned Opcode;
2587  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
2588  if (LD->getValueType(0) != MVT::i64) {
2589  // Handle PPC32 integer and normal FP loads.
2590  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2591  switch (LoadedVT.getSimpleVT().SimpleTy) {
2592  default: llvm_unreachable("Invalid PPC load type!");
2593  case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
2594  case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
2595  case MVT::f64: Opcode = PPC::LFDUX; break;
2596  case MVT::f32: Opcode = PPC::LFSUX; break;
2597  case MVT::i32: Opcode = PPC::LWZUX; break;
2598  case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
2599  case MVT::i1:
2600  case MVT::i8: Opcode = PPC::LBZUX; break;
2601  }
2602  } else {
2603  assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
2604  assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
2605  "Invalid sext update load");
2606  switch (LoadedVT.getSimpleVT().SimpleTy) {
2607  default: llvm_unreachable("Invalid PPC load type!");
2608  case MVT::i64: Opcode = PPC::LDUX; break;
2609  case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
2610  case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
2611  case MVT::i1:
2612  case MVT::i8: Opcode = PPC::LBZUX8; break;
2613  }
2614  }
2615 
2616  SDValue Chain = LD->getChain();
2617  SDValue Base = LD->getBasePtr();
2618  SDValue Ops[] = { Base, Offset, Chain };
2619  SDNode *MN = CurDAG->getMachineNode(
2620  Opcode, dl, LD->getValueType(0),
2621  PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
2622  transferMemOperands(N, MN);
2623  ReplaceNode(N, MN);
2624  return;
2625  }
2626  }
2627 
2628  case ISD::AND: {
2629  unsigned Imm, Imm2, SH, MB, ME;
2630  uint64_t Imm64;
2631 
2632  // If this is an and of a value rotated between 0 and 31 bits and then and'd
2633  // with a mask, emit rlwinm
2634  if (isInt32Immediate(N->getOperand(1), Imm) &&
2635  isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
2636  SDValue Val = N->getOperand(0).getOperand(0);
2637  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
2638  getI32Imm(ME, dl) };
2639  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2640  return;
2641  }
2642  // If this is just a masked value where the input is not handled above, and
2643  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
2644  if (isInt32Immediate(N->getOperand(1), Imm) &&
2645  isRunOfOnes(Imm, MB, ME) &&
2646  N->getOperand(0).getOpcode() != ISD::ROTL) {
2647  SDValue Val = N->getOperand(0);
2648  SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
2649  getI32Imm(ME, dl) };
2650  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2651  return;
2652  }
2653  // If this is a 64-bit zero-extension mask, emit rldicl.
2654  if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
2655  isMask_64(Imm64)) {
2656  SDValue Val = N->getOperand(0);
2657  MB = 64 - countTrailingOnes(Imm64);
2658  SH = 0;
2659 
2660  if (Val.getOpcode() == ISD::ANY_EXTEND) {
2661  auto Op0 = Val.getOperand(0);
2662  if ( Op0.getOpcode() == ISD::SRL &&
2663  isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
2664 
2665  auto ResultType = Val.getNode()->getValueType(0);
2666  auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2667  ResultType);
2668  SDValue IDVal (ImDef, 0);
2669 
2670  Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2671  ResultType, IDVal, Op0.getOperand(0),
2672  getI32Imm(1, dl)), 0);
2673  SH = 64 - Imm;
2674  }
2675  }
2676 
2677  // If the operand is a logical right shift, we can fold it into this
2678  // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
2679  // for n <= mb. The right shift is really a left rotate followed by a
2680  // mask, and this mask is a more-restrictive sub-mask of the mask implied
2681  // by the shift.
2682  if (Val.getOpcode() == ISD::SRL &&
2683  isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
2684  assert(Imm < 64 && "Illegal shift amount");
2685  Val = Val.getOperand(0);
2686  SH = 64 - Imm;
2687  }
2688 
2689  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
2690  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
2691  return;
2692  }
2693  // AND X, 0 -> 0, not "rlwinm 32".
2694  if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
2695  ReplaceUses(SDValue(N, 0), N->getOperand(1));
2696  return;
2697  }
2698  // ISD::OR doesn't get all the bitfield insertion fun.
2699  // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
2700  // bitfield insert.
2701  if (isInt32Immediate(N->getOperand(1), Imm) &&
2702  N->getOperand(0).getOpcode() == ISD::OR &&
2703  isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
2704  // The idea here is to check whether this is equivalent to:
2705  // (c1 & m) | (x & ~m)
2706  // where m is a run-of-ones mask. The logic here is that, for each bit in
2707  // c1 and c2:
2708  // - if both are 1, then the output will be 1.
2709  // - if both are 0, then the output will be 0.
2710  // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
2711  // come from x.
2712  // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
2713  // be 0.
2714  // If that last condition is never the case, then we can form m from the
2715  // bits that are the same between c1 and c2.
2716  unsigned MB, ME;
2717  if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
2718  SDValue Ops[] = { N->getOperand(0).getOperand(0),
2719  N->getOperand(0).getOperand(1),
2720  getI32Imm(0, dl), getI32Imm(MB, dl),
2721  getI32Imm(ME, dl) };
2722  ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
2723  return;
2724  }
2725  }
2726 
2727  // Other cases are autogenerated.
2728  break;
2729  }
2730  case ISD::OR: {
2731  if (N->getValueType(0) == MVT::i32)
2732  if (tryBitfieldInsert(N))
2733  return;
2734 
2735  short Imm;
2736  if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
2737  isIntS16Immediate(N->getOperand(1), Imm)) {
2738  APInt LHSKnownZero, LHSKnownOne;
2739  CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne);
2740 
2741  // If this is equivalent to an add, then we can fold it with the
2742  // FrameIndex calculation.
2743  if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
2744  selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
2745  return;
2746  }
2747  }
2748 
2749  // Other cases are autogenerated.
2750  break;
2751  }
2752  case ISD::ADD: {
2753  short Imm;
2754  if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
2755  isIntS16Immediate(N->getOperand(1), Imm)) {
2756  selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
2757  return;
2758  }
2759 
2760  break;
2761  }
2762  case ISD::SHL: {
2763  unsigned Imm, SH, MB, ME;
2764  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
2765  isRotateAndMask(N, Imm, true, SH, MB, ME)) {
2766  SDValue Ops[] = { N->getOperand(0).getOperand(0),
2767  getI32Imm(SH, dl), getI32Imm(MB, dl),
2768  getI32Imm(ME, dl) };
2769  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2770  return;
2771  }
2772 
2773  // Other cases are autogenerated.
2774  break;
2775  }
2776  case ISD::SRL: {
2777  unsigned Imm, SH, MB, ME;
2778  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
2779  isRotateAndMask(N, Imm, true, SH, MB, ME)) {
2780  SDValue Ops[] = { N->getOperand(0).getOperand(0),
2781  getI32Imm(SH, dl), getI32Imm(MB, dl),
2782  getI32Imm(ME, dl) };
2783  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2784  return;
2785  }
2786 
2787  // Other cases are autogenerated.
2788  break;
2789  }
2790  // FIXME: Remove this once the ANDI glue bug is fixed:
2792  case PPCISD::ANDIo_1_GT_BIT: {
2793  if (!ANDIGlueBug)
2794  break;
2795 
2796  EVT InVT = N->getOperand(0).getValueType();
2797  assert((InVT == MVT::i64 || InVT == MVT::i32) &&
2798  "Invalid input type for ANDIo_1_EQ_BIT");
2799 
2800  unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
2801  SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
2802  N->getOperand(0),
2803  CurDAG->getTargetConstant(1, dl, InVT)),
2804  0);
2805  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2806  SDValue SRIdxVal =
2808  PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
2809 
2810  CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
2811  SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
2812  return;
2813  }
2814  case ISD::SELECT_CC: {
2815  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2816  EVT PtrVT =
2817  CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
2818  bool isPPC64 = (PtrVT == MVT::i64);
2819 
2820  // If this is a select of i1 operands, we'll pattern match it.
2821  if (PPCSubTarget->useCRBits() &&
2822  N->getOperand(0).getValueType() == MVT::i1)
2823  break;
2824 
2825  // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
2826  if (!isPPC64)
2827  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2828  if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2829  if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
2830  if (N1C->isNullValue() && N3C->isNullValue() &&
2831  N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
2832  // FIXME: Implement this optzn for PPC64.
2833  N->getValueType(0) == MVT::i32) {
2834  SDNode *Tmp =
2835  CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2836  N->getOperand(0), getI32Imm(~0U, dl));
2837  CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
2838  N->getOperand(0), SDValue(Tmp, 1));
2839  return;
2840  }
2841 
2842  SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
2843 
2844  if (N->getValueType(0) == MVT::i1) {
2845  // An i1 select is: (c & t) | (!c & f).
2846  bool Inv;
2847  unsigned Idx = getCRIdxForSetCC(CC, Inv);
2848 
2849  unsigned SRI;
2850  switch (Idx) {
2851  default: llvm_unreachable("Invalid CC index");
2852  case 0: SRI = PPC::sub_lt; break;
2853  case 1: SRI = PPC::sub_gt; break;
2854  case 2: SRI = PPC::sub_eq; break;
2855  case 3: SRI = PPC::sub_un; break;
2856  }
2857 
2858  SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
2859 
2860  SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
2861  CCBit, CCBit), 0);
2862  SDValue C = Inv ? NotCCBit : CCBit,
2863  NotC = Inv ? CCBit : NotCCBit;
2864 
2865  SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
2866  C, N->getOperand(2)), 0);
2867  SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
2868  NotC, N->getOperand(3)), 0);
2869 
2870  CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
2871  return;
2872  }
2873 
2874  unsigned BROpc = getPredicateForSetCC(CC);
2875 
2876  unsigned SelectCCOp;
2877  if (N->getValueType(0) == MVT::i32)
2878  SelectCCOp = PPC::SELECT_CC_I4;
2879  else if (N->getValueType(0) == MVT::i64)
2880  SelectCCOp = PPC::SELECT_CC_I8;
2881  else if (N->getValueType(0) == MVT::f32)
2882  if (PPCSubTarget->hasP8Vector())
2883  SelectCCOp = PPC::SELECT_CC_VSSRC;
2884  else
2885  SelectCCOp = PPC::SELECT_CC_F4;
2886  else if (N->getValueType(0) == MVT::f64)
2887  if (PPCSubTarget->hasVSX())
2888  SelectCCOp = PPC::SELECT_CC_VSFRC;
2889  else
2890  SelectCCOp = PPC::SELECT_CC_F8;
2891  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
2892  SelectCCOp = PPC::SELECT_CC_QFRC;
2893  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
2894  SelectCCOp = PPC::SELECT_CC_QSRC;
2895  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
2896  SelectCCOp = PPC::SELECT_CC_QBRC;
2897  else if (N->getValueType(0) == MVT::v2f64 ||
2898  N->getValueType(0) == MVT::v2i64)
2899  SelectCCOp = PPC::SELECT_CC_VSRC;
2900  else
2901  SelectCCOp = PPC::SELECT_CC_VRRC;
2902 
2903  SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
2904  getI32Imm(BROpc, dl) };
2905  CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
2906  return;
2907  }
2908  case ISD::VSELECT:
2909  if (PPCSubTarget->hasVSX()) {
2910  SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
2911  CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
2912  return;
2913  }
2914 
2915  break;
2916  case ISD::VECTOR_SHUFFLE:
2917  if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
2918  N->getValueType(0) == MVT::v2i64)) {
2919  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
2920 
2921  SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
2922  Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
2923  unsigned DM[2];
2924 
2925  for (int i = 0; i < 2; ++i)
2926  if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
2927  DM[i] = 0;
2928  else
2929  DM[i] = 1;
2930 
2931  if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
2932  Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
2933  isa<LoadSDNode>(Op1.getOperand(0))) {
2934  LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
2935  SDValue Base, Offset;
2936 
2937  if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
2938  (LD->getMemoryVT() == MVT::f64 ||
2939  LD->getMemoryVT() == MVT::i64) &&
2940  SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
2941  SDValue Chain = LD->getChain();
2942  SDValue Ops[] = { Base, Offset, Chain };
2943  CurDAG->SelectNodeTo(N, PPC::LXVDSX, N->getValueType(0), Ops);
2944  return;
2945  }
2946  }
2947 
2948  // For little endian, we must swap the input operands and adjust
2949  // the mask elements (reverse and invert them).
2950  if (PPCSubTarget->isLittleEndian()) {
2951  std::swap(Op1, Op2);
2952  unsigned tmp = DM[0];
2953  DM[0] = 1 - DM[1];
2954  DM[1] = 1 - tmp;
2955  }
2956 
2957  SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
2958  MVT::i32);
2959  SDValue Ops[] = { Op1, Op2, DMV };
2960  CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
2961  return;
2962  }
2963 
2964  break;
2965  case PPCISD::BDNZ:
2966  case PPCISD::BDZ: {
2967  bool IsPPC64 = PPCSubTarget->isPPC64();
2968  SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
2969  CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
2970  ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2971  : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
2972  MVT::Other, Ops);
2973  return;
2974  }
2975  case PPCISD::COND_BRANCH: {
2976  // Op #0 is the Chain.
2977  // Op #1 is the PPC::PRED_* number.
2978  // Op #2 is the CR#
2979  // Op #3 is the Dest MBB
2980  // Op #4 is the Flag.
2981  // Prevent PPC::PRED_* from being selected into LI.
2982  unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2983  if (EnableBranchHint)
2984  PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
2985 
2986  SDValue Pred = getI32Imm(PCC, dl);
2987  SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
2988  N->getOperand(0), N->getOperand(4) };
2989  CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
2990  return;
2991  }
2992  case ISD::BR_CC: {
2993  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
2994  unsigned PCC = getPredicateForSetCC(CC);
2995 
2996  if (N->getOperand(2).getValueType() == MVT::i1) {
2997  unsigned Opc;
2998  bool Swap;
2999  switch (PCC) {
3000  default: llvm_unreachable("Unexpected Boolean-operand predicate");
3001  case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
3002  case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
3003  case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
3004  case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
3005  case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
3006  case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
3007  }
3008 
3009  SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
3010  N->getOperand(Swap ? 3 : 2),
3011  N->getOperand(Swap ? 2 : 3)), 0);
3012  CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
3013  N->getOperand(0));
3014  return;
3015  }
3016 
3017  if (EnableBranchHint)
3018  PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
3019 
3020  SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
3021  SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
3022  N->getOperand(4), N->getOperand(0) };
3023  CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
3024  return;
3025  }
3026  case ISD::BRIND: {
3027  // FIXME: Should custom lower this.
3028  SDValue Chain = N->getOperand(0);
3029  SDValue Target = N->getOperand(1);
3030  unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
3031  unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
3032  Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
3033  Chain), 0);
3034  CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
3035  return;
3036  }
3037  case PPCISD::TOC_ENTRY: {
3038  assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
3039  "Only supported for 64-bit ABI and 32-bit SVR4");
3040  if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
3041  SDValue GA = N->getOperand(0);
3042  SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
3043  N->getOperand(1));
3044  transferMemOperands(N, MN);
3045  ReplaceNode(N, MN);
3046  return;
3047  }
3048 
3049  // For medium and large code model, we generate two instructions as
3050  // described below. Otherwise we allow SelectCodeCommon to handle this,
3051  // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
3052  CodeModel::Model CModel = TM.getCodeModel();
3053  if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
3054  break;
3055 
3056  // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
3057  // If it must be toc-referenced according to PPCSubTarget, we generate:
3058  // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
3059  // Otherwise we generate:
3060  // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
3061  SDValue GA = N->getOperand(0);
3062  SDValue TOCbase = N->getOperand(1);
3063  SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
3064  TOCbase, GA);
3065 
3066  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
3067  CModel == CodeModel::Large) {
3068  SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
3069  SDValue(Tmp, 0));
3070  transferMemOperands(N, MN);
3071  ReplaceNode(N, MN);
3072  return;
3073  }
3074 
3075  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
3076  const GlobalValue *GV = G->getGlobal();
3077  unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
3078  if (GVFlags & PPCII::MO_NLP_FLAG) {
3079  SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
3080  SDValue(Tmp, 0));
3081  transferMemOperands(N, MN);
3082  ReplaceNode(N, MN);
3083  return;
3084  }
3085  }
3086 
3087  ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
3088  SDValue(Tmp, 0), GA));
3089  return;
3090  }
3091  case PPCISD::PPC32_PICGOT: {
3092  // Generate a PIC-safe GOT reference.
3093  assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
3094  "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
3095  CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
3096  PPCLowering->getPointerTy(CurDAG->getDataLayout()),
3097  MVT::i32);
3098  return;
3099  }
3100  case PPCISD::VADD_SPLAT: {
3101  // This expands into one of three sequences, depending on whether
3102  // the first operand is odd or even, positive or negative.
3103  assert(isa<ConstantSDNode>(N->getOperand(0)) &&
3104  isa<ConstantSDNode>(N->getOperand(1)) &&
3105  "Invalid operand on VADD_SPLAT!");
3106 
3107  int Elt = N->getConstantOperandVal(0);
3108  int EltSize = N->getConstantOperandVal(1);
3109  unsigned Opc1, Opc2, Opc3;
3110  EVT VT;
3111 
3112  if (EltSize == 1) {
3113  Opc1 = PPC::VSPLTISB;
3114  Opc2 = PPC::VADDUBM;
3115  Opc3 = PPC::VSUBUBM;
3116  VT = MVT::v16i8;
3117  } else if (EltSize == 2) {
3118  Opc1 = PPC::VSPLTISH;
3119  Opc2 = PPC::VADDUHM;
3120  Opc3 = PPC::VSUBUHM;
3121  VT = MVT::v8i16;
3122  } else {
3123  assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
3124  Opc1 = PPC::VSPLTISW;
3125  Opc2 = PPC::VADDUWM;
3126  Opc3 = PPC::VSUBUWM;
3127  VT = MVT::v4i32;
3128  }
3129 
3130  if ((Elt & 1) == 0) {
3131  // Elt is even, in the range [-32,-18] + [16,30].
3132  //
3133  // Convert: VADD_SPLAT elt, size
3134  // Into: tmp = VSPLTIS[BHW] elt
3135  // VADDU[BHW]M tmp, tmp
3136  // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
3137  SDValue EltVal = getI32Imm(Elt >> 1, dl);
3138  SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3139  SDValue TmpVal = SDValue(Tmp, 0);
3140  ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
3141  return;
3142 
3143  } else if (Elt > 0) {
3144  // Elt is odd and positive, in the range [17,31].
3145  //
3146  // Convert: VADD_SPLAT elt, size
3147  // Into: tmp1 = VSPLTIS[BHW] elt-16
3148  // tmp2 = VSPLTIS[BHW] -16
3149  // VSUBU[BHW]M tmp1, tmp2
3150  SDValue EltVal = getI32Imm(Elt - 16, dl);
3151  SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3152  EltVal = getI32Imm(-16, dl);
3153  SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3154  ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
3155  SDValue(Tmp2, 0)));
3156  return;
3157 
3158  } else {
3159  // Elt is odd and negative, in the range [-31,-17].
3160  //
3161  // Convert: VADD_SPLAT elt, size
3162  // Into: tmp1 = VSPLTIS[BHW] elt+16
3163  // tmp2 = VSPLTIS[BHW] -16
3164  // VADDU[BHW]M tmp1, tmp2
3165  SDValue EltVal = getI32Imm(Elt + 16, dl);
3166  SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3167  EltVal = getI32Imm(-16, dl);
3168  SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3169  ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
3170  SDValue(Tmp2, 0)));
3171  return;
3172  }
3173  }
3174  }
3175 
3176  SelectCode(N);
3177 }
3178 
3179 // If the target supports the cmpb instruction, do the idiom recognition here.
3180 // We don't do this as a DAG combine because we don't want to do it as nodes
3181 // are being combined (because we might miss part of the eventual idiom). We
3182 // don't want to do it during instruction selection because we want to reuse
3183 // the logic for lowering the masking operations already part of the
3184 // instruction selector.
3185 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
3186  SDLoc dl(N);
3187 
3188  assert(N->getOpcode() == ISD::OR &&
3189  "Only OR nodes are supported for CMPB");
3190 
3191  SDValue Res;
3192  if (!PPCSubTarget->hasCMPB())
3193  return Res;
3194 
3195  if (N->getValueType(0) != MVT::i32 &&
3196  N->getValueType(0) != MVT::i64)
3197  return Res;
3198 
3199  EVT VT = N->getValueType(0);
3200 
3201  SDValue RHS, LHS;
3202  bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
3203  uint64_t Mask = 0, Alt = 0;
3204 
3205  auto IsByteSelectCC = [this](SDValue O, unsigned &b,
3206  uint64_t &Mask, uint64_t &Alt,
3207  SDValue &LHS, SDValue &RHS) {
3208  if (O.getOpcode() != ISD::SELECT_CC)
3209  return false;
3210  ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
3211 
3212  if (!isa<ConstantSDNode>(O.getOperand(2)) ||
3213  !isa<ConstantSDNode>(O.getOperand(3)))
3214  return false;
3215 
3216  uint64_t PM = O.getConstantOperandVal(2);
3217  uint64_t PAlt = O.getConstantOperandVal(3);
3218  for (b = 0; b < 8; ++b) {
3219  uint64_t Mask = UINT64_C(0xFF) << (8*b);
3220  if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
3221  break;
3222  }
3223 
3224  if (b == 8)
3225  return false;
3226  Mask |= PM;
3227  Alt |= PAlt;
3228 
3229  if (!isa<ConstantSDNode>(O.getOperand(1)) ||
3230  O.getConstantOperandVal(1) != 0) {
3231  SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
3232  if (Op0.getOpcode() == ISD::TRUNCATE)
3233  Op0 = Op0.getOperand(0);
3234  if (Op1.getOpcode() == ISD::TRUNCATE)
3235  Op1 = Op1.getOperand(0);
3236 
3237  if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
3238  Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
3239  isa<ConstantSDNode>(Op0.getOperand(1))) {
3240 
3241  unsigned Bits = Op0.getValueSizeInBits();
3242  if (b != Bits/8-1)
3243  return false;
3244  if (Op0.getConstantOperandVal(1) != Bits-8)
3245  return false;
3246 
3247  LHS = Op0.getOperand(0);
3248  RHS = Op1.getOperand(0);
3249  return true;
3250  }
3251 
3252  // When we have small integers (i16 to be specific), the form present
3253  // post-legalization uses SETULT in the SELECT_CC for the
3254  // higher-order byte, depending on the fact that the
3255  // even-higher-order bytes are known to all be zero, for example:
3256  // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
3257  // (so when the second byte is the same, because all higher-order
3258  // bits from bytes 3 and 4 are known to be zero, the result of the
3259  // xor can be at most 255)
3260  if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
3261  isa<ConstantSDNode>(O.getOperand(1))) {
3262 
3263  uint64_t ULim = O.getConstantOperandVal(1);
3264  if (ULim != (UINT64_C(1) << b*8))
3265  return false;
3266 
3267  // Now we need to make sure that the upper bytes are known to be
3268  // zero.
3269  unsigned Bits = Op0.getValueSizeInBits();
3270  if (!CurDAG->MaskedValueIsZero(
3271  Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
3272  return false;
3273 
3274  LHS = Op0.getOperand(0);
3275  RHS = Op0.getOperand(1);
3276  return true;
3277  }
3278 
3279  return false;
3280  }
3281 
3282  if (CC != ISD::SETEQ)
3283  return false;
3284 
3285  SDValue Op = O.getOperand(0);
3286  if (Op.getOpcode() == ISD::AND) {
3287  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3288  return false;
3289  if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
3290  return false;
3291 
3292  SDValue XOR = Op.getOperand(0);
3293  if (XOR.getOpcode() == ISD::TRUNCATE)
3294  XOR = XOR.getOperand(0);
3295  if (XOR.getOpcode() != ISD::XOR)
3296  return false;
3297 
3298  LHS = XOR.getOperand(0);
3299  RHS = XOR.getOperand(1);
3300  return true;
3301  } else if (Op.getOpcode() == ISD::SRL) {
3302  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3303  return false;
3304  unsigned Bits = Op.getValueSizeInBits();
3305  if (b != Bits/8-1)
3306  return false;
3307  if (Op.getConstantOperandVal(1) != Bits-8)
3308  return false;
3309 
3310  SDValue XOR = Op.getOperand(0);
3311  if (XOR.getOpcode() == ISD::TRUNCATE)
3312  XOR = XOR.getOperand(0);
3313  if (XOR.getOpcode() != ISD::XOR)
3314  return false;
3315 
3316  LHS = XOR.getOperand(0);
3317  RHS = XOR.getOperand(1);
3318  return true;
3319  }
3320 
3321  return false;
3322  };
3323 
3325  while (!Queue.empty()) {
3326  SDValue V = Queue.pop_back_val();
3327 
3328  for (const SDValue &O : V.getNode()->ops()) {
3329  unsigned b;
3330  uint64_t M = 0, A = 0;
3331  SDValue OLHS, ORHS;
3332  if (O.getOpcode() == ISD::OR) {
3333  Queue.push_back(O);
3334  } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
3335  if (!LHS) {
3336  LHS = OLHS;
3337  RHS = ORHS;
3338  BytesFound[b] = true;
3339  Mask |= M;
3340  Alt |= A;
3341  } else if ((LHS == ORHS && RHS == OLHS) ||
3342  (RHS == ORHS && LHS == OLHS)) {
3343  BytesFound[b] = true;
3344  Mask |= M;
3345  Alt |= A;
3346  } else {
3347  return Res;
3348  }
3349  } else {
3350  return Res;
3351  }
3352  }
3353  }
3354 
3355  unsigned LastB = 0, BCnt = 0;
3356  for (unsigned i = 0; i < 8; ++i)
3357  if (BytesFound[LastB]) {
3358  ++BCnt;
3359  LastB = i;
3360  }
3361 
3362  if (!LastB || BCnt < 2)
3363  return Res;
3364 
3365  // Because we'll be zero-extending the output anyway if don't have a specific
3366  // value for each input byte (via the Mask), we can 'anyext' the inputs.
3367  if (LHS.getValueType() != VT) {
3368  LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
3369  RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
3370  }
3371 
3372  Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
3373 
3374  bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
3375  if (NonTrivialMask && !Alt) {
3376  // Res = Mask & CMPB
3377  Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
3378  CurDAG->getConstant(Mask, dl, VT));
3379  } else if (Alt) {
3380  // Res = (CMPB & Mask) | (~CMPB & Alt)
3381  // Which, as suggested here:
3382  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
3383  // can be written as:
3384  // Res = Alt ^ ((Alt ^ Mask) & CMPB)
3385  // useful because the (Alt ^ Mask) can be pre-computed.
3386  Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
3387  CurDAG->getConstant(Mask ^ Alt, dl, VT));
3388  Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
3389  CurDAG->getConstant(Alt, dl, VT));
3390  }
3391 
3392  return Res;
3393 }
3394 
3395 // When CR bit registers are enabled, an extension of an i1 variable to a i32
3396 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
3397 // involves constant materialization of a 0 or a 1 or both. If the result of
3398 // the extension is then operated upon by some operator that can be constant
3399 // folded with a constant 0 or 1, and that constant can be materialized using
3400 // only one instruction (like a zero or one), then we should fold in those
3401 // operations with the select.
3402 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
3403  if (!PPCSubTarget->useCRBits())
3404  return;
3405 
3406  if (N->getOpcode() != ISD::ZERO_EXTEND &&
3407  N->getOpcode() != ISD::SIGN_EXTEND &&
3408  N->getOpcode() != ISD::ANY_EXTEND)
3409  return;
3410 
3411  if (N->getOperand(0).getValueType() != MVT::i1)
3412  return;
3413 
3414  if (!N->hasOneUse())
3415  return;
3416 
3417  SDLoc dl(N);
3418  EVT VT = N->getValueType(0);
3419  SDValue Cond = N->getOperand(0);
3420  SDValue ConstTrue =
3421  CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
3422  SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
3423 
3424  do {
3425  SDNode *User = *N->use_begin();
3426  if (User->getNumOperands() != 2)
3427  break;
3428 
3429  auto TryFold = [this, N, User, dl](SDValue Val) {
3430  SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
3431  SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
3432  SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
3433 
3434  return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
3435  User->getValueType(0),
3436  O0.getNode(), O1.getNode());
3437  };
3438 
3439  SDValue TrueRes = TryFold(ConstTrue);
3440  if (!TrueRes)
3441  break;
3442  SDValue FalseRes = TryFold(ConstFalse);
3443  if (!FalseRes)
3444  break;
3445 
3446  // For us to materialize these using one instruction, we must be able to
3447  // represent them as signed 16-bit integers.
3448  uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
3449  False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
3450  if (!isInt<16>(True) || !isInt<16>(False))
3451  break;
3452 
3453  // We can replace User with a new SELECT node, and try again to see if we
3454  // can fold the select with its user.
3455  Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
3456  N = User;
3457  ConstTrue = TrueRes;
3458  ConstFalse = FalseRes;
3459  } while (N->hasOneUse());
3460 }
3461 
3462 void PPCDAGToDAGISel::PreprocessISelDAG() {
3463  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
3464  ++Position;
3465 
3466  bool MadeChange = false;
3467  while (Position != CurDAG->allnodes_begin()) {
3468  SDNode *N = &*--Position;
3469  if (N->use_empty())
3470  continue;
3471 
3472  SDValue Res;
3473  switch (N->getOpcode()) {
3474  default: break;
3475  case ISD::OR:
3476  Res = combineToCMPB(N);
3477  break;
3478  }
3479 
3480  if (!Res)
3481  foldBoolExts(Res, N);
3482 
3483  if (Res) {
3484  DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
3485  DEBUG(N->dump(CurDAG));
3486  DEBUG(dbgs() << "\nNew: ");
3487  DEBUG(Res.getNode()->dump(CurDAG));
3488  DEBUG(dbgs() << "\n");
3489 
3490  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
3491  MadeChange = true;
3492  }
3493  }
3494 
3495  if (MadeChange)
3496  CurDAG->RemoveDeadNodes();
3497 }
3498 
3499 /// PostprocessISelDAG - Perform some late peephole optimizations
3500 /// on the DAG representation.
3501 void PPCDAGToDAGISel::PostprocessISelDAG() {
3502 
3503  // Skip peepholes at -O0.
3504  if (TM.getOptLevel() == CodeGenOpt::None)
3505  return;
3506 
3507  PeepholePPC64();
3508  PeepholeCROps();
3509  PeepholePPC64ZExt();
3510 }
3511 
3512 // Check if all users of this node will become isel where the second operand
3513 // is the constant zero. If this is so, and if we can negate the condition,
3514 // then we can flip the true and false operands. This will allow the zero to
3515 // be folded with the isel so that we don't need to materialize a register
3516 // containing zero.
3517 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
3518  // If we're not using isel, then this does not matter.
3519  if (!PPCSubTarget->hasISEL())
3520  return false;
3521 
3522  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
3523  UI != UE; ++UI) {
3524  SDNode *User = *UI;
3525  if (!User->isMachineOpcode())
3526  return false;
3527  if (User->getMachineOpcode() != PPC::SELECT_I4 &&
3528  User->getMachineOpcode() != PPC::SELECT_I8)
3529  return false;
3530 
3531  SDNode *Op2 = User->getOperand(2).getNode();
3532  if (!Op2->isMachineOpcode())
3533  return false;
3534 
3535  if (Op2->getMachineOpcode() != PPC::LI &&
3536  Op2->getMachineOpcode() != PPC::LI8)
3537  return false;
3538 
3540  if (!C)
3541  return false;
3542 
3543  if (!C->isNullValue())
3544  return false;
3545  }
3546 
3547  return true;
3548 }
3549 
3550 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
3551  SmallVector<SDNode *, 4> ToReplace;
3552  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
3553  UI != UE; ++UI) {
3554  SDNode *User = *UI;
3555  assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
3556  User->getMachineOpcode() == PPC::SELECT_I8) &&
3557  "Must have all select users");
3558  ToReplace.push_back(User);
3559  }
3560 
3561  for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
3562  UE = ToReplace.end(); UI != UE; ++UI) {
3563  SDNode *User = *UI;
3564  SDNode *ResNode =
3565  CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
3566  User->getValueType(0), User->getOperand(0),
3567  User->getOperand(2),
3568  User->getOperand(1));
3569 
3570  DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
3571  DEBUG(User->dump(CurDAG));
3572  DEBUG(dbgs() << "\nNew: ");
3573  DEBUG(ResNode->dump(CurDAG));
3574  DEBUG(dbgs() << "\n");
3575 
3576  ReplaceUses(User, ResNode);
3577  }
3578 }
3579 
3580 void PPCDAGToDAGISel::PeepholeCROps() {
3581  bool IsModified;
3582  do {
3583  IsModified = false;
3584  for (SDNode &Node : CurDAG->allnodes()) {
3585  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
3586  if (!MachineNode || MachineNode->use_empty())
3587  continue;
3588  SDNode *ResNode = MachineNode;
3589 
3590  bool Op1Set = false, Op1Unset = false,
3591  Op1Not = false,
3592  Op2Set = false, Op2Unset = false,
3593  Op2Not = false;
3594 
3595  unsigned Opcode = MachineNode->getMachineOpcode();
3596  switch (Opcode) {
3597  default: break;
3598  case PPC::CRAND:
3599  case PPC::CRNAND:
3600  case PPC::CROR:
3601  case PPC::CRXOR:
3602  case PPC::CRNOR:
3603  case PPC::CREQV:
3604  case PPC::CRANDC:
3605  case PPC::CRORC: {
3606  SDValue Op = MachineNode->getOperand(1);
3607  if (Op.isMachineOpcode()) {
3608  if (Op.getMachineOpcode() == PPC::CRSET)
3609  Op2Set = true;
3610  else if (Op.getMachineOpcode() == PPC::CRUNSET)
3611  Op2Unset = true;
3612  else if (Op.getMachineOpcode() == PPC::CRNOR &&
3613  Op.getOperand(0) == Op.getOperand(1))
3614  Op2Not = true;
3615  }
3617  }
3618  case PPC::BC:
3619  case PPC::BCn:
3620  case PPC::SELECT_I4:
3621  case PPC::SELECT_I8:
3622  case PPC::SELECT_F4:
3623  case PPC::SELECT_F8:
3624  case PPC::SELECT_QFRC:
3625  case PPC::SELECT_QSRC:
3626  case PPC::SELECT_QBRC:
3627  case PPC::SELECT_VRRC:
3628  case PPC::SELECT_VSFRC:
3629  case PPC::SELECT_VSSRC:
3630  case PPC::SELECT_VSRC: {
3631  SDValue Op = MachineNode->getOperand(0);
3632  if (Op.isMachineOpcode()) {
3633  if (Op.getMachineOpcode() == PPC::CRSET)
3634  Op1Set = true;
3635  else if (Op.getMachineOpcode() == PPC::CRUNSET)
3636  Op1Unset = true;
3637  else if (Op.getMachineOpcode() == PPC::CRNOR &&
3638  Op.getOperand(0) == Op.getOperand(1))
3639  Op1Not = true;
3640  }
3641  }
3642  break;
3643  }
3644 
3645  bool SelectSwap = false;
3646  switch (Opcode) {
3647  default: break;
3648  case PPC::CRAND:
3649  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3650  // x & x = x
3651  ResNode = MachineNode->getOperand(0).getNode();
3652  else if (Op1Set)
3653  // 1 & y = y
3654  ResNode = MachineNode->getOperand(1).getNode();
3655  else if (Op2Set)
3656  // x & 1 = x
3657  ResNode = MachineNode->getOperand(0).getNode();
3658  else if (Op1Unset || Op2Unset)
3659  // x & 0 = 0 & y = 0
3660  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3661  MVT::i1);
3662  else if (Op1Not)
3663  // ~x & y = andc(y, x)
3664  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3665  MVT::i1, MachineNode->getOperand(1),
3666  MachineNode->getOperand(0).
3667  getOperand(0));
3668  else if (Op2Not)
3669  // x & ~y = andc(x, y)
3670  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3671  MVT::i1, MachineNode->getOperand(0),
3672  MachineNode->getOperand(1).
3673  getOperand(0));
3674  else if (AllUsersSelectZero(MachineNode)) {
3675  ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
3676  MVT::i1, MachineNode->getOperand(0),
3677  MachineNode->getOperand(1));
3678  SelectSwap = true;
3679  }
3680  break;
3681  case PPC::CRNAND:
3682  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3683  // nand(x, x) -> nor(x, x)
3684  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3685  MVT::i1, MachineNode->getOperand(0),
3686  MachineNode->getOperand(0));
3687  else if (Op1Set)
3688  // nand(1, y) -> nor(y, y)
3689  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3690  MVT::i1, MachineNode->getOperand(1),
3691  MachineNode->getOperand(1));
3692  else if (Op2Set)
3693  // nand(x, 1) -> nor(x, x)
3694  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3695  MVT::i1, MachineNode->getOperand(0),
3696  MachineNode->getOperand(0));
3697  else if (Op1Unset || Op2Unset)
3698  // nand(x, 0) = nand(0, y) = 1
3699  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3700  MVT::i1);
3701  else if (Op1Not)
3702  // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
3703  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3704  MVT::i1, MachineNode->getOperand(0).
3705  getOperand(0),
3706  MachineNode->getOperand(1));
3707  else if (Op2Not)
3708  // nand(x, ~y) = ~x | y = orc(y, x)
3709  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3710  MVT::i1, MachineNode->getOperand(1).
3711  getOperand(0),
3712  MachineNode->getOperand(0));
3713  else if (AllUsersSelectZero(MachineNode)) {
3714  ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
3715  MVT::i1, MachineNode->getOperand(0),
3716  MachineNode->getOperand(1));
3717  SelectSwap = true;
3718  }
3719  break;
3720  case PPC::CROR:
3721  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3722  // x | x = x
3723  ResNode = MachineNode->getOperand(0).getNode();
3724  else if (Op1Set || Op2Set)
3725  // x | 1 = 1 | y = 1
3726  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3727  MVT::i1);
3728  else if (Op1Unset)
3729  // 0 | y = y
3730  ResNode = MachineNode->getOperand(1).getNode();
3731  else if (Op2Unset)
3732  // x | 0 = x
3733  ResNode = MachineNode->getOperand(0).getNode();
3734  else if (Op1Not)
3735  // ~x | y = orc(y, x)
3736  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3737  MVT::i1, MachineNode->getOperand(1),
3738  MachineNode->getOperand(0).
3739  getOperand(0));
3740  else if (Op2Not)
3741  // x | ~y = orc(x, y)
3742  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3743  MVT::i1, MachineNode->getOperand(0),
3744  MachineNode->getOperand(1).
3745  getOperand(0));
3746  else if (AllUsersSelectZero(MachineNode)) {
3747  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3748  MVT::i1, MachineNode->getOperand(0),
3749  MachineNode->getOperand(1));
3750  SelectSwap = true;
3751  }
3752  break;
3753  case PPC::CRXOR:
3754  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3755  // xor(x, x) = 0
3756  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3757  MVT::i1);
3758  else if (Op1Set)
3759  // xor(1, y) -> nor(y, y)
3760  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3761  MVT::i1, MachineNode->getOperand(1),
3762  MachineNode->getOperand(1));
3763  else if (Op2Set)
3764  // xor(x, 1) -> nor(x, x)
3765  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3766  MVT::i1, MachineNode->getOperand(0),
3767  MachineNode->getOperand(0));
3768  else if (Op1Unset)
3769  // xor(0, y) = y
3770  ResNode = MachineNode->getOperand(1).getNode();
3771  else if (Op2Unset)
3772  // xor(x, 0) = x
3773  ResNode = MachineNode->getOperand(0).getNode();
3774  else if (Op1Not)
3775  // xor(~x, y) = eqv(x, y)
3776  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3777  MVT::i1, MachineNode->getOperand(0).
3778  getOperand(0),
3779  MachineNode->getOperand(1));
3780  else if (Op2Not)
3781  // xor(x, ~y) = eqv(x, y)
3782  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3783  MVT::i1, MachineNode->getOperand(0),
3784  MachineNode->getOperand(1).
3785  getOperand(0));
3786  else if (AllUsersSelectZero(MachineNode)) {
3787  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3788  MVT::i1, MachineNode->getOperand(0),
3789  MachineNode->getOperand(1));
3790  SelectSwap = true;
3791  }
3792  break;
3793  case PPC::CRNOR:
3794  if (Op1Set || Op2Set)
3795  // nor(1, y) -> 0
3796  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3797  MVT::i1);
3798  else if (Op1Unset)
3799  // nor(0, y) = ~y -> nor(y, y)
3800  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3801  MVT::i1, MachineNode->getOperand(1),
3802  MachineNode->getOperand(1));
3803  else if (Op2Unset)
3804  // nor(x, 0) = ~x
3805  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3806  MVT::i1, MachineNode->getOperand(0),
3807  MachineNode->getOperand(0));
3808  else if (Op1Not)
3809  // nor(~x, y) = andc(x, y)
3810  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3811  MVT::i1, MachineNode->getOperand(0).
3812  getOperand(0),
3813  MachineNode->getOperand(1));
3814  else if (Op2Not)
3815  // nor(x, ~y) = andc(y, x)
3816  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3817  MVT::i1, MachineNode->getOperand(1).
3818  getOperand(0),
3819  MachineNode->getOperand(0));
3820  else if (AllUsersSelectZero(MachineNode)) {
3821  ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
3822  MVT::i1, MachineNode->getOperand(0),
3823  MachineNode->getOperand(1));
3824  SelectSwap = true;
3825  }
3826  break;
3827  case PPC::CREQV:
3828  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3829  // eqv(x, x) = 1
3830  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3831  MVT::i1);
3832  else if (Op1Set)
3833  // eqv(1, y) = y
3834  ResNode = MachineNode->getOperand(1).getNode();
3835  else if (Op2Set)
3836  // eqv(x, 1) = x
3837  ResNode = MachineNode->getOperand(0).getNode();
3838  else if (Op1Unset)
3839  // eqv(0, y) = ~y -> nor(y, y)
3840  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3841  MVT::i1, MachineNode->getOperand(1),
3842  MachineNode->getOperand(1));
3843  else if (Op2Unset)
3844  // eqv(x, 0) = ~x
3845  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3846  MVT::i1, MachineNode->getOperand(0),
3847  MachineNode->getOperand(0));
3848  else if (Op1Not)
3849  // eqv(~x, y) = xor(x, y)
3850  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
3851  MVT::i1, MachineNode->getOperand(0).
3852  getOperand(0),
3853  MachineNode->getOperand(1));
3854  else if (Op2Not)
3855  // eqv(x, ~y) = xor(x, y)
3856  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
3857  MVT::i1, MachineNode->getOperand(0),
3858  MachineNode->getOperand(1).
3859  getOperand(0));
3860  else if (AllUsersSelectZero(MachineNode)) {
3861  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
3862  MVT::i1, MachineNode->getOperand(0),
3863  MachineNode->getOperand(1));
3864  SelectSwap = true;
3865  }
3866  break;
3867  case PPC::CRANDC:
3868  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3869  // andc(x, x) = 0
3870  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3871  MVT::i1);
3872  else if (Op1Set)
3873  // andc(1, y) = ~y
3874  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3875  MVT::i1, MachineNode->getOperand(1),
3876  MachineNode->getOperand(1));
3877  else if (Op1Unset || Op2Set)
3878  // andc(0, y) = andc(x, 1) = 0
3879  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3880  MVT::i1);
3881  else if (Op2Unset)
3882  // andc(x, 0) = x
3883  ResNode = MachineNode->getOperand(0).getNode();
3884  else if (Op1Not)
3885  // andc(~x, y) = ~(x | y) = nor(x, y)
3886  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3887  MVT::i1, MachineNode->getOperand(0).
3888  getOperand(0),
3889  MachineNode->getOperand(1));
3890  else if (Op2Not)
3891  // andc(x, ~y) = x & y
3892  ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
3893  MVT::i1, MachineNode->getOperand(0),
3894  MachineNode->getOperand(1).
3895  getOperand(0));
3896  else if (AllUsersSelectZero(MachineNode)) {
3897  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3898  MVT::i1, MachineNode->getOperand(1),
3899  MachineNode->getOperand(0));
3900  SelectSwap = true;
3901  }
3902  break;
3903  case PPC::CRORC:
3904  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3905  // orc(x, x) = 1
3906  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3907  MVT::i1);
3908  else if (Op1Set || Op2Unset)
3909  // orc(1, y) = orc(x, 0) = 1
3910  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3911  MVT::i1);
3912  else if (Op2Set)
3913  // orc(x, 1) = x
3914  ResNode = MachineNode->getOperand(0).getNode();
3915  else if (Op1Unset)
3916  // orc(0, y) = ~y
3917  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3918  MVT::i1, MachineNode->getOperand(1),
3919  MachineNode->getOperand(1));
3920  else if (Op1Not)
3921  // orc(~x, y) = ~(x & y) = nand(x, y)
3922  ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
3923  MVT::i1, MachineNode->getOperand(0).
3924  getOperand(0),
3925  MachineNode->getOperand(1));
3926  else if (Op2Not)
3927  // orc(x, ~y) = x | y
3928  ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
3929  MVT::i1, MachineNode->getOperand(0),
3930  MachineNode->getOperand(1).
3931  getOperand(0));
3932  else if (AllUsersSelectZero(MachineNode)) {
3933  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3934  MVT::i1, MachineNode->getOperand(1),
3935  MachineNode->getOperand(0));
3936  SelectSwap = true;
3937  }
3938  break;
3939  case PPC::SELECT_I4:
3940  case PPC::SELECT_I8:
3941  case PPC::SELECT_F4:
3942  case PPC::SELECT_F8:
3943  case PPC::SELECT_QFRC:
3944  case PPC::SELECT_QSRC:
3945  case PPC::SELECT_QBRC:
3946  case PPC::SELECT_VRRC:
3947  case PPC::SELECT_VSFRC:
3948  case PPC::SELECT_VSSRC:
3949  case PPC::SELECT_VSRC:
3950  if (Op1Set)
3951  ResNode = MachineNode->getOperand(1).getNode();
3952  else if (Op1Unset)
3953  ResNode = MachineNode->getOperand(2).getNode();
3954  else if (Op1Not)
3955  ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
3956  SDLoc(MachineNode),
3957  MachineNode->getValueType(0),
3958  MachineNode->getOperand(0).
3959  getOperand(0),
3960  MachineNode->getOperand(2),
3961  MachineNode->getOperand(1));
3962  break;
3963  case PPC::BC:
3964  case PPC::BCn:
3965  if (Op1Not)
3966  ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
3967  PPC::BC,
3968  SDLoc(MachineNode),
3969  MVT::Other,
3970  MachineNode->getOperand(0).
3971  getOperand(0),
3972  MachineNode->getOperand(1),
3973  MachineNode->getOperand(2));
3974  // FIXME: Handle Op1Set, Op1Unset here too.
3975  break;
3976  }
3977 
3978  // If we're inverting this node because it is used only by selects that
3979  // we'd like to swap, then swap the selects before the node replacement.
3980  if (SelectSwap)
3981  SwapAllSelectUsers(MachineNode);
3982 
3983  if (ResNode != MachineNode) {
3984  DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
3985  DEBUG(MachineNode->dump(CurDAG));
3986  DEBUG(dbgs() << "\nNew: ");
3987  DEBUG(ResNode->dump(CurDAG));
3988  DEBUG(dbgs() << "\n");
3989 
3990  ReplaceUses(MachineNode, ResNode);
3991  IsModified = true;
3992  }
3993  }
3994  if (IsModified)
3995  CurDAG->RemoveDeadNodes();
3996  } while (IsModified);
3997 }
3998 
3999 // Gather the set of 32-bit operations that are known to have their
4000 // higher-order 32 bits zero, where ToPromote contains all such operations.
4002  SmallPtrSetImpl<SDNode *> &ToPromote) {
4003  if (!Op32.isMachineOpcode())
4004  return false;
4005 
4006  // First, check for the "frontier" instructions (those that will clear the
4007  // higher-order 32 bits.
4008 
4009  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
4010  // around. If it does not, then these instructions will clear the
4011  // higher-order bits.
4012  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
4013  Op32.getMachineOpcode() == PPC::RLWNM) &&
4014  Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
4015  ToPromote.insert(Op32.getNode());
4016  return true;
4017  }
4018 
4019  // SLW and SRW always clear the higher-order bits.
4020  if (Op32.getMachineOpcode() == PPC::SLW ||
4021  Op32.getMachineOpcode() == PPC::SRW) {
4022  ToPromote.insert(Op32.getNode());
4023  return true;
4024  }
4025 
4026  // For LI and LIS, we need the immediate to be positive (so that it is not
4027  // sign extended).
4028  if (Op32.getMachineOpcode() == PPC::LI ||
4029  Op32.getMachineOpcode() == PPC::LIS) {
4030  if (!isUInt<15>(Op32.getConstantOperandVal(0)))
4031  return false;
4032 
4033  ToPromote.insert(Op32.getNode());
4034  return true;
4035  }
4036 
4037  // LHBRX and LWBRX always clear the higher-order bits.
4038  if (Op32.getMachineOpcode() == PPC::LHBRX ||
4039  Op32.getMachineOpcode() == PPC::LWBRX) {
4040  ToPromote.insert(Op32.getNode());
4041  return true;
4042  }
4043 
4044  // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
4045  if (Op32.getMachineOpcode() == PPC::CNTLZW ||
4046  Op32.getMachineOpcode() == PPC::CNTTZW) {
4047  ToPromote.insert(Op32.getNode());
4048  return true;
4049  }
4050 
4051  // Next, check for those instructions we can look through.
4052 
4053  // Assuming the mask does not wrap around, then the higher-order bits are
4054  // taken directly from the first operand.
4055  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
4056  Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
4057  SmallPtrSet<SDNode *, 16> ToPromote1;
4058  if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
4059  return false;
4060 
4061  ToPromote.insert(Op32.getNode());
4062  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4063  return true;
4064  }
4065 
4066  // For OR, the higher-order bits are zero if that is true for both operands.
4067  // For SELECT_I4, the same is true (but the relevant operand numbers are
4068  // shifted by 1).
4069  if (Op32.getMachineOpcode() == PPC::OR ||
4070  Op32.getMachineOpcode() == PPC::SELECT_I4) {
4071  unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
4072  SmallPtrSet<SDNode *, 16> ToPromote1;
4073  if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
4074  return false;
4075  if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
4076  return false;
4077 
4078  ToPromote.insert(Op32.getNode());
4079  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4080  return true;
4081  }
4082 
4083  // For ORI and ORIS, we need the higher-order bits of the first operand to be
4084  // zero, and also for the constant to be positive (so that it is not sign
4085  // extended).
4086  if (Op32.getMachineOpcode() == PPC::ORI ||
4087  Op32.getMachineOpcode() == PPC::ORIS) {
4088  SmallPtrSet<SDNode *, 16> ToPromote1;
4089  if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
4090  return false;
4091  if (!isUInt<15>(Op32.getConstantOperandVal(1)))
4092  return false;
4093 
4094  ToPromote.insert(Op32.getNode());
4095  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4096  return true;
4097  }
4098 
4099  // The higher-order bits of AND are zero if that is true for at least one of
4100  // the operands.
4101  if (Op32.getMachineOpcode() == PPC::AND) {
4102  SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
4103  bool Op0OK =
4104  PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
4105  bool Op1OK =
4106  PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
4107  if (!Op0OK && !Op1OK)
4108  return false;
4109 
4110  ToPromote.insert(Op32.getNode());
4111 
4112  if (Op0OK)
4113  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4114 
4115  if (Op1OK)
4116  ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
4117 
4118  return true;
4119  }
4120 
4121  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
4122  // of the first operand, or if the second operand is positive (so that it is
4123  // not sign extended).
4124  if (Op32.getMachineOpcode() == PPC::ANDIo ||
4125  Op32.getMachineOpcode() == PPC::ANDISo) {
4126  SmallPtrSet<SDNode *, 16> ToPromote1;
4127  bool Op0OK =
4128  PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
4129  bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
4130  if (!Op0OK && !Op1OK)
4131  return false;
4132 
4133  ToPromote.insert(Op32.getNode());
4134 
4135  if (Op0OK)
4136  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4137 
4138  return true;
4139  }
4140 
4141  return false;
4142 }
4143 
4144 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
4145  if (!PPCSubTarget->isPPC64())
4146  return;
4147 
4148  // When we zero-extend from i32 to i64, we use a pattern like this:
4149  // def : Pat<(i64 (zext i32:$in)),
4150  // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
4151  // 0, 32)>;
4152  // There are several 32-bit shift/rotate instructions, however, that will
4153  // clear the higher-order bits of their output, rendering the RLDICL
4154  // unnecessary. When that happens, we remove it here, and redefine the
4155  // relevant 32-bit operation to be a 64-bit operation.
4156 
4157  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
4158  ++Position;
4159 
4160  bool MadeChange = false;
4161  while (Position != CurDAG->allnodes_begin()) {
4162  SDNode *N = &*--Position;
4163  // Skip dead nodes and any non-machine opcodes.
4164  if (N->use_empty() || !N->isMachineOpcode())
4165  continue;
4166 
4167  if (N->getMachineOpcode() != PPC::RLDICL)
4168  continue;
4169 
4170  if (N->getConstantOperandVal(1) != 0 ||
4171  N->getConstantOperandVal(2) != 32)
4172  continue;
4173 
4174  SDValue ISR = N->getOperand(0);
4175  if (!ISR.isMachineOpcode() ||
4176  ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
4177  continue;
4178 
4179  if (!ISR.hasOneUse())
4180  continue;
4181 
4182  if (ISR.getConstantOperandVal(2) != PPC::sub_32)
4183  continue;
4184 
4185  SDValue IDef = ISR.getOperand(0);
4186  if (!IDef.isMachineOpcode() ||
4187  IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
4188  continue;
4189 
4190  // We now know that we're looking at a canonical i32 -> i64 zext. See if we
4191  // can get rid of it.
4192 
4193  SDValue Op32 = ISR->getOperand(1);
4194  if (!Op32.isMachineOpcode())
4195  continue;
4196 
4197  // There are some 32-bit instructions that always clear the high-order 32
4198  // bits, there are also some instructions (like AND) that we can look
4199  // through.
4200  SmallPtrSet<SDNode *, 16> ToPromote;
4201  if (!PeepholePPC64ZExtGather(Op32, ToPromote))
4202  continue;
4203 
4204  // If the ToPromote set contains nodes that have uses outside of the set
4205  // (except for the original INSERT_SUBREG), then abort the transformation.
4206  bool OutsideUse = false;
4207  for (SDNode *PN : ToPromote) {
4208  for (SDNode *UN : PN->uses()) {
4209  if (!ToPromote.count(UN) && UN != ISR.getNode()) {
4210  OutsideUse = true;
4211  break;
4212  }
4213  }
4214 
4215  if (OutsideUse)
4216  break;
4217  }
4218  if (OutsideUse)
4219  continue;
4220 
4221  MadeChange = true;
4222 
4223  // We now know that this zero extension can be removed by promoting to
4224  // nodes in ToPromote to 64-bit operations, where for operations in the
4225  // frontier of the set, we need to insert INSERT_SUBREGs for their
4226  // operands.
4227  for (SDNode *PN : ToPromote) {
4228  unsigned NewOpcode;
4229  switch (PN->getMachineOpcode()) {
4230  default:
4231  llvm_unreachable("Don't know the 64-bit variant of this instruction");
4232  case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
4233  case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
4234  case PPC::SLW: NewOpcode = PPC::SLW8; break;
4235  case PPC::SRW: NewOpcode = PPC::SRW8; break;
4236  case PPC::LI: NewOpcode = PPC::LI8; break;
4237  case PPC::LIS: NewOpcode = PPC::LIS8; break;
4238  case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
4239  case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
4240  case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
4241  case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
4242  case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
4243  case PPC::OR: NewOpcode = PPC::OR8; break;
4244  case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
4245  case PPC::ORI: NewOpcode = PPC::ORI8; break;
4246  case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
4247  case PPC::AND: NewOpcode = PPC::AND8; break;
4248  case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break;
4249  case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break;
4250  }
4251 
4252  // Note: During the replacement process, the nodes will be in an
4253  // inconsistent state (some instructions will have operands with values
4254  // of the wrong type). Once done, however, everything should be right
4255  // again.
4256 
4258  for (const SDValue &V : PN->ops()) {
4259  if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
4260  !isa<ConstantSDNode>(V)) {
4261  SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
4262  SDNode *ReplOp =
4263  CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
4264  ISR.getNode()->getVTList(), ReplOpOps);
4265  Ops.push_back(SDValue(ReplOp, 0));
4266  } else {
4267  Ops.push_back(V);
4268  }
4269  }
4270 
4271  // Because all to-be-promoted nodes only have users that are other
4272  // promoted nodes (or the original INSERT_SUBREG), we can safely replace
4273  // the i32 result value type with i64.
4274 
4275  SmallVector<EVT, 2> NewVTs;
4276  SDVTList VTs = PN->getVTList();
4277  for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
4278  if (VTs.VTs[i] == MVT::i32)
4279  NewVTs.push_back(MVT::i64);
4280  else
4281  NewVTs.push_back(VTs.VTs[i]);
4282 
4283  DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
4284  DEBUG(PN->dump(CurDAG));
4285 
4286  CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
4287 
4288  DEBUG(dbgs() << "\nNew: ");
4289  DEBUG(PN->dump(CurDAG));
4290  DEBUG(dbgs() << "\n");
4291  }
4292 
4293  // Now we replace the original zero extend and its associated INSERT_SUBREG
4294  // with the value feeding the INSERT_SUBREG (which has now been promoted to
4295  // return an i64).
4296 
4297  DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
4298  DEBUG(N->dump(CurDAG));
4299  DEBUG(dbgs() << "\nNew: ");
4300  DEBUG(Op32.getNode()->dump(CurDAG));
4301  DEBUG(dbgs() << "\n");
4302 
4303  ReplaceUses(N, Op32.getNode());
4304  }
4305 
4306  if (MadeChange)
4307  CurDAG->RemoveDeadNodes();
4308 }
4309 
4310 void PPCDAGToDAGISel::PeepholePPC64() {
4311  // These optimizations are currently supported only for 64-bit SVR4.
4312  if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
4313  return;
4314 
4315  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
4316  ++Position;
4317 
4318  while (Position != CurDAG->allnodes_begin()) {
4319  SDNode *N = &*--Position;
4320  // Skip dead nodes and any non-machine opcodes.
4321  if (N->use_empty() || !N->isMachineOpcode())
4322  continue;
4323 
4324  unsigned FirstOp;
4325  unsigned StorageOpcode = N->getMachineOpcode();
4326 
4327  switch (StorageOpcode) {
4328  default: continue;
4329 
4330  case PPC::LBZ:
4331  case PPC::LBZ8:
4332  case PPC::LD:
4333  case PPC::LFD:
4334  case PPC::LFS:
4335  case PPC::LHA:
4336  case PPC::LHA8:
4337  case PPC::LHZ:
4338  case PPC::LHZ8:
4339  case PPC::LWA:
4340  case PPC::LWZ:
4341  case PPC::LWZ8:
4342  FirstOp = 0;
4343  break;
4344 
4345  case PPC::STB:
4346  case PPC::STB8:
4347  case PPC::STD:
4348  case PPC::STFD:
4349  case PPC::STFS:
4350  case PPC::STH:
4351  case PPC::STH8:
4352  case PPC::STW:
4353  case PPC::STW8:
4354  FirstOp = 1;
4355  break;
4356  }
4357 
4358  // If this is a load or store with a zero offset, or within the alignment,
4359  // we may be able to fold an add-immediate into the memory operation.
4360  // The check against alignment is below, as it can't occur until we check
4361  // the arguments to N
4362  if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
4363  continue;
4364 
4365  SDValue Base = N->getOperand(FirstOp + 1);
4366  if (!Base.isMachineOpcode())
4367  continue;
4368 
4369  unsigned Flags = 0;
4370  bool ReplaceFlags = true;
4371 
4372  // When the feeding operation is an add-immediate of some sort,
4373  // determine whether we need to add relocation information to the
4374  // target flags on the immediate operand when we fold it into the
4375  // load instruction.
4376  //
4377  // For something like ADDItocL, the relocation information is
4378  // inferred from the opcode; when we process it in the AsmPrinter,
4379  // we add the necessary relocation there. A load, though, can receive
4380  // relocation from various flavors of ADDIxxx, so we need to carry
4381  // the relocation information in the target flags.
4382  switch (Base.getMachineOpcode()) {
4383  default: continue;
4384 
4385  case PPC::ADDI8:
4386  case PPC::ADDI:
4387  // In some cases (such as TLS) the relocation information
4388  // is already in place on the operand, so copying the operand
4389  // is sufficient.
4390  ReplaceFlags = false;
4391  // For these cases, the immediate may not be divisible by 4, in
4392  // which case the fold is illegal for DS-form instructions. (The
4393  // other cases provide aligned addresses and are always safe.)
4394  if ((StorageOpcode == PPC::LWA ||
4395  StorageOpcode == PPC::LD ||
4396  StorageOpcode == PPC::STD) &&
4397  (!isa<ConstantSDNode>(Base.getOperand(1)) ||
4398  Base.getConstantOperandVal(1) % 4 != 0))
4399  continue;
4400  break;
4401  case PPC::ADDIdtprelL:
4402  Flags = PPCII::MO_DTPREL_LO;
4403  break;
4404  case PPC::ADDItlsldL:
4405  Flags = PPCII::MO_TLSLD_LO;
4406  break;
4407  case PPC::ADDItocL:
4408  Flags = PPCII::MO_TOC_LO;
4409  break;
4410  }
4411 
4412  SDValue ImmOpnd = Base.getOperand(1);
4413 
4414  // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
4415  // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
4416  // we might have needed different @ha relocation values for the offset
4417  // pointers).
4418  int MaxDisplacement = 7;
4419  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
4420  const GlobalValue *GV = GA->getGlobal();
4421  MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
4422  }
4423 
4424  bool UpdateHBase = false;
4425  SDValue HBase = Base.getOperand(0);
4426 
4427  int Offset = N->getConstantOperandVal(FirstOp);
4428  if (ReplaceFlags) {
4429  if (Offset < 0 || Offset > MaxDisplacement) {
4430  // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
4431  // one use, then we can do this for any offset, we just need to also
4432  // update the offset (i.e. the symbol addend) on the addis also.
4433  if (Base.getMachineOpcode() != PPC::ADDItocL)
4434  continue;
4435 
4436  if (!HBase.isMachineOpcode() ||
4437  HBase.getMachineOpcode() != PPC::ADDIStocHA)
4438  continue;
4439 
4440  if (!Base.hasOneUse() || !HBase.hasOneUse())
4441  continue;
4442 
4443  SDValue HImmOpnd = HBase.getOperand(1);
4444  if (HImmOpnd != ImmOpnd)
4445  continue;
4446 
4447  UpdateHBase = true;
4448  }
4449  } else {
4450  // If we're directly folding the addend from an addi instruction, then:
4451  // 1. In general, the offset on the memory access must be zero.
4452  // 2. If the addend is a constant, then it can be combined with a
4453  // non-zero offset, but only if the result meets the encoding
4454  // requirements.
4455  if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
4456  Offset += C->getSExtValue();
4457 
4458  if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD ||
4459  StorageOpcode == PPC::STD) && (Offset % 4) != 0)
4460  continue;
4461 
4462  if (!isInt<16>(Offset))
4463  continue;
4464 
4465  ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
4466  ImmOpnd.getValueType());
4467  } else if (Offset != 0) {
4468  continue;
4469  }
4470  }
4471 
4472  // We found an opportunity. Reverse the operands from the add
4473  // immediate and substitute them into the load or store. If
4474  // needed, update the target flags for the immediate operand to
4475  // reflect the necessary relocation information.
4476  DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
4477  DEBUG(Base->dump(CurDAG));
4478  DEBUG(dbgs() << "\nN: ");
4479  DEBUG(N->dump(CurDAG));
4480  DEBUG(dbgs() << "\n");
4481 
4482  // If the relocation information isn't already present on the
4483  // immediate operand, add it now.
4484  if (ReplaceFlags) {
4485  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
4486  SDLoc dl(GA);
4487  const GlobalValue *GV = GA->getGlobal();
4488  // We can't perform this optimization for data whose alignment
4489  // is insufficient for the instruction encoding.
4490  if (GV->getAlignment() < 4 &&
4491  (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
4492  StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
4493  DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
4494  continue;
4495  }
4496  ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
4497  } else if (ConstantPoolSDNode *CP =
4498  dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
4499  const Constant *C = CP->getConstVal();
4500  ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
4501  CP->getAlignment(),
4502  Offset, Flags);
4503  }
4504  }
4505 
4506  if (FirstOp == 1) // Store
4507  (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
4508  Base.getOperand(0), N->getOperand(3));
4509  else // Load
4510  (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
4511  N->getOperand(2));
4512 
4513  if (UpdateHBase)
4514  (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
4515  ImmOpnd);
4516 
4517  // The add-immediate may now be dead, in which case remove it.
4518  if (Base.getNode()->use_empty())
4519  CurDAG->RemoveDeadNode(Base.getNode());
4520  }
4521 }
4522 
4523 
4524 /// createPPCISelDag - This pass converts a legalized DAG into a
4525 /// PowerPC-specific DAG, ready for instruction scheduling.
4526 ///
4528  return new PPCDAGToDAGISel(TM);
4529 }
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:315
bool use_empty() const
Return true if there are no uses of this node.
SDValue getValue(unsigned R) const
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
T findLastSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the last set bit starting from the least significant bit.
Definition: MathExtras.h:208
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:572
size_t i
GPRC = address of GLOBAL_OFFSET_TABLE.
bool hasOneUse() const
Return true if there is exactly one use of this node.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:313
static unsigned index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:776
SDVTList getVTList() const
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:83
unsigned getID() const
Return the register class ID number.
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:271
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
A debug info location.
Definition: DebugLoc.h:34
const SDValue & getOperand(unsigned Num) const
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:817
void setNodeId(int Id)
Set unique node id.
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
const SDValue & getBasePtr() const
unsigned int NumVTs
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:369
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:881
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static SDNode * getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm)
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:133
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode * > &ToPromote)
A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
struct fuzzer::@269 Flags
const HexagonInstrInfo * TII
Shift and rotation operations.
Definition: ISDOpcodes.h:344
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:452
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
Reg
All possible values of the reg field in the ModR/M byte.
unsigned getMachineOpcode() const
int getMaskElt(unsigned Idx) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:370
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const MachineBasicBlock & front() const
#define F(x, y, z)
Definition: MD5.cpp:51
constexpr bool isMask_64(uint64_t Value)
isMask_64 - This function returns true if the argument is a non-empty sequence of ones starting at th...
Definition: MathExtras.h:380
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
#define T
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:363
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
Function Alias Analysis false
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:842
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:125
EVT getMemoryVT() const
Return the type of the in-memory value.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:487
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:328
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
TargetInstrInfo - Interface to description of machine instruction set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:518
unsigned getNumSuccessors() const
Return the number of successors that this terminator has.
Definition: InstrTypes.h:74
SDNode * getNode() const
get the SDNode which holds the desired result
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isIntS16Immediate(SDNode *N, short &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:52
MVT - Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
const SDValue & getOperand(unsigned i) const
BasicBlock * getSuccessor(unsigned idx) const
Return the specified successor.
Definition: InstrTypes.h:79
This is an important base class in LLVM.
Definition: Constant.h:42
MO_NLP_FLAG - If this bit is set, the symbol reference is actually to the non_lazy_ptr for the global...
Definition: PPC.h:74
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC)
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
Definition: APInt.h:1952
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
This class provides iterator support for SDUse operands that use a specific SDNode.
uint32_t Offset
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:180
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
unsigned getOpcode() const
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
iterator begin() const
Definition: SmallPtrSet.h:398
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
Common code between 32-bit and 64-bit PowerPC targets.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
EVT - Extended Value Type.
Definition: ValueTypes.h:31
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:378
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
MachineBasicBlock * MBB
MBB - The current block.
const SDValue & getOffset() const
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for ""bit permutations"), cl::Hidden)
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:546
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition: Module.cpp:488
void dump() const
Dump this node, for debugging.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:274
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:361
static SDNode * getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm)
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:166
SDNode * SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
const DataFlowGraph & G
Definition: RDFGraph.cpp:206
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
bool isMachineOpcode() const
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
These values identify relocations on immediates folded into memory operations.
Definition: PPC.h:93
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:584
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after execu...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:77
iterator_range< use_iterator > uses()
BranchProbabilityInfo * BPI
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, SDNode *Cst1, SDNode *Cst2)
int64_t getSExtValue() const
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:400
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:403
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2...
iterator end() const
Definition: SmallPtrSet.h:405
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:528
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
uint64_t getConstantOperandVal(unsigned i) const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
ArrayRef< SDUse > ops() const
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
The CMPB instruction (takes two operands of i32 or i64).
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
static unsigned getInt64Count(int64_t Imm)
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:312
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
static int const Threshold
TODO: Write a new FunctionPass AliasAnalysis so that it can keep a cache.
const unsigned Kind
unsigned getAlignment() const
Definition: Globals.cpp:72
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side...
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:326
static uint64_t Rot64(uint64_t Imm, unsigned R)
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, const SDValue &DestMBB)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:537
LLVM Value Representation.
Definition: Value.h:71
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:331
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
#define DEBUG(X)
Definition: Debug.h:100
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:377
Conversion operators.
Definition: ISDOpcodes.h:397
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:381
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:406
FunctionPass * createPPCISelDag(PPCTargetMachine &TM)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG, ready for instruction scheduling.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static unsigned getInt64CountDirect(int64_t Imm)
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
virtual const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const
Returns a TargetRegisterClass used for pointer values.
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:321
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:529
BRIND - Indirect branch.
Definition: ISDOpcodes.h:556
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
void resize(size_type N)
Definition: SmallVector.h:352
This class is used to represent ISD::LOAD nodes.