LLVM  6.0.0svn
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a pattern matching instruction selector for PowerPC,
11 // converting from a legalized dag to a PPC dag.
12 //
13 //===----------------------------------------------------------------------===//
14 
17 #include "PPC.h"
18 #include "PPCISelLowering.h"
19 #include "PPCMachineFunctionInfo.h"
20 #include "PPCSubtarget.h"
21 #include "PPCTargetMachine.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/IR/GlobalValue.h"
46 #include "llvm/IR/InlineAsm.h"
47 #include "llvm/IR/InstrTypes.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CodeGen.h"
52 #include "llvm/Support/Compiler.h"
53 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/KnownBits.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <limits>
63 #include <memory>
64 #include <new>
65 #include <tuple>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define DEBUG_TYPE "ppc-codegen"
71 
72 // FIXME: Remove this once the bug has been fixed!
73 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
74 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
75 
76 static cl::opt<bool>
77  UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
78  cl::desc("use aggressive ppc isel for bit permutations"),
79  cl::Hidden);
81  "ppc-bit-perm-rewriter-stress-rotates",
82  cl::desc("stress rotate selection in aggressive ppc isel for "
83  "bit permutations"),
84  cl::Hidden);
85 
87  "ppc-use-branch-hint", cl::init(true),
88  cl::desc("Enable static hinting of branches on ppc"),
89  cl::Hidden);
90 
91 namespace {
92 
93  //===--------------------------------------------------------------------===//
94  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
95  /// instructions for SelectionDAG operations.
96  ///
97  class PPCDAGToDAGISel : public SelectionDAGISel {
98  const PPCTargetMachine &TM;
99  const PPCSubtarget *PPCSubTarget;
100  const PPCTargetLowering *PPCLowering;
101  unsigned GlobalBaseReg;
102 
103  public:
104  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
105  : SelectionDAGISel(tm, OptLevel), TM(tm) {}
106 
107  bool runOnMachineFunction(MachineFunction &MF) override {
108  // Make sure we re-emit a set of the global base reg if necessary
109  GlobalBaseReg = 0;
110  PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
111  PPCLowering = PPCSubTarget->getTargetLowering();
113 
114  if (!PPCSubTarget->isSVR4ABI())
115  InsertVRSaveCode(MF);
116 
117  return true;
118  }
119 
120  void PreprocessISelDAG() override;
121  void PostprocessISelDAG() override;
122 
123  /// getI16Imm - Return a target constant with the specified value, of type
124  /// i16.
125  inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
126  return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
127  }
128 
129  /// getI32Imm - Return a target constant with the specified value, of type
130  /// i32.
131  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
132  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
133  }
134 
135  /// getI64Imm - Return a target constant with the specified value, of type
136  /// i64.
137  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
138  return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
139  }
140 
141  /// getSmallIPtrImm - Return a target constant of pointer type.
142  inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
143  return CurDAG->getTargetConstant(
144  Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
145  }
146 
147  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
148  /// rotate and mask opcode and mask operation.
149  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
150  unsigned &SH, unsigned &MB, unsigned &ME);
151 
152  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
153  /// base register. Return the virtual register that holds this value.
154  SDNode *getGlobalBaseReg();
155 
156  void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
157 
158  // Select - Convert the specified operand from a target-independent to a
159  // target-specific node if it hasn't already been changed.
160  void Select(SDNode *N) override;
161 
162  bool tryBitfieldInsert(SDNode *N);
163  bool tryBitPermutation(SDNode *N);
164 
165  /// SelectCC - Select a comparison of the specified values with the
166  /// specified condition code, returning the CR# of the expression.
167  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
168  const SDLoc &dl);
169 
170  /// SelectAddrImm - Returns true if the address N can be represented by
171  /// a base register plus a signed 16-bit displacement [r+imm].
172  bool SelectAddrImm(SDValue N, SDValue &Disp,
173  SDValue &Base) {
174  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
175  }
176 
177  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
178  /// immediate field. Note that the operand at this point is already the
179  /// result of a prior SelectAddressRegImm call.
180  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
181  if (N.getOpcode() == ISD::TargetConstant ||
183  Out = N;
184  return true;
185  }
186 
187  return false;
188  }
189 
190  /// SelectAddrIdx - Given the specified addressed, check to see if it can be
191  /// represented as an indexed [r+r] operation. Returns false if it can
192  /// be represented by [r+imm], which are preferred.
193  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
194  return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
195  }
196 
197  /// SelectAddrIdxOnly - Given the specified addressed, force it to be
198  /// represented as an indexed [r+r] operation.
199  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
200  return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
201  }
202 
203  /// SelectAddrImmX4 - Returns true if the address N can be represented by
204  /// a base register plus a signed 16-bit displacement that is a multiple of 4.
205  /// Suitable for use by STD and friends.
206  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
207  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
208  }
209 
210  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
211  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
212  }
213 
214  // Select an address into a single register.
215  bool SelectAddr(SDValue N, SDValue &Base) {
216  Base = N;
217  return true;
218  }
219 
220  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
221  /// inline asm expressions. It is always correct to compute the value into
222  /// a register. The case of adding a (possibly relocatable) constant to a
223  /// register can be improved, but it is wrong to substitute Reg+Reg for
224  /// Reg in an asm, because the load or store opcode would have to change.
225  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
226  unsigned ConstraintID,
227  std::vector<SDValue> &OutOps) override {
228  switch(ConstraintID) {
229  default:
230  errs() << "ConstraintID: " << ConstraintID << "\n";
231  llvm_unreachable("Unexpected asm memory constraint");
239  // We need to make sure that this one operand does not end up in r0
240  // (because we might end up lowering this as 0(%op)).
241  const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
242  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
243  SDLoc dl(Op);
244  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
245  SDValue NewOp =
246  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
247  dl, Op.getValueType(),
248  Op, RC), 0);
249 
250  OutOps.push_back(NewOp);
251  return false;
252  }
253  return true;
254  }
255 
256  void InsertVRSaveCode(MachineFunction &MF);
257 
258  StringRef getPassName() const override {
259  return "PowerPC DAG->DAG Pattern Instruction Selection";
260  }
261 
262 // Include the pieces autogenerated from the target description.
263 #include "PPCGenDAGISel.inc"
264 
265 private:
266  bool trySETCC(SDNode *N);
267 
268  void PeepholePPC64();
269  void PeepholePPC64ZExt();
270  void PeepholeCROps();
271 
272  SDValue combineToCMPB(SDNode *N);
273  void foldBoolExts(SDValue &Res, SDNode *&N);
274 
275  bool AllUsersSelectZero(SDNode *N);
276  void SwapAllSelectUsers(SDNode *N);
277 
278  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
279  void transferMemOperands(SDNode *N, SDNode *Result);
280  };
281 
282 } // end anonymous namespace
283 
284 /// InsertVRSaveCode - Once the entire function has been instruction selected,
285 /// all virtual registers are created and all machine instructions are built,
286 /// check to see if we need to save/restore VRSAVE. If so, do it.
287 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
288  // Check to see if this function uses vector registers, which means we have to
289  // save and restore the VRSAVE register and update it with the regs we use.
290  //
291  // In this case, there will be virtual registers of vector type created
292  // by the scheduler. Detect them now.
293  bool HasVectorVReg = false;
294  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
296  if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
297  HasVectorVReg = true;
298  break;
299  }
300  }
301  if (!HasVectorVReg) return; // nothing to do.
302 
303  // If we have a vector register, we want to emit code into the entry and exit
304  // blocks to save and restore the VRSAVE register. We do this here (instead
305  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
306  //
307  // 1. This (trivially) reduces the load on the register allocator, by not
308  // having to represent the live range of the VRSAVE register.
309  // 2. This (more significantly) allows us to create a temporary virtual
310  // register to hold the saved VRSAVE value, allowing this temporary to be
311  // register allocated, instead of forcing it to be spilled to the stack.
312 
313  // Create two vregs - one to hold the VRSAVE register that is live-in to the
314  // function and one for the value after having bits or'd into it.
315  unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
316  unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
317 
318  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
319  MachineBasicBlock &EntryBB = *Fn.begin();
320  DebugLoc dl;
321  // Emit the following code into the entry block:
322  // InVRSAVE = MFVRSAVE
323  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
324  // MTVRSAVE UpdatedVRSAVE
325  MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
326  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
327  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
328  UpdatedVRSAVE).addReg(InVRSAVE);
329  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
330 
331  // Find all return blocks, outputting a restore in each epilog.
332  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
333  if (BB->isReturnBlock()) {
334  IP = BB->end(); --IP;
335 
336  // Skip over all terminator instructions, which are part of the return
337  // sequence.
339  while (I2 != BB->begin() && (--I2)->isTerminator())
340  IP = I2;
341 
342  // Emit: MTVRSAVE InVRSave
343  BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
344  }
345  }
346 }
347 
348 /// getGlobalBaseReg - Output the instructions required to put the
349 /// base address to use for accessing globals into a register.
350 ///
351 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
352  if (!GlobalBaseReg) {
353  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
354  // Insert the set of GlobalBaseReg into the first MBB of the function
355  MachineBasicBlock &FirstMBB = MF->front();
356  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
357  const Module *M = MF->getFunction()->getParent();
358  DebugLoc dl;
359 
360  if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
361  if (PPCSubTarget->isTargetELF()) {
362  GlobalBaseReg = PPC::R30;
363  if (M->getPICLevel() == PICLevel::SmallPIC) {
364  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
365  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
366  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
367  } else {
368  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
369  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
370  unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
371  BuildMI(FirstMBB, MBBI, dl,
372  TII.get(PPC::UpdateGBR), GlobalBaseReg)
373  .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
374  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
375  }
376  } else {
377  GlobalBaseReg =
378  RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
379  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
380  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
381  }
382  } else {
383  GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
384  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
385  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
386  }
387  }
388  return CurDAG->getRegister(GlobalBaseReg,
389  PPCLowering->getPointerTy(CurDAG->getDataLayout()))
390  .getNode();
391 }
392 
393 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
394 /// operand. If so Imm will receive the 32-bit value.
395 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
396  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
397  Imm = cast<ConstantSDNode>(N)->getZExtValue();
398  return true;
399  }
400  return false;
401 }
402 
403 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
404 /// operand. If so Imm will receive the 64-bit value.
405 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
406  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
407  Imm = cast<ConstantSDNode>(N)->getZExtValue();
408  return true;
409  }
410  return false;
411 }
412 
413 // isInt32Immediate - This method tests to see if a constant operand.
414 // If so Imm will receive the 32 bit value.
415 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
416  return isInt32Immediate(N.getNode(), Imm);
417 }
418 
419 /// isInt64Immediate - This method tests to see if the value is a 64-bit
420 /// constant operand. If so Imm will receive the 64-bit value.
421 static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
422  return isInt64Immediate(N.getNode(), Imm);
423 }
424 
425 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
426  const SDValue &DestMBB) {
427  assert(isa<BasicBlockSDNode>(DestMBB));
428 
429  if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
430 
431  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
432  const TerminatorInst *BBTerm = BB->getTerminator();
433 
434  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
435 
436  const BasicBlock *TBB = BBTerm->getSuccessor(0);
437  const BasicBlock *FBB = BBTerm->getSuccessor(1);
438 
439  auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
440  auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
441 
442  // We only want to handle cases which are easy to predict at static time, e.g.
443  // C++ throw statement, that is very likely not taken, or calling never
444  // returned function, e.g. stdlib exit(). So we set Threshold to filter
445  // unwanted cases.
446  //
447  // Below is LLVM branch weight table, we only want to handle case 1, 2
448  //
449  // Case Taken:Nontaken Example
450  // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
451  // 2. Invoke-terminating 1:1048575
452  // 3. Coldblock 4:64 __builtin_expect
453  // 4. Loop Branch 124:4 For loop
454  // 5. PH/ZH/FPH 20:12
455  const uint32_t Threshold = 10000;
456 
457  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
458  return PPC::BR_NO_HINT;
459 
460  DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
461  << BB->getName() << "'\n"
462  << " -> " << TBB->getName() << ": " << TProb << "\n"
463  << " -> " << FBB->getName() << ": " << FProb << "\n");
464 
465  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
466 
467  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
468  // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
469  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
470  std::swap(TProb, FProb);
471 
472  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
473 }
474 
475 // isOpcWithIntImmediate - This method tests to see if the node is a specific
476 // opcode and that it has a immediate integer right operand.
477 // If so Imm will receive the 32 bit value.
478 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
479  return N->getOpcode() == Opc
480  && isInt32Immediate(N->getOperand(1).getNode(), Imm);
481 }
482 
483 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
484  SDLoc dl(SN);
485  int FI = cast<FrameIndexSDNode>(N)->getIndex();
486  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
487  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
488  if (SN->hasOneUse())
489  CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
490  getSmallIPtrImm(Offset, dl));
491  else
492  ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
493  getSmallIPtrImm(Offset, dl)));
494 }
495 
496 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
497  bool isShiftMask, unsigned &SH,
498  unsigned &MB, unsigned &ME) {
499  // Don't even go down this path for i64, since different logic will be
500  // necessary for rldicl/rldicr/rldimi.
501  if (N->getValueType(0) != MVT::i32)
502  return false;
503 
504  unsigned Shift = 32;
505  unsigned Indeterminant = ~0; // bit mask marking indeterminant results
506  unsigned Opcode = N->getOpcode();
507  if (N->getNumOperands() != 2 ||
508  !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
509  return false;
510 
511  if (Opcode == ISD::SHL) {
512  // apply shift left to mask if it comes first
513  if (isShiftMask) Mask = Mask << Shift;
514  // determine which bits are made indeterminant by shift
515  Indeterminant = ~(0xFFFFFFFFu << Shift);
516  } else if (Opcode == ISD::SRL) {
517  // apply shift right to mask if it comes first
518  if (isShiftMask) Mask = Mask >> Shift;
519  // determine which bits are made indeterminant by shift
520  Indeterminant = ~(0xFFFFFFFFu >> Shift);
521  // adjust for the left rotate
522  Shift = 32 - Shift;
523  } else if (Opcode == ISD::ROTL) {
524  Indeterminant = 0;
525  } else {
526  return false;
527  }
528 
529  // if the mask doesn't intersect any Indeterminant bits
530  if (Mask && !(Mask & Indeterminant)) {
531  SH = Shift & 31;
532  // make sure the mask is still a mask (wrap arounds may not be)
533  return isRunOfOnes(Mask, MB, ME);
534  }
535  return false;
536 }
537 
538 /// Turn an or of two masked values into the rotate left word immediate then
539 /// mask insert (rlwimi) instruction.
540 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
541  SDValue Op0 = N->getOperand(0);
542  SDValue Op1 = N->getOperand(1);
543  SDLoc dl(N);
544 
545  KnownBits LKnown, RKnown;
546  CurDAG->computeKnownBits(Op0, LKnown);
547  CurDAG->computeKnownBits(Op1, RKnown);
548 
549  unsigned TargetMask = LKnown.Zero.getZExtValue();
550  unsigned InsertMask = RKnown.Zero.getZExtValue();
551 
552  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
553  unsigned Op0Opc = Op0.getOpcode();
554  unsigned Op1Opc = Op1.getOpcode();
555  unsigned Value, SH = 0;
556  TargetMask = ~TargetMask;
557  InsertMask = ~InsertMask;
558 
559  // If the LHS has a foldable shift and the RHS does not, then swap it to the
560  // RHS so that we can fold the shift into the insert.
561  if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
562  if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
563  Op0.getOperand(0).getOpcode() == ISD::SRL) {
564  if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
565  Op1.getOperand(0).getOpcode() != ISD::SRL) {
566  std::swap(Op0, Op1);
567  std::swap(Op0Opc, Op1Opc);
568  std::swap(TargetMask, InsertMask);
569  }
570  }
571  } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
572  if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
573  Op1.getOperand(0).getOpcode() != ISD::SRL) {
574  std::swap(Op0, Op1);
575  std::swap(Op0Opc, Op1Opc);
576  std::swap(TargetMask, InsertMask);
577  }
578  }
579 
580  unsigned MB, ME;
581  if (isRunOfOnes(InsertMask, MB, ME)) {
582  if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
583  isInt32Immediate(Op1.getOperand(1), Value)) {
584  Op1 = Op1.getOperand(0);
585  SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
586  }
587  if (Op1Opc == ISD::AND) {
588  // The AND mask might not be a constant, and we need to make sure that
589  // if we're going to fold the masking with the insert, all bits not
590  // know to be zero in the mask are known to be one.
591  KnownBits MKnown;
592  CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
593  bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
594 
595  unsigned SHOpc = Op1.getOperand(0).getOpcode();
596  if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
597  isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
598  // Note that Value must be in range here (less than 32) because
599  // otherwise there would not be any bits set in InsertMask.
600  Op1 = Op1.getOperand(0).getOperand(0);
601  SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
602  }
603  }
604 
605  SH &= 31;
606  SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
607  getI32Imm(ME, dl) };
608  ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
609  return true;
610  }
611  }
612  return false;
613 }
614 
615 // Predict the number of instructions that would be generated by calling
616 // selectI64Imm(N).
617 static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
618  // Assume no remaining bits.
619  unsigned Remainder = 0;
620  // Assume no shift required.
621  unsigned Shift = 0;
622 
623  // If it can't be represented as a 32 bit value.
624  if (!isInt<32>(Imm)) {
625  Shift = countTrailingZeros<uint64_t>(Imm);
626  int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
627 
628  // If the shifted value fits 32 bits.
629  if (isInt<32>(ImmSh)) {
630  // Go with the shifted value.
631  Imm = ImmSh;
632  } else {
633  // Still stuck with a 64 bit value.
634  Remainder = Imm;
635  Shift = 32;
636  Imm >>= 32;
637  }
638  }
639 
640  // Intermediate operand.
641  unsigned Result = 0;
642 
643  // Handle first 32 bits.
644  unsigned Lo = Imm & 0xFFFF;
645 
646  // Simple value.
647  if (isInt<16>(Imm)) {
648  // Just the Lo bits.
649  ++Result;
650  } else if (Lo) {
651  // Handle the Hi bits and Lo bits.
652  Result += 2;
653  } else {
654  // Just the Hi bits.
655  ++Result;
656  }
657 
658  // If no shift, we're done.
659  if (!Shift) return Result;
660 
661  // If Hi word == Lo word,
662  // we can use rldimi to insert the Lo word into Hi word.
663  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
664  ++Result;
665  return Result;
666  }
667 
668  // Shift for next step if the upper 32-bits were not zero.
669  if (Imm)
670  ++Result;
671 
672  // Add in the last bits as required.
673  if ((Remainder >> 16) & 0xFFFF)
674  ++Result;
675  if (Remainder & 0xFFFF)
676  ++Result;
677 
678  return Result;
679 }
680 
681 static uint64_t Rot64(uint64_t Imm, unsigned R) {
682  return (Imm << R) | (Imm >> (64 - R));
683 }
684 
685 static unsigned selectI64ImmInstrCount(int64_t Imm) {
686  unsigned Count = selectI64ImmInstrCountDirect(Imm);
687 
688  // If the instruction count is 1 or 2, we do not need further analysis
689  // since rotate + load constant requires at least 2 instructions.
690  if (Count <= 2)
691  return Count;
692 
693  for (unsigned r = 1; r < 63; ++r) {
694  uint64_t RImm = Rot64(Imm, r);
695  unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
696  Count = std::min(Count, RCount);
697 
698  // See comments in selectI64Imm for an explanation of the logic below.
699  unsigned LS = findLastSet(RImm);
700  if (LS != r-1)
701  continue;
702 
703  uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
704  uint64_t RImmWithOnes = RImm | OnesMask;
705 
706  RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
707  Count = std::min(Count, RCount);
708  }
709 
710  return Count;
711 }
712 
713 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
714 // (above) needs to be kept in sync with this function.
715 static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
716  int64_t Imm) {
717  // Assume no remaining bits.
718  unsigned Remainder = 0;
719  // Assume no shift required.
720  unsigned Shift = 0;
721 
722  // If it can't be represented as a 32 bit value.
723  if (!isInt<32>(Imm)) {
724  Shift = countTrailingZeros<uint64_t>(Imm);
725  int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
726 
727  // If the shifted value fits 32 bits.
728  if (isInt<32>(ImmSh)) {
729  // Go with the shifted value.
730  Imm = ImmSh;
731  } else {
732  // Still stuck with a 64 bit value.
733  Remainder = Imm;
734  Shift = 32;
735  Imm >>= 32;
736  }
737  }
738 
739  // Intermediate operand.
740  SDNode *Result;
741 
742  // Handle first 32 bits.
743  unsigned Lo = Imm & 0xFFFF;
744  unsigned Hi = (Imm >> 16) & 0xFFFF;
745 
746  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
747  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
748  };
749 
750  // Simple value.
751  if (isInt<16>(Imm)) {
752  // Just the Lo bits.
753  Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
754  } else if (Lo) {
755  // Handle the Hi bits.
756  unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
757  Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
758  // And Lo bits.
759  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
760  SDValue(Result, 0), getI32Imm(Lo));
761  } else {
762  // Just the Hi bits.
763  Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
764  }
765 
766  // If no shift, we're done.
767  if (!Shift) return Result;
768 
769  // If Hi word == Lo word,
770  // we can use rldimi to insert the Lo word into Hi word.
771  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
772  SDValue Ops[] =
773  { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
774  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
775  }
776 
777  // Shift for next step if the upper 32-bits were not zero.
778  if (Imm) {
779  Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
780  SDValue(Result, 0),
781  getI32Imm(Shift),
782  getI32Imm(63 - Shift));
783  }
784 
785  // Add in the last bits as required.
786  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
787  Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
788  SDValue(Result, 0), getI32Imm(Hi));
789  }
790  if ((Lo = Remainder & 0xFFFF)) {
791  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
792  SDValue(Result, 0), getI32Imm(Lo));
793  }
794 
795  return Result;
796 }
797 
798 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
799  int64_t Imm) {
800  unsigned Count = selectI64ImmInstrCountDirect(Imm);
801 
802  // If the instruction count is 1 or 2, we do not need further analysis
803  // since rotate + load constant requires at least 2 instructions.
804  if (Count <= 2)
805  return selectI64ImmDirect(CurDAG, dl, Imm);
806 
807  unsigned RMin = 0;
808 
809  int64_t MatImm;
810  unsigned MaskEnd;
811 
812  for (unsigned r = 1; r < 63; ++r) {
813  uint64_t RImm = Rot64(Imm, r);
814  unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
815  if (RCount < Count) {
816  Count = RCount;
817  RMin = r;
818  MatImm = RImm;
819  MaskEnd = 63;
820  }
821 
822  // If the immediate to generate has many trailing zeros, it might be
823  // worthwhile to generate a rotated value with too many leading ones
824  // (because that's free with li/lis's sign-extension semantics), and then
825  // mask them off after rotation.
826 
827  unsigned LS = findLastSet(RImm);
828  // We're adding (63-LS) higher-order ones, and we expect to mask them off
829  // after performing the inverse rotation by (64-r). So we need that:
830  // 63-LS == 64-r => LS == r-1
831  if (LS != r-1)
832  continue;
833 
834  uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
835  uint64_t RImmWithOnes = RImm | OnesMask;
836 
837  RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
838  if (RCount < Count) {
839  Count = RCount;
840  RMin = r;
841  MatImm = RImmWithOnes;
842  MaskEnd = LS;
843  }
844  }
845 
846  if (!RMin)
847  return selectI64ImmDirect(CurDAG, dl, Imm);
848 
849  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
850  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
851  };
852 
853  SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0);
854  return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
855  getI32Imm(64 - RMin), getI32Imm(MaskEnd));
856 }
857 
858 // Select a 64-bit constant.
859 static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
860  SDLoc dl(N);
861 
862  // Get 64 bit value.
863  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
864  return selectI64Imm(CurDAG, dl, Imm);
865 }
866 
867 namespace {
868 
869 class BitPermutationSelector {
870  struct ValueBit {
871  SDValue V;
872 
873  // The bit number in the value, using a convention where bit 0 is the
874  // lowest-order bit.
875  unsigned Idx;
876 
877  enum Kind {
878  ConstZero,
879  Variable
880  } K;
881 
882  ValueBit(SDValue V, unsigned I, Kind K = Variable)
883  : V(V), Idx(I), K(K) {}
884  ValueBit(Kind K = Variable)
885  : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
886 
887  bool isZero() const {
888  return K == ConstZero;
889  }
890 
891  bool hasValue() const {
892  return K == Variable;
893  }
894 
895  SDValue getValue() const {
896  assert(hasValue() && "Cannot get the value of a constant bit");
897  return V;
898  }
899 
900  unsigned getValueBitIndex() const {
901  assert(hasValue() && "Cannot get the value bit index of a constant bit");
902  return Idx;
903  }
904  };
905 
906  // A bit group has the same underlying value and the same rotate factor.
907  struct BitGroup {
908  SDValue V;
909  unsigned RLAmt;
910  unsigned StartIdx, EndIdx;
911 
912  // This rotation amount assumes that the lower 32 bits of the quantity are
913  // replicated in the high 32 bits by the rotation operator (which is done
914  // by rlwinm and friends in 64-bit mode).
915  bool Repl32;
916  // Did converting to Repl32 == true change the rotation factor? If it did,
917  // it decreased it by 32.
918  bool Repl32CR;
919  // Was this group coalesced after setting Repl32 to true?
920  bool Repl32Coalesced;
921 
922  BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
923  : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
924  Repl32Coalesced(false) {
925  DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
926  " [" << S << ", " << E << "]\n");
927  }
928  };
929 
930  // Information on each (Value, RLAmt) pair (like the number of groups
931  // associated with each) used to choose the lowering method.
932  struct ValueRotInfo {
933  SDValue V;
934  unsigned RLAmt = std::numeric_limits<unsigned>::max();
935  unsigned NumGroups = 0;
936  unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
937  bool Repl32 = false;
938 
939  ValueRotInfo() = default;
940 
941  // For sorting (in reverse order) by NumGroups, and then by
942  // FirstGroupStartIdx.
943  bool operator < (const ValueRotInfo &Other) const {
944  // We need to sort so that the non-Repl32 come first because, when we're
945  // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
946  // masking operation.
947  if (Repl32 < Other.Repl32)
948  return true;
949  else if (Repl32 > Other.Repl32)
950  return false;
951  else if (NumGroups > Other.NumGroups)
952  return true;
953  else if (NumGroups < Other.NumGroups)
954  return false;
955  else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
956  return true;
957  return false;
958  }
959  };
960 
961  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
962  using ValueBitsMemoizer =
964  ValueBitsMemoizer Memoizer;
965 
966  // Return a pair of bool and a SmallVector pointer to a memoization entry.
967  // The bool is true if something interesting was deduced, otherwise if we're
968  // providing only a generic representation of V (or something else likewise
969  // uninteresting for instruction selection) through the SmallVector.
970  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
971  unsigned NumBits) {
972  auto &ValueEntry = Memoizer[V];
973  if (ValueEntry)
974  return std::make_pair(ValueEntry->first, &ValueEntry->second);
975  ValueEntry.reset(new ValueBitsMemoizedValue());
976  bool &Interesting = ValueEntry->first;
977  SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
978  Bits.resize(NumBits);
979 
980  switch (V.getOpcode()) {
981  default: break;
982  case ISD::ROTL:
983  if (isa<ConstantSDNode>(V.getOperand(1))) {
984  unsigned RotAmt = V.getConstantOperandVal(1);
985 
986  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
987 
988  for (unsigned i = 0; i < NumBits; ++i)
989  Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
990 
991  return std::make_pair(Interesting = true, &Bits);
992  }
993  break;
994  case ISD::SHL:
995  if (isa<ConstantSDNode>(V.getOperand(1))) {
996  unsigned ShiftAmt = V.getConstantOperandVal(1);
997 
998  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
999 
1000  for (unsigned i = ShiftAmt; i < NumBits; ++i)
1001  Bits[i] = LHSBits[i - ShiftAmt];
1002 
1003  for (unsigned i = 0; i < ShiftAmt; ++i)
1004  Bits[i] = ValueBit(ValueBit::ConstZero);
1005 
1006  return std::make_pair(Interesting = true, &Bits);
1007  }
1008  break;
1009  case ISD::SRL:
1010  if (isa<ConstantSDNode>(V.getOperand(1))) {
1011  unsigned ShiftAmt = V.getConstantOperandVal(1);
1012 
1013  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1014 
1015  for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1016  Bits[i] = LHSBits[i + ShiftAmt];
1017 
1018  for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1019  Bits[i] = ValueBit(ValueBit::ConstZero);
1020 
1021  return std::make_pair(Interesting = true, &Bits);
1022  }
1023  break;
1024  case ISD::AND:
1025  if (isa<ConstantSDNode>(V.getOperand(1))) {
1026  uint64_t Mask = V.getConstantOperandVal(1);
1027 
1028  const SmallVector<ValueBit, 64> *LHSBits;
1029  // Mark this as interesting, only if the LHS was also interesting. This
1030  // prevents the overall procedure from matching a single immediate 'and'
1031  // (which is non-optimal because such an and might be folded with other
1032  // things if we don't select it here).
1033  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1034 
1035  for (unsigned i = 0; i < NumBits; ++i)
1036  if (((Mask >> i) & 1) == 1)
1037  Bits[i] = (*LHSBits)[i];
1038  else
1039  Bits[i] = ValueBit(ValueBit::ConstZero);
1040 
1041  return std::make_pair(Interesting, &Bits);
1042  }
1043  break;
1044  case ISD::OR: {
1045  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1046  const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1047 
1048  bool AllDisjoint = true;
1049  for (unsigned i = 0; i < NumBits; ++i)
1050  if (LHSBits[i].isZero())
1051  Bits[i] = RHSBits[i];
1052  else if (RHSBits[i].isZero())
1053  Bits[i] = LHSBits[i];
1054  else {
1055  AllDisjoint = false;
1056  break;
1057  }
1058 
1059  if (!AllDisjoint)
1060  break;
1061 
1062  return std::make_pair(Interesting = true, &Bits);
1063  }
1064  case ISD::ZERO_EXTEND: {
1065  // We support only the case with zero extension from i32 to i64 so far.
1066  if (V.getValueType() != MVT::i64 ||
1067  V.getOperand(0).getValueType() != MVT::i32)
1068  break;
1069 
1070  const SmallVector<ValueBit, 64> *LHSBits;
1071  const unsigned NumOperandBits = 32;
1072  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1073  NumOperandBits);
1074 
1075  for (unsigned i = 0; i < NumOperandBits; ++i)
1076  Bits[i] = (*LHSBits)[i];
1077 
1078  for (unsigned i = NumOperandBits; i < NumBits; ++i)
1079  Bits[i] = ValueBit(ValueBit::ConstZero);
1080 
1081  return std::make_pair(Interesting, &Bits);
1082  }
1083  }
1084 
1085  for (unsigned i = 0; i < NumBits; ++i)
1086  Bits[i] = ValueBit(V, i);
1087 
1088  return std::make_pair(Interesting = false, &Bits);
1089  }
1090 
1091  // For each value (except the constant ones), compute the left-rotate amount
1092  // to get it from its original to final position.
1093  void computeRotationAmounts() {
1094  HasZeros = false;
1095  RLAmt.resize(Bits.size());
1096  for (unsigned i = 0; i < Bits.size(); ++i)
1097  if (Bits[i].hasValue()) {
1098  unsigned VBI = Bits[i].getValueBitIndex();
1099  if (i >= VBI)
1100  RLAmt[i] = i - VBI;
1101  else
1102  RLAmt[i] = Bits.size() - (VBI - i);
1103  } else if (Bits[i].isZero()) {
1104  HasZeros = true;
1105  RLAmt[i] = UINT32_MAX;
1106  } else {
1107  llvm_unreachable("Unknown value bit type");
1108  }
1109  }
1110 
1111  // Collect groups of consecutive bits with the same underlying value and
1112  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1113  // they break up groups.
1114  void collectBitGroups(bool LateMask) {
1115  BitGroups.clear();
1116 
1117  unsigned LastRLAmt = RLAmt[0];
1118  SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1119  unsigned LastGroupStartIdx = 0;
1120  for (unsigned i = 1; i < Bits.size(); ++i) {
1121  unsigned ThisRLAmt = RLAmt[i];
1122  SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1123  if (LateMask && !ThisValue) {
1124  ThisValue = LastValue;
1125  ThisRLAmt = LastRLAmt;
1126  // If we're doing late masking, then the first bit group always starts
1127  // at zero (even if the first bits were zero).
1128  if (BitGroups.empty())
1129  LastGroupStartIdx = 0;
1130  }
1131 
1132  // If this bit has the same underlying value and the same rotate factor as
1133  // the last one, then they're part of the same group.
1134  if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1135  continue;
1136 
1137  if (LastValue.getNode())
1138  BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1139  i-1));
1140  LastRLAmt = ThisRLAmt;
1141  LastValue = ThisValue;
1142  LastGroupStartIdx = i;
1143  }
1144  if (LastValue.getNode())
1145  BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1146  Bits.size()-1));
1147 
1148  if (BitGroups.empty())
1149  return;
1150 
1151  // We might be able to combine the first and last groups.
1152  if (BitGroups.size() > 1) {
1153  // If the first and last groups are the same, then remove the first group
1154  // in favor of the last group, making the ending index of the last group
1155  // equal to the ending index of the to-be-removed first group.
1156  if (BitGroups[0].StartIdx == 0 &&
1157  BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1158  BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1159  BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1160  DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1161  BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1162  BitGroups.erase(BitGroups.begin());
1163  }
1164  }
1165  }
1166 
1167  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1168  // associated with each. If there is a degeneracy, pick the one that occurs
1169  // first (in the final value).
1170  void collectValueRotInfo() {
1171  ValueRots.clear();
1172 
1173  for (auto &BG : BitGroups) {
1174  unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1175  ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1176  VRI.V = BG.V;
1177  VRI.RLAmt = BG.RLAmt;
1178  VRI.Repl32 = BG.Repl32;
1179  VRI.NumGroups += 1;
1180  VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1181  }
1182 
1183  // Now that we've collected the various ValueRotInfo instances, we need to
1184  // sort them.
1185  ValueRotsVec.clear();
1186  for (auto &I : ValueRots) {
1187  ValueRotsVec.push_back(I.second);
1188  }
1189  std::sort(ValueRotsVec.begin(), ValueRotsVec.end());
1190  }
1191 
1192  // In 64-bit mode, rlwinm and friends have a rotation operator that
1193  // replicates the low-order 32 bits into the high-order 32-bits. The mask
1194  // indices of these instructions can only be in the lower 32 bits, so they
1195  // can only represent some 64-bit bit groups. However, when they can be used,
1196  // the 32-bit replication can be used to represent, as a single bit group,
1197  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1198  // groups when possible. Returns true if any of the bit groups were
1199  // converted.
1200  void assignRepl32BitGroups() {
1201  // If we have bits like this:
1202  //
1203  // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1204  // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1205  // Groups: | RLAmt = 8 | RLAmt = 40 |
1206  //
1207  // But, making use of a 32-bit operation that replicates the low-order 32
1208  // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1209  // of 8.
1210 
1211  auto IsAllLow32 = [this](BitGroup & BG) {
1212  if (BG.StartIdx <= BG.EndIdx) {
1213  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1214  if (!Bits[i].hasValue())
1215  continue;
1216  if (Bits[i].getValueBitIndex() >= 32)
1217  return false;
1218  }
1219  } else {
1220  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1221  if (!Bits[i].hasValue())
1222  continue;
1223  if (Bits[i].getValueBitIndex() >= 32)
1224  return false;
1225  }
1226  for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1227  if (!Bits[i].hasValue())
1228  continue;
1229  if (Bits[i].getValueBitIndex() >= 32)
1230  return false;
1231  }
1232  }
1233 
1234  return true;
1235  };
1236 
1237  for (auto &BG : BitGroups) {
1238  if (BG.StartIdx < 32 && BG.EndIdx < 32) {
1239  if (IsAllLow32(BG)) {
1240  if (BG.RLAmt >= 32) {
1241  BG.RLAmt -= 32;
1242  BG.Repl32CR = true;
1243  }
1244 
1245  BG.Repl32 = true;
1246 
1247  DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
1248  BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
1249  " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
1250  }
1251  }
1252  }
1253 
1254  // Now walk through the bit groups, consolidating where possible.
1255  for (auto I = BitGroups.begin(); I != BitGroups.end();) {
1256  // We might want to remove this bit group by merging it with the previous
1257  // group (which might be the ending group).
1258  auto IP = (I == BitGroups.begin()) ?
1259  std::prev(BitGroups.end()) : std::prev(I);
1260  if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
1261  I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
1262 
1263  DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
1264  I->V.getNode() << " RLAmt = " << I->RLAmt <<
1265  " [" << I->StartIdx << ", " << I->EndIdx <<
1266  "] with group with range [" <<
1267  IP->StartIdx << ", " << IP->EndIdx << "]\n");
1268 
1269  IP->EndIdx = I->EndIdx;
1270  IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
1271  IP->Repl32Coalesced = true;
1272  I = BitGroups.erase(I);
1273  continue;
1274  } else {
1275  // There is a special case worth handling: If there is a single group
1276  // covering the entire upper 32 bits, and it can be merged with both
1277  // the next and previous groups (which might be the same group), then
1278  // do so. If it is the same group (so there will be only one group in
1279  // total), then we need to reverse the order of the range so that it
1280  // covers the entire 64 bits.
1281  if (I->StartIdx == 32 && I->EndIdx == 63) {
1282  assert(std::next(I) == BitGroups.end() &&
1283  "bit group ends at index 63 but there is another?");
1284  auto IN = BitGroups.begin();
1285 
1286  if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
1287  (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
1288  IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
1289  IsAllLow32(*I)) {
1290 
1291  DEBUG(dbgs() << "\tcombining bit group for " <<
1292  I->V.getNode() << " RLAmt = " << I->RLAmt <<
1293  " [" << I->StartIdx << ", " << I->EndIdx <<
1294  "] with 32-bit replicated groups with ranges [" <<
1295  IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
1296  IN->StartIdx << ", " << IN->EndIdx << "]\n");
1297 
1298  if (IP == IN) {
1299  // There is only one other group; change it to cover the whole
1300  // range (backward, so that it can still be Repl32 but cover the
1301  // whole 64-bit range).
1302  IP->StartIdx = 31;
1303  IP->EndIdx = 30;
1304  IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
1305  IP->Repl32Coalesced = true;
1306  I = BitGroups.erase(I);
1307  } else {
1308  // There are two separate groups, one before this group and one
1309  // after us (at the beginning). We're going to remove this group,
1310  // but also the group at the very beginning.
1311  IP->EndIdx = IN->EndIdx;
1312  IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
1313  IP->Repl32Coalesced = true;
1314  I = BitGroups.erase(I);
1315  BitGroups.erase(BitGroups.begin());
1316  }
1317 
1318  // This must be the last group in the vector (and we might have
1319  // just invalidated the iterator above), so break here.
1320  break;
1321  }
1322  }
1323  }
1324 
1325  ++I;
1326  }
1327  }
1328 
1329  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
1330  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1331  }
1332 
1333  uint64_t getZerosMask() {
1334  uint64_t Mask = 0;
1335  for (unsigned i = 0; i < Bits.size(); ++i) {
1336  if (Bits[i].hasValue())
1337  continue;
1338  Mask |= (UINT64_C(1) << i);
1339  }
1340 
1341  return ~Mask;
1342  }
1343 
1344  // This method extends an input value to 64 bit if input is 32-bit integer.
1345  // While selecting instructions in BitPermutationSelector in 64-bit mode,
1346  // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1347  // In such case, we extend it to 64 bit to be consistent with other values.
1348  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1349  if (V.getValueSizeInBits() == 64)
1350  return V;
1351 
1352  assert(V.getValueSizeInBits() == 32);
1353  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1354  SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1355  MVT::i64), 0);
1356  SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1357  MVT::i64, ImDef, V,
1358  SubRegIdx), 0);
1359  return ExtVal;
1360  }
1361 
1362  // Depending on the number of groups for a particular value, it might be
1363  // better to rotate, mask explicitly (using andi/andis), and then or the
1364  // result. Select this part of the result first.
1365  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1367  return;
1368 
1369  for (ValueRotInfo &VRI : ValueRotsVec) {
1370  unsigned Mask = 0;
1371  for (unsigned i = 0; i < Bits.size(); ++i) {
1372  if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1373  continue;
1374  if (RLAmt[i] != VRI.RLAmt)
1375  continue;
1376  Mask |= (1u << i);
1377  }
1378 
1379  // Compute the masks for andi/andis that would be necessary.
1380  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1381  assert((ANDIMask != 0 || ANDISMask != 0) &&
1382  "No set bits in mask for value bit groups");
1383  bool NeedsRotate = VRI.RLAmt != 0;
1384 
1385  // We're trying to minimize the number of instructions. If we have one
1386  // group, using one of andi/andis can break even. If we have three
1387  // groups, we can use both andi and andis and break even (to use both
1388  // andi and andis we also need to or the results together). We need four
1389  // groups if we also need to rotate. To use andi/andis we need to do more
1390  // than break even because rotate-and-mask instructions tend to be easier
1391  // to schedule.
1392 
1393  // FIXME: We've biased here against using andi/andis, which is right for
1394  // POWER cores, but not optimal everywhere. For example, on the A2,
1395  // andi/andis have single-cycle latency whereas the rotate-and-mask
1396  // instructions take two cycles, and it would be better to bias toward
1397  // andi/andis in break-even cases.
1398 
1399  unsigned NumAndInsts = (unsigned) NeedsRotate +
1400  (unsigned) (ANDIMask != 0) +
1401  (unsigned) (ANDISMask != 0) +
1402  (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
1403  (unsigned) (bool) Res;
1404 
1405  DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1406  " RL: " << VRI.RLAmt << ":" <<
1407  "\n\t\t\tisel using masking: " << NumAndInsts <<
1408  " using rotates: " << VRI.NumGroups << "\n");
1409 
1410  if (NumAndInsts >= VRI.NumGroups)
1411  continue;
1412 
1413  DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1414 
1415  if (InstCnt) *InstCnt += NumAndInsts;
1416 
1417  SDValue VRot;
1418  if (VRI.RLAmt) {
1419  SDValue Ops[] =
1420  { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1421  getI32Imm(31, dl) };
1422  VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
1423  Ops), 0);
1424  } else {
1425  VRot = VRI.V;
1426  }
1427 
1428  SDValue ANDIVal, ANDISVal;
1429  if (ANDIMask != 0)
1430  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1431  VRot, getI32Imm(ANDIMask, dl)), 0);
1432  if (ANDISMask != 0)
1433  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1434  VRot, getI32Imm(ANDISMask, dl)), 0);
1435 
1436  SDValue TotalVal;
1437  if (!ANDIVal)
1438  TotalVal = ANDISVal;
1439  else if (!ANDISVal)
1440  TotalVal = ANDIVal;
1441  else
1442  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1443  ANDIVal, ANDISVal), 0);
1444 
1445  if (!Res)
1446  Res = TotalVal;
1447  else
1448  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1449  Res, TotalVal), 0);
1450 
1451  // Now, remove all groups with this underlying value and rotation
1452  // factor.
1453  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1454  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1455  });
1456  }
1457  }
1458 
1459  // Instruction selection for the 32-bit case.
1460  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
1461  SDLoc dl(N);
1462  SDValue Res;
1463 
1464  if (InstCnt) *InstCnt = 0;
1465 
1466  // Take care of cases that should use andi/andis first.
1467  SelectAndParts32(dl, Res, InstCnt);
1468 
1469  // If we've not yet selected a 'starting' instruction, and we have no zeros
1470  // to fill in, select the (Value, RLAmt) with the highest priority (largest
1471  // number of groups), and start with this rotated value.
1472  if ((!HasZeros || LateMask) && !Res) {
1473  ValueRotInfo &VRI = ValueRotsVec[0];
1474  if (VRI.RLAmt) {
1475  if (InstCnt) *InstCnt += 1;
1476  SDValue Ops[] =
1477  { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1478  getI32Imm(31, dl) };
1479  Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
1480  0);
1481  } else {
1482  Res = VRI.V;
1483  }
1484 
1485  // Now, remove all groups with this underlying value and rotation factor.
1486  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1487  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1488  });
1489  }
1490 
1491  if (InstCnt) *InstCnt += BitGroups.size();
1492 
1493  // Insert the other groups (one at a time).
1494  for (auto &BG : BitGroups) {
1495  if (!Res) {
1496  SDValue Ops[] =
1497  { BG.V, getI32Imm(BG.RLAmt, dl),
1498  getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1499  getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1500  Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
1501  } else {
1502  SDValue Ops[] =
1503  { Res, BG.V, getI32Imm(BG.RLAmt, dl),
1504  getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1505  getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1506  Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
1507  }
1508  }
1509 
1510  if (LateMask) {
1511  unsigned Mask = (unsigned) getZerosMask();
1512 
1513  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1514  assert((ANDIMask != 0 || ANDISMask != 0) &&
1515  "No set bits in zeros mask?");
1516 
1517  if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1518  (unsigned) (ANDISMask != 0) +
1519  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1520 
1521  SDValue ANDIVal, ANDISVal;
1522  if (ANDIMask != 0)
1523  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1524  Res, getI32Imm(ANDIMask, dl)), 0);
1525  if (ANDISMask != 0)
1526  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1527  Res, getI32Imm(ANDISMask, dl)), 0);
1528 
1529  if (!ANDIVal)
1530  Res = ANDISVal;
1531  else if (!ANDISVal)
1532  Res = ANDIVal;
1533  else
1534  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1535  ANDIVal, ANDISVal), 0);
1536  }
1537 
1538  return Res.getNode();
1539  }
1540 
1541  unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
1542  unsigned MaskStart, unsigned MaskEnd,
1543  bool IsIns) {
1544  // In the notation used by the instructions, 'start' and 'end' are reversed
1545  // because bits are counted from high to low order.
1546  unsigned InstMaskStart = 64 - MaskEnd - 1,
1547  InstMaskEnd = 64 - MaskStart - 1;
1548 
1549  if (Repl32)
1550  return 1;
1551 
1552  if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
1553  InstMaskEnd == 63 - RLAmt)
1554  return 1;
1555 
1556  return 2;
1557  }
1558 
1559  // For 64-bit values, not all combinations of rotates and masks are
1560  // available. Produce one if it is available.
1561  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
1562  bool Repl32, unsigned MaskStart, unsigned MaskEnd,
1563  unsigned *InstCnt = nullptr) {
1564  // In the notation used by the instructions, 'start' and 'end' are reversed
1565  // because bits are counted from high to low order.
1566  unsigned InstMaskStart = 64 - MaskEnd - 1,
1567  InstMaskEnd = 64 - MaskStart - 1;
1568 
1569  if (InstCnt) *InstCnt += 1;
1570 
1571  if (Repl32) {
1572  // This rotation amount assumes that the lower 32 bits of the quantity
1573  // are replicated in the high 32 bits by the rotation operator (which is
1574  // done by rlwinm and friends).
1575  assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1576  assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
1577  SDValue Ops[] =
1578  { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1579  getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
1580  return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
1581  Ops), 0);
1582  }
1583 
1584  if (InstMaskEnd == 63) {
1585  SDValue Ops[] =
1586  { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1587  getI32Imm(InstMaskStart, dl) };
1588  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
1589  }
1590 
1591  if (InstMaskStart == 0) {
1592  SDValue Ops[] =
1593  { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1594  getI32Imm(InstMaskEnd, dl) };
1595  return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
1596  }
1597 
1598  if (InstMaskEnd == 63 - RLAmt) {
1599  SDValue Ops[] =
1600  { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1601  getI32Imm(InstMaskStart, dl) };
1602  return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
1603  }
1604 
1605  // We cannot do this with a single instruction, so we'll use two. The
1606  // problem is that we're not free to choose both a rotation amount and mask
1607  // start and end independently. We can choose an arbitrary mask start and
1608  // end, but then the rotation amount is fixed. Rotation, however, can be
1609  // inverted, and so by applying an "inverse" rotation first, we can get the
1610  // desired result.
1611  if (InstCnt) *InstCnt += 1;
1612 
1613  // The rotation mask for the second instruction must be MaskStart.
1614  unsigned RLAmt2 = MaskStart;
1615  // The first instruction must rotate V so that the overall rotation amount
1616  // is RLAmt.
1617  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1618  if (RLAmt1)
1619  V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1620  return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
1621  }
1622 
1623  // For 64-bit values, not all combinations of rotates and masks are
1624  // available. Produce a rotate-mask-and-insert if one is available.
1625  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
1626  unsigned RLAmt, bool Repl32, unsigned MaskStart,
1627  unsigned MaskEnd, unsigned *InstCnt = nullptr) {
1628  // In the notation used by the instructions, 'start' and 'end' are reversed
1629  // because bits are counted from high to low order.
1630  unsigned InstMaskStart = 64 - MaskEnd - 1,
1631  InstMaskEnd = 64 - MaskStart - 1;
1632 
1633  if (InstCnt) *InstCnt += 1;
1634 
1635  if (Repl32) {
1636  // This rotation amount assumes that the lower 32 bits of the quantity
1637  // are replicated in the high 32 bits by the rotation operator (which is
1638  // done by rlwinm and friends).
1639  assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1640  assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
1641  SDValue Ops[] =
1642  { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1643  getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
1644  return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
1645  Ops), 0);
1646  }
1647 
1648  if (InstMaskEnd == 63 - RLAmt) {
1649  SDValue Ops[] =
1650  { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1651  getI32Imm(InstMaskStart, dl) };
1652  return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
1653  }
1654 
1655  // We cannot do this with a single instruction, so we'll use two. The
1656  // problem is that we're not free to choose both a rotation amount and mask
1657  // start and end independently. We can choose an arbitrary mask start and
1658  // end, but then the rotation amount is fixed. Rotation, however, can be
1659  // inverted, and so by applying an "inverse" rotation first, we can get the
1660  // desired result.
1661  if (InstCnt) *InstCnt += 1;
1662 
1663  // The rotation mask for the second instruction must be MaskStart.
1664  unsigned RLAmt2 = MaskStart;
1665  // The first instruction must rotate V so that the overall rotation amount
1666  // is RLAmt.
1667  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1668  if (RLAmt1)
1669  V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1670  return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
1671  }
1672 
1673  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1675  return;
1676 
1677  // The idea here is the same as in the 32-bit version, but with additional
1678  // complications from the fact that Repl32 might be true. Because we
1679  // aggressively convert bit groups to Repl32 form (which, for small
1680  // rotation factors, involves no other change), and then coalesce, it might
1681  // be the case that a single 64-bit masking operation could handle both
1682  // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
1683  // form allowed coalescing, then we must use a 32-bit rotaton in order to
1684  // completely capture the new combined bit group.
1685 
1686  for (ValueRotInfo &VRI : ValueRotsVec) {
1687  uint64_t Mask = 0;
1688 
1689  // We need to add to the mask all bits from the associated bit groups.
1690  // If Repl32 is false, we need to add bits from bit groups that have
1691  // Repl32 true, but are trivially convertable to Repl32 false. Such a
1692  // group is trivially convertable if it overlaps only with the lower 32
1693  // bits, and the group has not been coalesced.
1694  auto MatchingBG = [VRI](const BitGroup &BG) {
1695  if (VRI.V != BG.V)
1696  return false;
1697 
1698  unsigned EffRLAmt = BG.RLAmt;
1699  if (!VRI.Repl32 && BG.Repl32) {
1700  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
1701  !BG.Repl32Coalesced) {
1702  if (BG.Repl32CR)
1703  EffRLAmt += 32;
1704  } else {
1705  return false;
1706  }
1707  } else if (VRI.Repl32 != BG.Repl32) {
1708  return false;
1709  }
1710 
1711  return VRI.RLAmt == EffRLAmt;
1712  };
1713 
1714  for (auto &BG : BitGroups) {
1715  if (!MatchingBG(BG))
1716  continue;
1717 
1718  if (BG.StartIdx <= BG.EndIdx) {
1719  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
1720  Mask |= (UINT64_C(1) << i);
1721  } else {
1722  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
1723  Mask |= (UINT64_C(1) << i);
1724  for (unsigned i = 0; i <= BG.EndIdx; ++i)
1725  Mask |= (UINT64_C(1) << i);
1726  }
1727  }
1728 
1729  // We can use the 32-bit andi/andis technique if the mask does not
1730  // require any higher-order bits. This can save an instruction compared
1731  // to always using the general 64-bit technique.
1732  bool Use32BitInsts = isUInt<32>(Mask);
1733  // Compute the masks for andi/andis that would be necessary.
1734  unsigned ANDIMask = (Mask & UINT16_MAX),
1735  ANDISMask = (Mask >> 16) & UINT16_MAX;
1736 
1737  bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
1738 
1739  unsigned NumAndInsts = (unsigned) NeedsRotate +
1740  (unsigned) (bool) Res;
1741  if (Use32BitInsts)
1742  NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
1743  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1744  else
1745  NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;
1746 
1747  unsigned NumRLInsts = 0;
1748  bool FirstBG = true;
1749  bool MoreBG = false;
1750  for (auto &BG : BitGroups) {
1751  if (!MatchingBG(BG)) {
1752  MoreBG = true;
1753  continue;
1754  }
1755  NumRLInsts +=
1756  SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
1757  !FirstBG);
1758  FirstBG = false;
1759  }
1760 
1761  DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1762  " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") <<
1763  "\n\t\t\tisel using masking: " << NumAndInsts <<
1764  " using rotates: " << NumRLInsts << "\n");
1765 
1766  // When we'd use andi/andis, we bias toward using the rotates (andi only
1767  // has a record form, and is cracked on POWER cores). However, when using
1768  // general 64-bit constant formation, bias toward the constant form,
1769  // because that exposes more opportunities for CSE.
1770  if (NumAndInsts > NumRLInsts)
1771  continue;
1772  // When merging multiple bit groups, instruction or is used.
1773  // But when rotate is used, rldimi can inert the rotated value into any
1774  // register, so instruction or can be avoided.
1775  if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
1776  continue;
1777 
1778  DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1779 
1780  if (InstCnt) *InstCnt += NumAndInsts;
1781 
1782  SDValue VRot;
1783  // We actually need to generate a rotation if we have a non-zero rotation
1784  // factor or, in the Repl32 case, if we care about any of the
1785  // higher-order replicated bits. In the latter case, we generate a mask
1786  // backward so that it actually includes the entire 64 bits.
1787  if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
1788  VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
1789  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
1790  else
1791  VRot = VRI.V;
1792 
1793  SDValue TotalVal;
1794  if (Use32BitInsts) {
1795  assert((ANDIMask != 0 || ANDISMask != 0) &&
1796  "No set bits in mask when using 32-bit ands for 64-bit value");
1797 
1798  SDValue ANDIVal, ANDISVal;
1799  if (ANDIMask != 0)
1800  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
1801  ExtendToInt64(VRot, dl),
1802  getI32Imm(ANDIMask, dl)),
1803  0);
1804  if (ANDISMask != 0)
1805  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
1806  ExtendToInt64(VRot, dl),
1807  getI32Imm(ANDISMask, dl)),
1808  0);
1809 
1810  if (!ANDIVal)
1811  TotalVal = ANDISVal;
1812  else if (!ANDISVal)
1813  TotalVal = ANDIVal;
1814  else
1815  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1816  ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
1817  } else {
1818  TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
1819  TotalVal =
1820  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
1821  ExtendToInt64(VRot, dl), TotalVal),
1822  0);
1823  }
1824 
1825  if (!Res)
1826  Res = TotalVal;
1827  else
1828  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1829  ExtendToInt64(Res, dl), TotalVal),
1830  0);
1831 
1832  // Now, remove all groups with this underlying value and rotation
1833  // factor.
1834  eraseMatchingBitGroups(MatchingBG);
1835  }
1836  }
1837 
1838  // Instruction selection for the 64-bit case.
1839  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
1840  SDLoc dl(N);
1841  SDValue Res;
1842 
1843  if (InstCnt) *InstCnt = 0;
1844 
1845  // Take care of cases that should use andi/andis first.
1846  SelectAndParts64(dl, Res, InstCnt);
1847 
1848  // If we've not yet selected a 'starting' instruction, and we have no zeros
1849  // to fill in, select the (Value, RLAmt) with the highest priority (largest
1850  // number of groups), and start with this rotated value.
1851  if ((!HasZeros || LateMask) && !Res) {
1852  // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
1853  // groups will come first, and so the VRI representing the largest number
1854  // of groups might not be first (it might be the first Repl32 groups).
1855  unsigned MaxGroupsIdx = 0;
1856  if (!ValueRotsVec[0].Repl32) {
1857  for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
1858  if (ValueRotsVec[i].Repl32) {
1859  if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
1860  MaxGroupsIdx = i;
1861  break;
1862  }
1863  }
1864 
1865  ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
1866  bool NeedsRotate = false;
1867  if (VRI.RLAmt) {
1868  NeedsRotate = true;
1869  } else if (VRI.Repl32) {
1870  for (auto &BG : BitGroups) {
1871  if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
1872  BG.Repl32 != VRI.Repl32)
1873  continue;
1874 
1875  // We don't need a rotate if the bit group is confined to the lower
1876  // 32 bits.
1877  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
1878  continue;
1879 
1880  NeedsRotate = true;
1881  break;
1882  }
1883  }
1884 
1885  if (NeedsRotate)
1886  Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
1887  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
1888  InstCnt);
1889  else
1890  Res = VRI.V;
1891 
1892  // Now, remove all groups with this underlying value and rotation factor.
1893  if (Res)
1894  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1895  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
1896  BG.Repl32 == VRI.Repl32;
1897  });
1898  }
1899 
1900  // Because 64-bit rotates are more flexible than inserts, we might have a
1901  // preference regarding which one we do first (to save one instruction).
1902  if (!Res)
1903  for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
1904  if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
1905  false) <
1906  SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
1907  true)) {
1908  if (I != BitGroups.begin()) {
1909  BitGroup BG = *I;
1910  BitGroups.erase(I);
1911  BitGroups.insert(BitGroups.begin(), BG);
1912  }
1913 
1914  break;
1915  }
1916  }
1917 
1918  // Insert the other groups (one at a time).
1919  for (auto &BG : BitGroups) {
1920  if (!Res)
1921  Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
1922  BG.EndIdx, InstCnt);
1923  else
1924  Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
1925  BG.StartIdx, BG.EndIdx, InstCnt);
1926  }
1927 
1928  if (LateMask) {
1929  uint64_t Mask = getZerosMask();
1930 
1931  // We can use the 32-bit andi/andis technique if the mask does not
1932  // require any higher-order bits. This can save an instruction compared
1933  // to always using the general 64-bit technique.
1934  bool Use32BitInsts = isUInt<32>(Mask);
1935  // Compute the masks for andi/andis that would be necessary.
1936  unsigned ANDIMask = (Mask & UINT16_MAX),
1937  ANDISMask = (Mask >> 16) & UINT16_MAX;
1938 
1939  if (Use32BitInsts) {
1940  assert((ANDIMask != 0 || ANDISMask != 0) &&
1941  "No set bits in mask when using 32-bit ands for 64-bit value");
1942 
1943  if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1944  (unsigned) (ANDISMask != 0) +
1945  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1946 
1947  SDValue ANDIVal, ANDISVal;
1948  if (ANDIMask != 0)
1949  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
1950  ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0);
1951  if (ANDISMask != 0)
1952  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
1953  ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0);
1954 
1955  if (!ANDIVal)
1956  Res = ANDISVal;
1957  else if (!ANDISVal)
1958  Res = ANDIVal;
1959  else
1960  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1961  ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
1962  } else {
1963  if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;
1964 
1965  SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
1966  Res =
1967  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
1968  ExtendToInt64(Res, dl), MaskVal), 0);
1969  }
1970  }
1971 
1972  return Res.getNode();
1973  }
1974 
1975  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
1976  // Fill in BitGroups.
1977  collectBitGroups(LateMask);
1978  if (BitGroups.empty())
1979  return nullptr;
1980 
1981  // For 64-bit values, figure out when we can use 32-bit instructions.
1982  if (Bits.size() == 64)
1983  assignRepl32BitGroups();
1984 
1985  // Fill in ValueRotsVec.
1986  collectValueRotInfo();
1987 
1988  if (Bits.size() == 32) {
1989  return Select32(N, LateMask, InstCnt);
1990  } else {
1991  assert(Bits.size() == 64 && "Not 64 bits here?");
1992  return Select64(N, LateMask, InstCnt);
1993  }
1994 
1995  return nullptr;
1996  }
1997 
1998  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
1999  BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
2000  }
2001 
2003 
2004  bool HasZeros;
2006 
2007  SmallVector<BitGroup, 16> BitGroups;
2008 
2009  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2010  SmallVector<ValueRotInfo, 16> ValueRotsVec;
2011 
2012  SelectionDAG *CurDAG;
2013 
2014 public:
2015  BitPermutationSelector(SelectionDAG *DAG)
2016  : CurDAG(DAG) {}
2017 
2018  // Here we try to match complex bit permutations into a set of
2019  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2020  // known to produce optimial code for common cases (like i32 byte swapping).
2021  SDNode *Select(SDNode *N) {
2022  Memoizer.clear();
2023  auto Result =
2024  getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2025  if (!Result.first)
2026  return nullptr;
2027  Bits = std::move(*Result.second);
2028 
2029  DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2030  " selection for: ");
2031  DEBUG(N->dump(CurDAG));
2032 
2033  // Fill it RLAmt and set HasZeros.
2034  computeRotationAmounts();
2035 
2036  if (!HasZeros)
2037  return Select(N, false);
2038 
2039  // We currently have two techniques for handling results with zeros: early
2040  // masking (the default) and late masking. Late masking is sometimes more
2041  // efficient, but because the structure of the bit groups is different, it
2042  // is hard to tell without generating both and comparing the results. With
2043  // late masking, we ignore zeros in the resulting value when inserting each
2044  // set of bit groups, and then mask in the zeros at the end. With early
2045  // masking, we only insert the non-zero parts of the result at every step.
2046 
2047  unsigned InstCnt, InstCntLateMask;
2048  DEBUG(dbgs() << "\tEarly masking:\n");
2049  SDNode *RN = Select(N, false, &InstCnt);
2050  DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2051 
2052  DEBUG(dbgs() << "\tLate masking:\n");
2053  SDNode *RNLM = Select(N, true, &InstCntLateMask);
2054  DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
2055  " instructions\n");
2056 
2057  if (InstCnt <= InstCntLateMask) {
2058  DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2059  return RN;
2060  }
2061 
2062  DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2063  return RNLM;
2064  }
2065 };
2066 
2067 } // end anonymous namespace
2068 
2069 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
2070  if (N->getValueType(0) != MVT::i32 &&
2071  N->getValueType(0) != MVT::i64)
2072  return false;
2073 
2074  if (!UseBitPermRewriter)
2075  return false;
2076 
2077  switch (N->getOpcode()) {
2078  default: break;
2079  case ISD::ROTL:
2080  case ISD::SHL:
2081  case ISD::SRL:
2082  case ISD::AND:
2083  case ISD::OR: {
2084  BitPermutationSelector BPS(CurDAG);
2085  if (SDNode *New = BPS.Select(N)) {
2086  ReplaceNode(N, New);
2087  return true;
2088  }
2089  return false;
2090  }
2091  }
2092 
2093  return false;
2094 }
2095 
2096 /// SelectCC - Select a comparison of the specified values with the specified
2097 /// condition code, returning the CR# of the expression.
2098 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2099  const SDLoc &dl) {
2100  // Always select the LHS.
2101  unsigned Opc;
2102 
2103  if (LHS.getValueType() == MVT::i32) {
2104  unsigned Imm;
2105  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2106  if (isInt32Immediate(RHS, Imm)) {
2107  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
2108  if (isUInt<16>(Imm))
2109  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
2110  getI32Imm(Imm & 0xFFFF, dl)),
2111  0);
2112  // If this is a 16-bit signed immediate, fold it.
2113  if (isInt<16>((int)Imm))
2114  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
2115  getI32Imm(Imm & 0xFFFF, dl)),
2116  0);
2117 
2118  // For non-equality comparisons, the default code would materialize the
2119  // constant, then compare against it, like this:
2120  // lis r2, 4660
2121  // ori r2, r2, 22136
2122  // cmpw cr0, r3, r2
2123  // Since we are just comparing for equality, we can emit this instead:
2124  // xoris r0,r3,0x1234
2125  // cmplwi cr0,r0,0x5678
2126  // beq cr0,L6
2127  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
2128  getI32Imm(Imm >> 16, dl)), 0);
2129  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
2130  getI32Imm(Imm & 0xFFFF, dl)), 0);
2131  }
2132  Opc = PPC::CMPLW;
2133  } else if (ISD::isUnsignedIntSetCC(CC)) {
2134  if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
2135  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
2136  getI32Imm(Imm & 0xFFFF, dl)), 0);
2137  Opc = PPC::CMPLW;
2138  } else {
2139  int16_t SImm;
2140  if (isIntS16Immediate(RHS, SImm))
2141  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
2142  getI32Imm((int)SImm & 0xFFFF,
2143  dl)),
2144  0);
2145  Opc = PPC::CMPW;
2146  }
2147  } else if (LHS.getValueType() == MVT::i64) {
2148  uint64_t Imm;
2149  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2150  if (isInt64Immediate(RHS.getNode(), Imm)) {
2151  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
2152  if (isUInt<16>(Imm))
2153  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
2154  getI32Imm(Imm & 0xFFFF, dl)),
2155  0);
2156  // If this is a 16-bit signed immediate, fold it.
2157  if (isInt<16>(Imm))
2158  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
2159  getI32Imm(Imm & 0xFFFF, dl)),
2160  0);
2161 
2162  // For non-equality comparisons, the default code would materialize the
2163  // constant, then compare against it, like this:
2164  // lis r2, 4660
2165  // ori r2, r2, 22136
2166  // cmpd cr0, r3, r2
2167  // Since we are just comparing for equality, we can emit this instead:
2168  // xoris r0,r3,0x1234
2169  // cmpldi cr0,r0,0x5678
2170  // beq cr0,L6
2171  if (isUInt<32>(Imm)) {
2172  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
2173  getI64Imm(Imm >> 16, dl)), 0);
2174  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
2175  getI64Imm(Imm & 0xFFFF, dl)),
2176  0);
2177  }
2178  }
2179  Opc = PPC::CMPLD;
2180  } else if (ISD::isUnsignedIntSetCC(CC)) {
2181  if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
2182  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
2183  getI64Imm(Imm & 0xFFFF, dl)), 0);
2184  Opc = PPC::CMPLD;
2185  } else {
2186  int16_t SImm;
2187  if (isIntS16Immediate(RHS, SImm))
2188  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
2189  getI64Imm(SImm & 0xFFFF, dl)),
2190  0);
2191  Opc = PPC::CMPD;
2192  }
2193  } else if (LHS.getValueType() == MVT::f32) {
2194  Opc = PPC::FCMPUS;
2195  } else {
2196  assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
2197  Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
2198  }
2199  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
2200 }
2201 
2203  switch (CC) {
2204  case ISD::SETUEQ:
2205  case ISD::SETONE:
2206  case ISD::SETOLE:
2207  case ISD::SETOGE:
2208  llvm_unreachable("Should be lowered by legalize!");
2209  default: llvm_unreachable("Unknown condition!");
2210  case ISD::SETOEQ:
2211  case ISD::SETEQ: return PPC::PRED_EQ;
2212  case ISD::SETUNE:
2213  case ISD::SETNE: return PPC::PRED_NE;
2214  case ISD::SETOLT:
2215  case ISD::SETLT: return PPC::PRED_LT;
2216  case ISD::SETULE:
2217  case ISD::SETLE: return PPC::PRED_LE;
2218  case ISD::SETOGT:
2219  case ISD::SETGT: return PPC::PRED_GT;
2220  case ISD::SETUGE:
2221  case ISD::SETGE: return PPC::PRED_GE;
2222  case ISD::SETO: return PPC::PRED_NU;
2223  case ISD::SETUO: return PPC::PRED_UN;
2224  // These two are invalid for floating point. Assume we have int.
2225  case ISD::SETULT: return PPC::PRED_LT;
2226  case ISD::SETUGT: return PPC::PRED_GT;
2227  }
2228 }
2229 
2230 /// getCRIdxForSetCC - Return the index of the condition register field
2231 /// associated with the SetCC condition, and whether or not the field is
2232 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
2233 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
2234  Invert = false;
2235  switch (CC) {
2236  default: llvm_unreachable("Unknown condition!");
2237  case ISD::SETOLT:
2238  case ISD::SETLT: return 0; // Bit #0 = SETOLT
2239  case ISD::SETOGT:
2240  case ISD::SETGT: return 1; // Bit #1 = SETOGT
2241  case ISD::SETOEQ:
2242  case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
2243  case ISD::SETUO: return 3; // Bit #3 = SETUO
2244  case ISD::SETUGE:
2245  case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
2246  case ISD::SETULE:
2247  case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
2248  case ISD::SETUNE:
2249  case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
2250  case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
2251  case ISD::SETUEQ:
2252  case ISD::SETOGE:
2253  case ISD::SETOLE:
2254  case ISD::SETONE:
2255  llvm_unreachable("Invalid branch code: should be expanded by legalize");
2256  // These are invalid for floating point. Assume integer.
2257  case ISD::SETULT: return 0;
2258  case ISD::SETUGT: return 1;
2259  }
2260 }
2261 
2262 // getVCmpInst: return the vector compare instruction for the specified
2263 // vector type and condition code. Since this is for altivec specific code,
2264 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
2265 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
2266  bool HasVSX, bool &Swap, bool &Negate) {
2267  Swap = false;
2268  Negate = false;
2269 
2270  if (VecVT.isFloatingPoint()) {
2271  /* Handle some cases by swapping input operands. */
2272  switch (CC) {
2273  case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
2274  case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
2275  case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
2276  case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
2277  case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
2278  case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
2279  default: break;
2280  }
2281  /* Handle some cases by negating the result. */
2282  switch (CC) {
2283  case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
2284  case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
2285  case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
2286  case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
2287  default: break;
2288  }
2289  /* We have instructions implementing the remaining cases. */
2290  switch (CC) {
2291  case ISD::SETEQ:
2292  case ISD::SETOEQ:
2293  if (VecVT == MVT::v4f32)
2294  return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
2295  else if (VecVT == MVT::v2f64)
2296  return PPC::XVCMPEQDP;
2297  break;
2298  case ISD::SETGT:
2299  case ISD::SETOGT:
2300  if (VecVT == MVT::v4f32)
2301  return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
2302  else if (VecVT == MVT::v2f64)
2303  return PPC::XVCMPGTDP;
2304  break;
2305  case ISD::SETGE:
2306  case ISD::SETOGE:
2307  if (VecVT == MVT::v4f32)
2308  return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
2309  else if (VecVT == MVT::v2f64)
2310  return PPC::XVCMPGEDP;
2311  break;
2312  default:
2313  break;
2314  }
2315  llvm_unreachable("Invalid floating-point vector compare condition");
2316  } else {
2317  /* Handle some cases by swapping input operands. */
2318  switch (CC) {
2319  case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
2320  case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
2321  case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
2322  case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
2323  default: break;
2324  }
2325  /* Handle some cases by negating the result. */
2326  switch (CC) {
2327  case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
2328  case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
2329  case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
2330  case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
2331  default: break;
2332  }
2333  /* We have instructions implementing the remaining cases. */
2334  switch (CC) {
2335  case ISD::SETEQ:
2336  case ISD::SETUEQ:
2337  if (VecVT == MVT::v16i8)
2338  return PPC::VCMPEQUB;
2339  else if (VecVT == MVT::v8i16)
2340  return PPC::VCMPEQUH;
2341  else if (VecVT == MVT::v4i32)
2342  return PPC::VCMPEQUW;
2343  else if (VecVT == MVT::v2i64)
2344  return PPC::VCMPEQUD;
2345  break;
2346  case ISD::SETGT:
2347  if (VecVT == MVT::v16i8)
2348  return PPC::VCMPGTSB;
2349  else if (VecVT == MVT::v8i16)
2350  return PPC::VCMPGTSH;
2351  else if (VecVT == MVT::v4i32)
2352  return PPC::VCMPGTSW;
2353  else if (VecVT == MVT::v2i64)
2354  return PPC::VCMPGTSD;
2355  break;
2356  case ISD::SETUGT:
2357  if (VecVT == MVT::v16i8)
2358  return PPC::VCMPGTUB;
2359  else if (VecVT == MVT::v8i16)
2360  return PPC::VCMPGTUH;
2361  else if (VecVT == MVT::v4i32)
2362  return PPC::VCMPGTUW;
2363  else if (VecVT == MVT::v2i64)
2364  return PPC::VCMPGTUD;
2365  break;
2366  default:
2367  break;
2368  }
2369  llvm_unreachable("Invalid integer vector compare condition");
2370  }
2371 }
2372 
2373 bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
2374  SDLoc dl(N);
2375  unsigned Imm;
2376  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2377  EVT PtrVT =
2378  CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
2379  bool isPPC64 = (PtrVT == MVT::i64);
2380 
2381  if (!PPCSubTarget->useCRBits() &&
2382  isInt32Immediate(N->getOperand(1), Imm)) {
2383  // We can codegen setcc op, imm very efficiently compared to a brcond.
2384  // Check for those cases here.
2385  // setcc op, 0
2386  if (Imm == 0) {
2387  SDValue Op = N->getOperand(0);
2388  switch (CC) {
2389  default: break;
2390  case ISD::SETEQ: {
2391  Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
2392  SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
2393  getI32Imm(31, dl) };
2394  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2395  return true;
2396  }
2397  case ISD::SETNE: {
2398  if (isPPC64) break;
2399  SDValue AD =
2400  SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2401  Op, getI32Imm(~0U, dl)), 0);
2402  CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
2403  return true;
2404  }
2405  case ISD::SETLT: {
2406  SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
2407  getI32Imm(31, dl) };
2408  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2409  return true;
2410  }
2411  case ISD::SETGT: {
2412  SDValue T =
2413  SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
2414  T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
2415  SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
2416  getI32Imm(31, dl) };
2417  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2418  return true;
2419  }
2420  }
2421  } else if (Imm == ~0U) { // setcc op, -1
2422  SDValue Op = N->getOperand(0);
2423  switch (CC) {
2424  default: break;
2425  case ISD::SETEQ:
2426  if (isPPC64) break;
2427  Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2428  Op, getI32Imm(1, dl)), 0);
2429  CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
2430  SDValue(CurDAG->getMachineNode(PPC::LI, dl,
2431  MVT::i32,
2432  getI32Imm(0, dl)),
2433  0), Op.getValue(1));
2434  return true;
2435  case ISD::SETNE: {
2436  if (isPPC64) break;
2437  Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
2438  SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2439  Op, getI32Imm(~0U, dl));
2440  CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
2441  SDValue(AD, 1));
2442  return true;
2443  }
2444  case ISD::SETLT: {
2445  SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
2446  getI32Imm(1, dl)), 0);
2447  SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
2448  Op), 0);
2449  SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
2450  getI32Imm(31, dl) };
2451  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2452  return true;
2453  }
2454  case ISD::SETGT: {
2455  SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
2456  getI32Imm(31, dl) };
2457  Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2458  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
2459  return true;
2460  }
2461  }
2462  }
2463  }
2464 
2465  SDValue LHS = N->getOperand(0);
2466  SDValue RHS = N->getOperand(1);
2467 
2468  // Altivec Vector compare instructions do not set any CR register by default and
2469  // vector compare operations return the same type as the operands.
2470  if (LHS.getValueType().isVector()) {
2471  if (PPCSubTarget->hasQPX())
2472  return false;
2473 
2474  EVT VecVT = LHS.getValueType();
2475  bool Swap, Negate;
2476  unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
2477  PPCSubTarget->hasVSX(), Swap, Negate);
2478  if (Swap)
2479  std::swap(LHS, RHS);
2480 
2481  EVT ResVT = VecVT.changeVectorElementTypeToInteger();
2482  if (Negate) {
2483  SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
2484  CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
2485  ResVT, VCmp, VCmp);
2486  return true;
2487  }
2488 
2489  CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
2490  return true;
2491  }
2492 
2493  if (PPCSubTarget->useCRBits())
2494  return false;
2495 
2496  bool Inv;
2497  unsigned Idx = getCRIdxForSetCC(CC, Inv);
2498  SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
2499  SDValue IntCR;
2500 
2501  // Force the ccreg into CR7.
2502  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
2503 
2504  SDValue InFlag(nullptr, 0); // Null incoming flag value.
2505  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
2506  InFlag).getValue(1);
2507 
2508  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
2509  CCReg), 0);
2510 
2511  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
2512  getI32Imm(31, dl), getI32Imm(31, dl) };
2513  if (!Inv) {
2514  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2515  return true;
2516  }
2517 
2518  // Get the specified bit.
2519  SDValue Tmp =
2520  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2521  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
2522  return true;
2523 }
2524 
2525 /// Does this node represent a load/store node whose address can be represented
2526 /// with a register plus an immediate that's a multiple of \p Val:
2527 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
2528  LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
2529  StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
2530  SDValue AddrOp;
2531  if (LDN)
2532  AddrOp = LDN->getOperand(1);
2533  else if (STN)
2534  AddrOp = STN->getOperand(2);
2535 
2536  short Imm = 0;
2537  if (AddrOp.getOpcode() == ISD::ADD) {
2538  // If op0 is a frame index that is under aligned, we can't do it either,
2539  // because it is translated to r31 or r1 + slot + offset. We won't know the
2540  // slot number until the stack frame is finalized.
2541  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(AddrOp.getOperand(0))) {
2542  const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
2543  unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
2544  if ((SlotAlign % Val) != 0)
2545  return false;
2546  }
2547  return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
2548  }
2549 
2550  // If the address comes from the outside, the offset will be zero.
2551  return AddrOp.getOpcode() == ISD::CopyFromReg;
2552 }
2553 
2554 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
2555  // Transfer memoperands.
2557  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2558  cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
2559 }
2560 
2561 // Select - Convert the specified operand from a target-independent to a
2562 // target-specific node if it hasn't already been changed.
2564  SDLoc dl(N);
2565  if (N->isMachineOpcode()) {
2566  N->setNodeId(-1);
2567  return; // Already selected.
2568  }
2569 
2570  // In case any misguided DAG-level optimizations form an ADD with a
2571  // TargetConstant operand, crash here instead of miscompiling (by selecting
2572  // an r+r add instead of some kind of r+i add).
2573  if (N->getOpcode() == ISD::ADD &&
2575  llvm_unreachable("Invalid ADD with TargetConstant operand");
2576 
2577  // Try matching complex bit permutations before doing anything else.
2578  if (tryBitPermutation(N))
2579  return;
2580 
2581  switch (N->getOpcode()) {
2582  default: break;
2583 
2584  case ISD::Constant:
2585  if (N->getValueType(0) == MVT::i64) {
2586  ReplaceNode(N, selectI64Imm(CurDAG, N));
2587  return;
2588  }
2589  break;
2590 
2591  case ISD::SETCC:
2592  if (trySETCC(N))
2593  return;
2594  break;
2595 
2596  case PPCISD::GlobalBaseReg:
2597  ReplaceNode(N, getGlobalBaseReg());
2598  return;
2599 
2600  case ISD::FrameIndex:
2601  selectFrameIndex(N, N);
2602  return;
2603 
2604  case PPCISD::MFOCRF: {
2605  SDValue InFlag = N->getOperand(1);
2606  ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
2607  N->getOperand(0), InFlag));
2608  return;
2609  }
2610 
2612  ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
2613  MVT::Other, N->getOperand(0)));
2614  return;
2615 
2616  case PPCISD::SRA_ADDZE: {
2617  SDValue N0 = N->getOperand(0);
2618  SDValue ShiftAmt =
2619  CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
2620  getConstantIntValue(), dl,
2621  N->getValueType(0));
2622  if (N->getValueType(0) == MVT::i64) {
2623  SDNode *Op =
2624  CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
2625  N0, ShiftAmt);
2626  CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
2627  SDValue(Op, 1));
2628  return;
2629  } else {
2630  assert(N->getValueType(0) == MVT::i32 &&
2631  "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
2632  SDNode *Op =
2633  CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
2634  N0, ShiftAmt);
2635  CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
2636  SDValue(Op, 1));
2637  return;
2638  }
2639  }
2640 
2641  case ISD::LOAD: {
2642  // Handle preincrement loads.
2643  LoadSDNode *LD = cast<LoadSDNode>(N);
2644  EVT LoadedVT = LD->getMemoryVT();
2645 
2646  // Normal loads are handled by code generated from the .td file.
2647  if (LD->getAddressingMode() != ISD::PRE_INC)
2648  break;
2649 
2650  SDValue Offset = LD->getOffset();
2651  if (Offset.getOpcode() == ISD::TargetConstant ||
2652  Offset.getOpcode() == ISD::TargetGlobalAddress) {
2653 
2654  unsigned Opcode;
2655  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
2656  if (LD->getValueType(0) != MVT::i64) {
2657  // Handle PPC32 integer and normal FP loads.
2658  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2659  switch (LoadedVT.getSimpleVT().SimpleTy) {
2660  default: llvm_unreachable("Invalid PPC load type!");
2661  case MVT::f64: Opcode = PPC::LFDU; break;
2662  case MVT::f32: Opcode = PPC::LFSU; break;
2663  case MVT::i32: Opcode = PPC::LWZU; break;
2664  case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
2665  case MVT::i1:
2666  case MVT::i8: Opcode = PPC::LBZU; break;
2667  }
2668  } else {
2669  assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
2670  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2671  switch (LoadedVT.getSimpleVT().SimpleTy) {
2672  default: llvm_unreachable("Invalid PPC load type!");
2673  case MVT::i64: Opcode = PPC::LDU; break;
2674  case MVT::i32: Opcode = PPC::LWZU8; break;
2675  case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
2676  case MVT::i1:
2677  case MVT::i8: Opcode = PPC::LBZU8; break;
2678  }
2679  }
2680 
2681  SDValue Chain = LD->getChain();
2682  SDValue Base = LD->getBasePtr();
2683  SDValue Ops[] = { Offset, Base, Chain };
2684  SDNode *MN = CurDAG->getMachineNode(
2685  Opcode, dl, LD->getValueType(0),
2686  PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
2687  transferMemOperands(N, MN);
2688  ReplaceNode(N, MN);
2689  return;
2690  } else {
2691  unsigned Opcode;
2692  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
2693  if (LD->getValueType(0) != MVT::i64) {
2694  // Handle PPC32 integer and normal FP loads.
2695  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2696  switch (LoadedVT.getSimpleVT().SimpleTy) {
2697  default: llvm_unreachable("Invalid PPC load type!");
2698  case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
2699  case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
2700  case MVT::f64: Opcode = PPC::LFDUX; break;
2701  case MVT::f32: Opcode = PPC::LFSUX; break;
2702  case MVT::i32: Opcode = PPC::LWZUX; break;
2703  case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
2704  case MVT::i1:
2705  case MVT::i8: Opcode = PPC::LBZUX; break;
2706  }
2707  } else {
2708  assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
2709  assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
2710  "Invalid sext update load");
2711  switch (LoadedVT.getSimpleVT().SimpleTy) {
2712  default: llvm_unreachable("Invalid PPC load type!");
2713  case MVT::i64: Opcode = PPC::LDUX; break;
2714  case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
2715  case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
2716  case MVT::i1:
2717  case MVT::i8: Opcode = PPC::LBZUX8; break;
2718  }
2719  }
2720 
2721  SDValue Chain = LD->getChain();
2722  SDValue Base = LD->getBasePtr();
2723  SDValue Ops[] = { Base, Offset, Chain };
2724  SDNode *MN = CurDAG->getMachineNode(
2725  Opcode, dl, LD->getValueType(0),
2726  PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
2727  transferMemOperands(N, MN);
2728  ReplaceNode(N, MN);
2729  return;
2730  }
2731  }
2732 
2733  case ISD::AND: {
2734  unsigned Imm, Imm2, SH, MB, ME;
2735  uint64_t Imm64;
2736 
2737  // If this is an and of a value rotated between 0 and 31 bits and then and'd
2738  // with a mask, emit rlwinm
2739  if (isInt32Immediate(N->getOperand(1), Imm) &&
2740  isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
2741  SDValue Val = N->getOperand(0).getOperand(0);
2742  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
2743  getI32Imm(ME, dl) };
2744  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2745  return;
2746  }
2747  // If this is just a masked value where the input is not handled above, and
2748  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
2749  if (isInt32Immediate(N->getOperand(1), Imm) &&
2750  isRunOfOnes(Imm, MB, ME) &&
2751  N->getOperand(0).getOpcode() != ISD::ROTL) {
2752  SDValue Val = N->getOperand(0);
2753  SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
2754  getI32Imm(ME, dl) };
2755  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2756  return;
2757  }
2758  // If this is a 64-bit zero-extension mask, emit rldicl.
2759  if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
2760  isMask_64(Imm64)) {
2761  SDValue Val = N->getOperand(0);
2762  MB = 64 - countTrailingOnes(Imm64);
2763  SH = 0;
2764 
2765  if (Val.getOpcode() == ISD::ANY_EXTEND) {
2766  auto Op0 = Val.getOperand(0);
2767  if ( Op0.getOpcode() == ISD::SRL &&
2768  isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
2769 
2770  auto ResultType = Val.getNode()->getValueType(0);
2771  auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2772  ResultType);
2773  SDValue IDVal (ImDef, 0);
2774 
2775  Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2776  ResultType, IDVal, Op0.getOperand(0),
2777  getI32Imm(1, dl)), 0);
2778  SH = 64 - Imm;
2779  }
2780  }
2781 
2782  // If the operand is a logical right shift, we can fold it into this
2783  // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
2784  // for n <= mb. The right shift is really a left rotate followed by a
2785  // mask, and this mask is a more-restrictive sub-mask of the mask implied
2786  // by the shift.
2787  if (Val.getOpcode() == ISD::SRL &&
2788  isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
2789  assert(Imm < 64 && "Illegal shift amount");
2790  Val = Val.getOperand(0);
2791  SH = 64 - Imm;
2792  }
2793 
2794  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
2795  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
2796  return;
2797  }
2798  // If this is a negated 64-bit zero-extension mask,
2799  // i.e. the immediate is a sequence of ones from most significant side
2800  // and all zero for reminder, we should use rldicr.
2801  if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
2802  isMask_64(~Imm64)) {
2803  SDValue Val = N->getOperand(0);
2804  MB = 63 - countTrailingOnes(~Imm64);
2805  SH = 0;
2806  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
2807  CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
2808  return;
2809  }
2810 
2811  // AND X, 0 -> 0, not "rlwinm 32".
2812  if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
2813  ReplaceUses(SDValue(N, 0), N->getOperand(1));
2814  return;
2815  }
2816  // ISD::OR doesn't get all the bitfield insertion fun.
2817  // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
2818  // bitfield insert.
2819  if (isInt32Immediate(N->getOperand(1), Imm) &&
2820  N->getOperand(0).getOpcode() == ISD::OR &&
2821  isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
2822  // The idea here is to check whether this is equivalent to:
2823  // (c1 & m) | (x & ~m)
2824  // where m is a run-of-ones mask. The logic here is that, for each bit in
2825  // c1 and c2:
2826  // - if both are 1, then the output will be 1.
2827  // - if both are 0, then the output will be 0.
2828  // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
2829  // come from x.
2830  // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
2831  // be 0.
2832  // If that last condition is never the case, then we can form m from the
2833  // bits that are the same between c1 and c2.
2834  unsigned MB, ME;
2835  if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
2836  SDValue Ops[] = { N->getOperand(0).getOperand(0),
2837  N->getOperand(0).getOperand(1),
2838  getI32Imm(0, dl), getI32Imm(MB, dl),
2839  getI32Imm(ME, dl) };
2840  ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
2841  return;
2842  }
2843  }
2844 
2845  // Other cases are autogenerated.
2846  break;
2847  }
2848  case ISD::OR: {
2849  if (N->getValueType(0) == MVT::i32)
2850  if (tryBitfieldInsert(N))
2851  return;
2852 
2853  int16_t Imm;
2854  if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
2855  isIntS16Immediate(N->getOperand(1), Imm)) {
2856  KnownBits LHSKnown;
2857  CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
2858 
2859  // If this is equivalent to an add, then we can fold it with the
2860  // FrameIndex calculation.
2861  if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
2862  selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
2863  return;
2864  }
2865  }
2866 
2867  // OR with a 32-bit immediate can be handled by ori + oris
2868  // without creating an immediate in a GPR.
2869  uint64_t Imm64 = 0;
2870  bool IsPPC64 = PPCSubTarget->isPPC64();
2871  if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
2872  (Imm64 & ~0xFFFFFFFFuLL) == 0) {
2873  // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later.
2874  uint64_t ImmHi = Imm64 >> 16;
2875  uint64_t ImmLo = Imm64 & 0xFFFF;
2876  if (ImmHi != 0 && ImmLo != 0) {
2877  SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
2878  N->getOperand(0),
2879  getI16Imm(ImmLo, dl));
2880  SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
2881  CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
2882  return;
2883  }
2884  }
2885 
2886  // Other cases are autogenerated.
2887  break;
2888  }
2889  case ISD::XOR: {
2890  // XOR with a 32-bit immediate can be handled by xori + xoris
2891  // without creating an immediate in a GPR.
2892  uint64_t Imm64 = 0;
2893  bool IsPPC64 = PPCSubTarget->isPPC64();
2894  if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
2895  (Imm64 & ~0xFFFFFFFFuLL) == 0) {
2896  // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later.
2897  uint64_t ImmHi = Imm64 >> 16;
2898  uint64_t ImmLo = Imm64 & 0xFFFF;
2899  if (ImmHi != 0 && ImmLo != 0) {
2900  SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
2901  N->getOperand(0),
2902  getI16Imm(ImmLo, dl));
2903  SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
2904  CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
2905  return;
2906  }
2907  }
2908 
2909  break;
2910  }
2911  case ISD::ADD: {
2912  int16_t Imm;
2913  if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
2914  isIntS16Immediate(N->getOperand(1), Imm)) {
2915  selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
2916  return;
2917  }
2918 
2919  break;
2920  }
2921  case ISD::SHL: {
2922  unsigned Imm, SH, MB, ME;
2923  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
2924  isRotateAndMask(N, Imm, true, SH, MB, ME)) {
2925  SDValue Ops[] = { N->getOperand(0).getOperand(0),
2926  getI32Imm(SH, dl), getI32Imm(MB, dl),
2927  getI32Imm(ME, dl) };
2928  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2929  return;
2930  }
2931 
2932  // Other cases are autogenerated.
2933  break;
2934  }
2935  case ISD::SRL: {
2936  unsigned Imm, SH, MB, ME;
2937  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
2938  isRotateAndMask(N, Imm, true, SH, MB, ME)) {
2939  SDValue Ops[] = { N->getOperand(0).getOperand(0),
2940  getI32Imm(SH, dl), getI32Imm(MB, dl),
2941  getI32Imm(ME, dl) };
2942  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2943  return;
2944  }
2945 
2946  // Other cases are autogenerated.
2947  break;
2948  }
2949  // FIXME: Remove this once the ANDI glue bug is fixed:
2951  case PPCISD::ANDIo_1_GT_BIT: {
2952  if (!ANDIGlueBug)
2953  break;
2954 
2955  EVT InVT = N->getOperand(0).getValueType();
2956  assert((InVT == MVT::i64 || InVT == MVT::i32) &&
2957  "Invalid input type for ANDIo_1_EQ_BIT");
2958 
2959  unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
2960  SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
2961  N->getOperand(0),
2962  CurDAG->getTargetConstant(1, dl, InVT)),
2963  0);
2964  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2965  SDValue SRIdxVal =
2966  CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
2967  PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
2968 
2969  CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
2970  SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
2971  return;
2972  }
2973  case ISD::SELECT_CC: {
2974  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2975  EVT PtrVT =
2976  CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
2977  bool isPPC64 = (PtrVT == MVT::i64);
2978 
2979  // If this is a select of i1 operands, we'll pattern match it.
2980  if (PPCSubTarget->useCRBits() &&
2981  N->getOperand(0).getValueType() == MVT::i1)
2982  break;
2983 
2984  // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
2985  if (!isPPC64)
2986  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2987  if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2988  if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
2989  if (N1C->isNullValue() && N3C->isNullValue() &&
2990  N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
2991  // FIXME: Implement this optzn for PPC64.
2992  N->getValueType(0) == MVT::i32) {
2993  SDNode *Tmp =
2994  CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2995  N->getOperand(0), getI32Imm(~0U, dl));
2996  CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
2997  N->getOperand(0), SDValue(Tmp, 1));
2998  return;
2999  }
3000 
3001  SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
3002 
3003  if (N->getValueType(0) == MVT::i1) {
3004  // An i1 select is: (c & t) | (!c & f).
3005  bool Inv;
3006  unsigned Idx = getCRIdxForSetCC(CC, Inv);
3007 
3008  unsigned SRI;
3009  switch (Idx) {
3010  default: llvm_unreachable("Invalid CC index");
3011  case 0: SRI = PPC::sub_lt; break;
3012  case 1: SRI = PPC::sub_gt; break;
3013  case 2: SRI = PPC::sub_eq; break;
3014  case 3: SRI = PPC::sub_un; break;
3015  }
3016 
3017  SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
3018 
3019  SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
3020  CCBit, CCBit), 0);
3021  SDValue C = Inv ? NotCCBit : CCBit,
3022  NotC = Inv ? CCBit : NotCCBit;
3023 
3024  SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
3025  C, N->getOperand(2)), 0);
3026  SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
3027  NotC, N->getOperand(3)), 0);
3028 
3029  CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
3030  return;
3031  }
3032 
3033  unsigned BROpc = getPredicateForSetCC(CC);
3034 
3035  unsigned SelectCCOp;
3036  if (N->getValueType(0) == MVT::i32)
3037  SelectCCOp = PPC::SELECT_CC_I4;
3038  else if (N->getValueType(0) == MVT::i64)
3039  SelectCCOp = PPC::SELECT_CC_I8;
3040  else if (N->getValueType(0) == MVT::f32)
3041  if (PPCSubTarget->hasP8Vector())
3042  SelectCCOp = PPC::SELECT_CC_VSSRC;
3043  else
3044  SelectCCOp = PPC::SELECT_CC_F4;
3045  else if (N->getValueType(0) == MVT::f64)
3046  if (PPCSubTarget->hasVSX())
3047  SelectCCOp = PPC::SELECT_CC_VSFRC;
3048  else
3049  SelectCCOp = PPC::SELECT_CC_F8;
3050  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
3051  SelectCCOp = PPC::SELECT_CC_QFRC;
3052  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
3053  SelectCCOp = PPC::SELECT_CC_QSRC;
3054  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
3055  SelectCCOp = PPC::SELECT_CC_QBRC;
3056  else if (N->getValueType(0) == MVT::v2f64 ||
3057  N->getValueType(0) == MVT::v2i64)
3058  SelectCCOp = PPC::SELECT_CC_VSRC;
3059  else
3060  SelectCCOp = PPC::SELECT_CC_VRRC;
3061 
3062  SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
3063  getI32Imm(BROpc, dl) };
3064  CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
3065  return;
3066  }
3067  case ISD::VSELECT:
3068  if (PPCSubTarget->hasVSX()) {
3069  SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
3070  CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
3071  return;
3072  }
3073  break;
3074 
3075  case ISD::VECTOR_SHUFFLE:
3076  if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
3077  N->getValueType(0) == MVT::v2i64)) {
3078  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
3079 
3080  SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
3081  Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
3082  unsigned DM[2];
3083 
3084  for (int i = 0; i < 2; ++i)
3085  if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
3086  DM[i] = 0;
3087  else
3088  DM[i] = 1;
3089 
3090  if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
3091  Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
3092  isa<LoadSDNode>(Op1.getOperand(0))) {
3093  LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
3094  SDValue Base, Offset;
3095 
3096  if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
3097  (LD->getMemoryVT() == MVT::f64 ||
3098  LD->getMemoryVT() == MVT::i64) &&
3099  SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
3100  SDValue Chain = LD->getChain();
3101  SDValue Ops[] = { Base, Offset, Chain };
3103  MemOp[0] = LD->getMemOperand();
3104  SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
3105  N->getValueType(0), Ops);
3106  cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
3107  return;
3108  }
3109  }
3110 
3111  // For little endian, we must swap the input operands and adjust
3112  // the mask elements (reverse and invert them).
3113  if (PPCSubTarget->isLittleEndian()) {
3114  std::swap(Op1, Op2);
3115  unsigned tmp = DM[0];
3116  DM[0] = 1 - DM[1];
3117  DM[1] = 1 - tmp;
3118  }
3119 
3120  SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
3121  MVT::i32);
3122  SDValue Ops[] = { Op1, Op2, DMV };
3123  CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
3124  return;
3125  }
3126 
3127  break;
3128  case PPCISD::BDNZ:
3129  case PPCISD::BDZ: {
3130  bool IsPPC64 = PPCSubTarget->isPPC64();
3131  SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
3132  CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
3133  ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
3134  : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
3135  MVT::Other, Ops);
3136  return;
3137  }
3138  case PPCISD::COND_BRANCH: {
3139  // Op #0 is the Chain.
3140  // Op #1 is the PPC::PRED_* number.
3141  // Op #2 is the CR#
3142  // Op #3 is the Dest MBB
3143  // Op #4 is the Flag.
3144  // Prevent PPC::PRED_* from being selected into LI.
3145  unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3146  if (EnableBranchHint)
3147  PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
3148 
3149  SDValue Pred = getI32Imm(PCC, dl);
3150  SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
3151  N->getOperand(0), N->getOperand(4) };
3152  CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
3153  return;
3154  }
3155  case ISD::BR_CC: {
3156  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
3157  unsigned PCC = getPredicateForSetCC(CC);
3158 
3159  if (N->getOperand(2).getValueType() == MVT::i1) {
3160  unsigned Opc;
3161  bool Swap;
3162  switch (PCC) {
3163  default: llvm_unreachable("Unexpected Boolean-operand predicate");
3164  case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
3165  case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
3166  case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
3167  case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
3168  case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
3169  case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
3170  }
3171 
3172  SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
3173  N->getOperand(Swap ? 3 : 2),
3174  N->getOperand(Swap ? 2 : 3)), 0);
3175  CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
3176  N->getOperand(0));
3177  return;
3178  }
3179 
3180  if (EnableBranchHint)
3181  PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
3182 
3183  SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
3184  SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
3185  N->getOperand(4), N->getOperand(0) };
3186  CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
3187  return;
3188  }
3189  case ISD::BRIND: {
3190  // FIXME: Should custom lower this.
3191  SDValue Chain = N->getOperand(0);
3192  SDValue Target = N->getOperand(1);
3193  unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
3194  unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
3195  Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
3196  Chain), 0);
3197  CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
3198  return;
3199  }
3200  case PPCISD::TOC_ENTRY: {
3201  assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
3202  "Only supported for 64-bit ABI and 32-bit SVR4");
3203  if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
3204  SDValue GA = N->getOperand(0);
3205  SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
3206  N->getOperand(1));
3207  transferMemOperands(N, MN);
3208  ReplaceNode(N, MN);
3209  return;
3210  }
3211 
3212  // For medium and large code model, we generate two instructions as
3213  // described below. Otherwise we allow SelectCodeCommon to handle this,
3214  // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
3215  CodeModel::Model CModel = TM.getCodeModel();
3216  if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
3217  break;
3218 
3219  // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
3220  // If it must be toc-referenced according to PPCSubTarget, we generate:
3221  // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
3222  // Otherwise we generate:
3223  // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
3224  SDValue GA = N->getOperand(0);
3225  SDValue TOCbase = N->getOperand(1);
3226  SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
3227  TOCbase, GA);
3228 
3229  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
3230  CModel == CodeModel::Large) {
3231  SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
3232  SDValue(Tmp, 0));
3233  transferMemOperands(N, MN);
3234  ReplaceNode(N, MN);
3235  return;
3236  }
3237 
3238  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
3239  const GlobalValue *GV = G->getGlobal();
3240  unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
3241  if (GVFlags & PPCII::MO_NLP_FLAG) {
3242  SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
3243  SDValue(Tmp, 0));
3244  transferMemOperands(N, MN);
3245  ReplaceNode(N, MN);
3246  return;
3247  }
3248  }
3249 
3250  ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
3251  SDValue(Tmp, 0), GA));
3252  return;
3253  }
3254  case PPCISD::PPC32_PICGOT:
3255  // Generate a PIC-safe GOT reference.
3256  assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
3257  "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
3258  CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
3259  PPCLowering->getPointerTy(CurDAG->getDataLayout()),
3260  MVT::i32);
3261  return;
3262 
3263  case PPCISD::VADD_SPLAT: {
3264  // This expands into one of three sequences, depending on whether
3265  // the first operand is odd or even, positive or negative.
3266  assert(isa<ConstantSDNode>(N->getOperand(0)) &&
3267  isa<ConstantSDNode>(N->getOperand(1)) &&
3268  "Invalid operand on VADD_SPLAT!");
3269 
3270  int Elt = N->getConstantOperandVal(0);
3271  int EltSize = N->getConstantOperandVal(1);
3272  unsigned Opc1, Opc2, Opc3;
3273  EVT VT;
3274 
3275  if (EltSize == 1) {
3276  Opc1 = PPC::VSPLTISB;
3277  Opc2 = PPC::VADDUBM;
3278  Opc3 = PPC::VSUBUBM;
3279  VT = MVT::v16i8;
3280  } else if (EltSize == 2) {
3281  Opc1 = PPC::VSPLTISH;
3282  Opc2 = PPC::VADDUHM;
3283  Opc3 = PPC::VSUBUHM;
3284  VT = MVT::v8i16;
3285  } else {
3286  assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
3287  Opc1 = PPC::VSPLTISW;
3288  Opc2 = PPC::VADDUWM;
3289  Opc3 = PPC::VSUBUWM;
3290  VT = MVT::v4i32;
3291  }
3292 
3293  if ((Elt & 1) == 0) {
3294  // Elt is even, in the range [-32,-18] + [16,30].
3295  //
3296  // Convert: VADD_SPLAT elt, size
3297  // Into: tmp = VSPLTIS[BHW] elt
3298  // VADDU[BHW]M tmp, tmp
3299  // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
3300  SDValue EltVal = getI32Imm(Elt >> 1, dl);
3301  SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3302  SDValue TmpVal = SDValue(Tmp, 0);
3303  ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
3304  return;
3305  } else if (Elt > 0) {
3306  // Elt is odd and positive, in the range [17,31].
3307  //
3308  // Convert: VADD_SPLAT elt, size
3309  // Into: tmp1 = VSPLTIS[BHW] elt-16
3310  // tmp2 = VSPLTIS[BHW] -16
3311  // VSUBU[BHW]M tmp1, tmp2
3312  SDValue EltVal = getI32Imm(Elt - 16, dl);
3313  SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3314  EltVal = getI32Imm(-16, dl);
3315  SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3316  ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
3317  SDValue(Tmp2, 0)));
3318  return;
3319  } else {
3320  // Elt is odd and negative, in the range [-31,-17].
3321  //
3322  // Convert: VADD_SPLAT elt, size
3323  // Into: tmp1 = VSPLTIS[BHW] elt+16
3324  // tmp2 = VSPLTIS[BHW] -16
3325  // VADDU[BHW]M tmp1, tmp2
3326  SDValue EltVal = getI32Imm(Elt + 16, dl);
3327  SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3328  EltVal = getI32Imm(-16, dl);
3329  SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3330  ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
3331  SDValue(Tmp2, 0)));
3332  return;
3333  }
3334  }
3335  }
3336 
3337  SelectCode(N);
3338 }
3339 
3340 // If the target supports the cmpb instruction, do the idiom recognition here.
3341 // We don't do this as a DAG combine because we don't want to do it as nodes
3342 // are being combined (because we might miss part of the eventual idiom). We
3343 // don't want to do it during instruction selection because we want to reuse
3344 // the logic for lowering the masking operations already part of the
3345 // instruction selector.
3346 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
3347  SDLoc dl(N);
3348 
3349  assert(N->getOpcode() == ISD::OR &&
3350  "Only OR nodes are supported for CMPB");
3351 
3352  SDValue Res;
3353  if (!PPCSubTarget->hasCMPB())
3354  return Res;
3355 
3356  if (N->getValueType(0) != MVT::i32 &&
3357  N->getValueType(0) != MVT::i64)
3358  return Res;
3359 
3360  EVT VT = N->getValueType(0);
3361 
3362  SDValue RHS, LHS;
3363  bool BytesFound[8] = {false, false, false, false, false, false, false, false};
3364  uint64_t Mask = 0, Alt = 0;
3365 
3366  auto IsByteSelectCC = [this](SDValue O, unsigned &b,
3367  uint64_t &Mask, uint64_t &Alt,
3368  SDValue &LHS, SDValue &RHS) {
3369  if (O.getOpcode() != ISD::SELECT_CC)
3370  return false;
3371  ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
3372 
3373  if (!isa<ConstantSDNode>(O.getOperand(2)) ||
3374  !isa<ConstantSDNode>(O.getOperand(3)))
3375  return false;
3376 
3377  uint64_t PM = O.getConstantOperandVal(2);
3378  uint64_t PAlt = O.getConstantOperandVal(3);
3379  for (b = 0; b < 8; ++b) {
3380  uint64_t Mask = UINT64_C(0xFF) << (8*b);
3381  if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
3382  break;
3383  }
3384 
3385  if (b == 8)
3386  return false;
3387  Mask |= PM;
3388  Alt |= PAlt;
3389 
3390  if (!isa<ConstantSDNode>(O.getOperand(1)) ||
3391  O.getConstantOperandVal(1) != 0) {
3392  SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
3393  if (Op0.getOpcode() == ISD::TRUNCATE)
3394  Op0 = Op0.getOperand(0);
3395  if (Op1.getOpcode() == ISD::TRUNCATE)
3396  Op1 = Op1.getOperand(0);
3397 
3398  if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
3399  Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
3400  isa<ConstantSDNode>(Op0.getOperand(1))) {
3401 
3402  unsigned Bits = Op0.getValueSizeInBits();
3403  if (b != Bits/8-1)
3404  return false;
3405  if (Op0.getConstantOperandVal(1) != Bits-8)
3406  return false;
3407 
3408  LHS = Op0.getOperand(0);
3409  RHS = Op1.getOperand(0);
3410  return true;
3411  }
3412 
3413  // When we have small integers (i16 to be specific), the form present
3414  // post-legalization uses SETULT in the SELECT_CC for the
3415  // higher-order byte, depending on the fact that the
3416  // even-higher-order bytes are known to all be zero, for example:
3417  // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
3418  // (so when the second byte is the same, because all higher-order
3419  // bits from bytes 3 and 4 are known to be zero, the result of the
3420  // xor can be at most 255)
3421  if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
3422  isa<ConstantSDNode>(O.getOperand(1))) {
3423 
3424  uint64_t ULim = O.getConstantOperandVal(1);
3425  if (ULim != (UINT64_C(1) << b*8))
3426  return false;
3427 
3428  // Now we need to make sure that the upper bytes are known to be
3429  // zero.
3430  unsigned Bits = Op0.getValueSizeInBits();
3431  if (!CurDAG->MaskedValueIsZero(
3432  Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
3433  return false;
3434 
3435  LHS = Op0.getOperand(0);
3436  RHS = Op0.getOperand(1);
3437  return true;
3438  }
3439 
3440  return false;
3441  }
3442 
3443  if (CC != ISD::SETEQ)
3444  return false;
3445 
3446  SDValue Op = O.getOperand(0);
3447  if (Op.getOpcode() == ISD::AND) {
3448  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3449  return false;
3450  if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
3451  return false;
3452 
3453  SDValue XOR = Op.getOperand(0);
3454  if (XOR.getOpcode() == ISD::TRUNCATE)
3455  XOR = XOR.getOperand(0);
3456  if (XOR.getOpcode() != ISD::XOR)
3457  return false;
3458 
3459  LHS = XOR.getOperand(0);
3460  RHS = XOR.getOperand(1);
3461  return true;
3462  } else if (Op.getOpcode() == ISD::SRL) {
3463  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3464  return false;
3465  unsigned Bits = Op.getValueSizeInBits();
3466  if (b != Bits/8-1)
3467  return false;
3468  if (Op.getConstantOperandVal(1) != Bits-8)
3469  return false;
3470 
3471  SDValue XOR = Op.getOperand(0);
3472  if (XOR.getOpcode() == ISD::TRUNCATE)
3473  XOR = XOR.getOperand(0);
3474  if (XOR.getOpcode() != ISD::XOR)
3475  return false;
3476 
3477  LHS = XOR.getOperand(0);
3478  RHS = XOR.getOperand(1);
3479  return true;
3480  }
3481 
3482  return false;
3483  };
3484 
3485  SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
3486  while (!Queue.empty()) {
3487  SDValue V = Queue.pop_back_val();
3488 
3489  for (const SDValue &O : V.getNode()->ops()) {
3490  unsigned b;
3491  uint64_t M = 0, A = 0;
3492  SDValue OLHS, ORHS;
3493  if (O.getOpcode() == ISD::OR) {
3494  Queue.push_back(O);
3495  } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
3496  if (!LHS) {
3497  LHS = OLHS;
3498  RHS = ORHS;
3499  BytesFound[b] = true;
3500  Mask |= M;
3501  Alt |= A;
3502  } else if ((LHS == ORHS && RHS == OLHS) ||
3503  (RHS == ORHS && LHS == OLHS)) {
3504  BytesFound[b] = true;
3505  Mask |= M;
3506  Alt |= A;
3507  } else {
3508  return Res;
3509  }
3510  } else {
3511  return Res;
3512  }
3513  }
3514  }
3515 
3516  unsigned LastB = 0, BCnt = 0;
3517  for (unsigned i = 0; i < 8; ++i)
3518  if (BytesFound[LastB]) {
3519  ++BCnt;
3520  LastB = i;
3521  }
3522 
3523  if (!LastB || BCnt < 2)
3524  return Res;
3525 
3526  // Because we'll be zero-extending the output anyway if don't have a specific
3527  // value for each input byte (via the Mask), we can 'anyext' the inputs.
3528  if (LHS.getValueType() != VT) {
3529  LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
3530  RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
3531  }
3532 
3533  Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
3534 
3535  bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
3536  if (NonTrivialMask && !Alt) {
3537  // Res = Mask & CMPB
3538  Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
3539  CurDAG->getConstant(Mask, dl, VT));
3540  } else if (Alt) {
3541  // Res = (CMPB & Mask) | (~CMPB & Alt)
3542  // Which, as suggested here:
3543  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
3544  // can be written as:
3545  // Res = Alt ^ ((Alt ^ Mask) & CMPB)
3546  // useful because the (Alt ^ Mask) can be pre-computed.
3547  Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
3548  CurDAG->getConstant(Mask ^ Alt, dl, VT));
3549  Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
3550  CurDAG->getConstant(Alt, dl, VT));
3551  }
3552 
3553  return Res;
3554 }
3555 
3556 // When CR bit registers are enabled, an extension of an i1 variable to a i32
3557 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
3558 // involves constant materialization of a 0 or a 1 or both. If the result of
3559 // the extension is then operated upon by some operator that can be constant
3560 // folded with a constant 0 or 1, and that constant can be materialized using
3561 // only one instruction (like a zero or one), then we should fold in those
3562 // operations with the select.
3563 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
3564  if (!PPCSubTarget->useCRBits())
3565  return;
3566 
3567  if (N->getOpcode() != ISD::ZERO_EXTEND &&
3568  N->getOpcode() != ISD::SIGN_EXTEND &&
3569  N->getOpcode() != ISD::ANY_EXTEND)
3570  return;
3571 
3572  if (N->getOperand(0).getValueType() != MVT::i1)
3573  return;
3574 
3575  if (!N->hasOneUse())
3576  return;
3577 
3578  SDLoc dl(N);
3579  EVT VT = N->getValueType(0);
3580  SDValue Cond = N->getOperand(0);
3581  SDValue ConstTrue =
3582  CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
3583  SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
3584 
3585  do {
3586  SDNode *User = *N->use_begin();
3587  if (User->getNumOperands() != 2)
3588  break;
3589 
3590  auto TryFold = [this, N, User, dl](SDValue Val) {
3591  SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
3592  SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
3593  SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
3594 
3595  return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
3596  User->getValueType(0),
3597  O0.getNode(), O1.getNode());
3598  };
3599 
3600  // FIXME: When the semantics of the interaction between select and undef
3601  // are clearly defined, it may turn out to be unnecessary to break here.
3602  SDValue TrueRes = TryFold(ConstTrue);
3603  if (!TrueRes || TrueRes.isUndef())
3604  break;
3605  SDValue FalseRes = TryFold(ConstFalse);
3606  if (!FalseRes || FalseRes.isUndef())
3607  break;
3608 
3609  // For us to materialize these using one instruction, we must be able to
3610  // represent them as signed 16-bit integers.
3611  uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
3612  False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
3613  if (!isInt<16>(True) || !isInt<16>(False))
3614  break;
3615 
3616  // We can replace User with a new SELECT node, and try again to see if we
3617  // can fold the select with its user.
3618  Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
3619  N = User;
3620  ConstTrue = TrueRes;
3621  ConstFalse = FalseRes;
3622  } while (N->hasOneUse());
3623 }
3624 
3625 void PPCDAGToDAGISel::PreprocessISelDAG() {
3626  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
3627  ++Position;
3628 
3629  bool MadeChange = false;
3630  while (Position != CurDAG->allnodes_begin()) {
3631  SDNode *N = &*--Position;
3632  if (N->use_empty())
3633  continue;
3634 
3635  SDValue Res;
3636  switch (N->getOpcode()) {
3637  default: break;
3638  case ISD::OR:
3639  Res = combineToCMPB(N);
3640  break;
3641  }
3642 
3643  if (!Res)
3644  foldBoolExts(Res, N);
3645 
3646  if (Res) {
3647  DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
3648  DEBUG(N->dump(CurDAG));
3649  DEBUG(dbgs() << "\nNew: ");
3650  DEBUG(Res.getNode()->dump(CurDAG));
3651  DEBUG(dbgs() << "\n");
3652 
3653  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
3654  MadeChange = true;
3655  }
3656  }
3657 
3658  if (MadeChange)
3659  CurDAG->RemoveDeadNodes();
3660 }
3661 
3662 /// PostprocessISelDAG - Perform some late peephole optimizations
3663 /// on the DAG representation.
3664 void PPCDAGToDAGISel::PostprocessISelDAG() {
3665  // Skip peepholes at -O0.
3666  if (TM.getOptLevel() == CodeGenOpt::None)
3667  return;
3668 
3669  PeepholePPC64();
3670  PeepholeCROps();
3671  PeepholePPC64ZExt();
3672 }
3673 
3674 // Check if all users of this node will become isel where the second operand
3675 // is the constant zero. If this is so, and if we can negate the condition,
3676 // then we can flip the true and false operands. This will allow the zero to
3677 // be folded with the isel so that we don't need to materialize a register
3678 // containing zero.
3679 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
3680  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
3681  UI != UE; ++UI) {
3682  SDNode *User = *UI;
3683  if (!User->isMachineOpcode())
3684  return false;
3685  if (User->getMachineOpcode() != PPC::SELECT_I4 &&
3686  User->getMachineOpcode() != PPC::SELECT_I8)
3687  return false;
3688 
3689  SDNode *Op2 = User->getOperand(2).getNode();
3690  if (!Op2->isMachineOpcode())
3691  return false;
3692 
3693  if (Op2->getMachineOpcode() != PPC::LI &&
3694  Op2->getMachineOpcode() != PPC::LI8)
3695  return false;
3696 
3698  if (!C)
3699  return false;
3700 
3701  if (!C->isNullValue())
3702  return false;
3703  }
3704 
3705  return true;
3706 }
3707 
3708 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
3709  SmallVector<SDNode *, 4> ToReplace;
3710  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
3711  UI != UE; ++UI) {
3712  SDNode *User = *UI;
3713  assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
3714  User->getMachineOpcode() == PPC::SELECT_I8) &&
3715  "Must have all select users");
3716  ToReplace.push_back(User);
3717  }
3718 
3719  for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
3720  UE = ToReplace.end(); UI != UE; ++UI) {
3721  SDNode *User = *UI;
3722  SDNode *ResNode =
3723  CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
3724  User->getValueType(0), User->getOperand(0),
3725  User->getOperand(2),
3726  User->getOperand(1));
3727 
3728  DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
3729  DEBUG(User->dump(CurDAG));
3730  DEBUG(dbgs() << "\nNew: ");
3731  DEBUG(ResNode->dump(CurDAG));
3732  DEBUG(dbgs() << "\n");
3733 
3734  ReplaceUses(User, ResNode);
3735  }
3736 }
3737 
3738 void PPCDAGToDAGISel::PeepholeCROps() {
3739  bool IsModified;
3740  do {
3741  IsModified = false;
3742  for (SDNode &Node : CurDAG->allnodes()) {
3743  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
3744  if (!MachineNode || MachineNode->use_empty())
3745  continue;
3746  SDNode *ResNode = MachineNode;
3747 
3748  bool Op1Set = false, Op1Unset = false,
3749  Op1Not = false,
3750  Op2Set = false, Op2Unset = false,
3751  Op2Not = false;
3752 
3753  unsigned Opcode = MachineNode->getMachineOpcode();
3754  switch (Opcode) {
3755  default: break;
3756  case PPC::CRAND:
3757  case PPC::CRNAND:
3758  case PPC::CROR:
3759  case PPC::CRXOR:
3760  case PPC::CRNOR:
3761  case PPC::CREQV:
3762  case PPC::CRANDC:
3763  case PPC::CRORC: {
3764  SDValue Op = MachineNode->getOperand(1);
3765  if (Op.isMachineOpcode()) {
3766  if (Op.getMachineOpcode() == PPC::CRSET)
3767  Op2Set = true;
3768  else if (Op.getMachineOpcode() == PPC::CRUNSET)
3769  Op2Unset = true;
3770  else if (Op.getMachineOpcode() == PPC::CRNOR &&
3771  Op.getOperand(0) == Op.getOperand(1))
3772  Op2Not = true;
3773  }
3775  }
3776  case PPC::BC:
3777  case PPC::BCn:
3778  case PPC::SELECT_I4:
3779  case PPC::SELECT_I8:
3780  case PPC::SELECT_F4:
3781  case PPC::SELECT_F8:
3782  case PPC::SELECT_QFRC:
3783  case PPC::SELECT_QSRC:
3784  case PPC::SELECT_QBRC:
3785  case PPC::SELECT_VRRC:
3786  case PPC::SELECT_VSFRC:
3787  case PPC::SELECT_VSSRC:
3788  case PPC::SELECT_VSRC: {
3789  SDValue Op = MachineNode->getOperand(0);
3790  if (Op.isMachineOpcode()) {
3791  if (Op.getMachineOpcode() == PPC::CRSET)
3792  Op1Set = true;
3793  else if (Op.getMachineOpcode() == PPC::CRUNSET)
3794  Op1Unset = true;
3795  else if (Op.getMachineOpcode() == PPC::CRNOR &&
3796  Op.getOperand(0) == Op.getOperand(1))
3797  Op1Not = true;
3798  }
3799  }
3800  break;
3801  }
3802 
3803  bool SelectSwap = false;
3804  switch (Opcode) {
3805  default: break;
3806  case PPC::CRAND:
3807  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3808  // x & x = x
3809  ResNode = MachineNode->getOperand(0).getNode();
3810  else if (Op1Set)
3811  // 1 & y = y
3812  ResNode = MachineNode->getOperand(1).getNode();
3813  else if (Op2Set)
3814  // x & 1 = x
3815  ResNode = MachineNode->getOperand(0).getNode();
3816  else if (Op1Unset || Op2Unset)
3817  // x & 0 = 0 & y = 0
3818  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3819  MVT::i1);
3820  else if (Op1Not)
3821  // ~x & y = andc(y, x)
3822  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3823  MVT::i1, MachineNode->getOperand(1),
3824  MachineNode->getOperand(0).
3825  getOperand(0));
3826  else if (Op2Not)
3827  // x & ~y = andc(x, y)
3828  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3829  MVT::i1, MachineNode->getOperand(0),
3830  MachineNode->getOperand(1).
3831  getOperand(0));
3832  else if (AllUsersSelectZero(MachineNode)) {
3833  ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
3834  MVT::i1, MachineNode->getOperand(0),
3835  MachineNode->getOperand(1));
3836  SelectSwap = true;
3837  }
3838  break;
3839  case PPC::CRNAND:
3840  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3841  // nand(x, x) -> nor(x, x)
3842  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3843  MVT::i1, MachineNode->getOperand(0),
3844  MachineNode->getOperand(0));
3845  else if (Op1Set)
3846  // nand(1, y) -> nor(y, y)
3847  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3848  MVT::i1, MachineNode->getOperand(1),
3849  MachineNode->getOperand(1));
3850  else if (Op2Set)
3851  // nand(x, 1) -> nor(x, x)
3852  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3853  MVT::i1, MachineNode->getOperand(0),
3854  MachineNode->getOperand(0));
3855  else if (Op1Unset || Op2Unset)
3856  // nand(x, 0) = nand(0, y) = 1
3857  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3858  MVT::i1);
3859  else if (Op1Not)
3860  // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
3861  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3862  MVT::i1, MachineNode->getOperand(0).
3863  getOperand(0),
3864  MachineNode->getOperand(1));
3865  else if (Op2Not)
3866  // nand(x, ~y) = ~x | y = orc(y, x)
3867  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3868  MVT::i1, MachineNode->getOperand(1).
3869  getOperand(0),
3870  MachineNode->getOperand(0));
3871  else if (AllUsersSelectZero(MachineNode)) {
3872  ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
3873  MVT::i1, MachineNode->getOperand(0),
3874  MachineNode->getOperand(1));
3875  SelectSwap = true;
3876  }
3877  break;
3878  case PPC::CROR:
3879  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3880  // x | x = x
3881  ResNode = MachineNode->getOperand(0).getNode();
3882  else if (Op1Set || Op2Set)
3883  // x | 1 = 1 | y = 1
3884  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3885  MVT::i1);
3886  else if (Op1Unset)
3887  // 0 | y = y
3888  ResNode = MachineNode->getOperand(1).getNode();
3889  else if (Op2Unset)
3890  // x | 0 = x
3891  ResNode = MachineNode->getOperand(0).getNode();
3892  else if (Op1Not)
3893  // ~x | y = orc(y, x)
3894  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3895  MVT::i1, MachineNode->getOperand(1),
3896  MachineNode->getOperand(0).
3897  getOperand(0));
3898  else if (Op2Not)
3899  // x | ~y = orc(x, y)
3900  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3901  MVT::i1, MachineNode->getOperand(0),
3902  MachineNode->getOperand(1).
3903  getOperand(0));
3904  else if (AllUsersSelectZero(MachineNode)) {
3905  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3906  MVT::i1, MachineNode->getOperand(0),
3907  MachineNode->getOperand(1));
3908  SelectSwap = true;
3909  }
3910  break;
3911  case PPC::CRXOR:
3912  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3913  // xor(x, x) = 0
3914  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3915  MVT::i1);
3916  else if (Op1Set)
3917  // xor(1, y) -> nor(y, y)
3918  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3919  MVT::i1, MachineNode->getOperand(1),
3920  MachineNode->getOperand(1));
3921  else if (Op2Set)
3922  // xor(x, 1) -> nor(x, x)
3923  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3924  MVT::i1, MachineNode->getOperand(0),
3925  MachineNode->getOperand(0));
3926  else if (Op1Unset)
3927  // xor(0, y) = y
3928  ResNode = MachineNode->getOperand(1).getNode();
3929  else if (Op2Unset)
3930  // xor(x, 0) = x
3931  ResNode = MachineNode->getOperand(0).getNode();
3932  else if (Op1Not)
3933  // xor(~x, y) = eqv(x, y)
3934  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3935  MVT::i1, MachineNode->getOperand(0).
3936  getOperand(0),
3937  MachineNode->getOperand(1));
3938  else if (Op2Not)
3939  // xor(x, ~y) = eqv(x, y)
3940  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3941  MVT::i1, MachineNode->getOperand(0),
3942  MachineNode->getOperand(1).
3943  getOperand(0));
3944  else if (AllUsersSelectZero(MachineNode)) {
3945  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3946  MVT::i1, MachineNode->getOperand(0),
3947  MachineNode->getOperand(1));
3948  SelectSwap = true;
3949  }
3950  break;
3951  case PPC::CRNOR:
3952  if (Op1Set || Op2Set)
3953  // nor(1, y) -> 0
3954  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3955  MVT::i1);
3956  else if (Op1Unset)
3957  // nor(0, y) = ~y -> nor(y, y)
3958  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3959  MVT::i1, MachineNode->getOperand(1),
3960  MachineNode->getOperand(1));
3961  else if (Op2Unset)
3962  // nor(x, 0) = ~x
3963  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3964  MVT::i1, MachineNode->getOperand(0),
3965  MachineNode->getOperand(0));
3966  else if (Op1Not)
3967  // nor(~x, y) = andc(x, y)
3968  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3969  MVT::i1, MachineNode->getOperand(0).
3970  getOperand(0),
3971  MachineNode->getOperand(1));
3972  else if (Op2Not)
3973  // nor(x, ~y) = andc(y, x)
3974  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3975  MVT::i1, MachineNode->getOperand(1).
3976  getOperand(0),
3977  MachineNode->getOperand(0));
3978  else if (AllUsersSelectZero(MachineNode)) {
3979  ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
3980  MVT::i1, MachineNode->getOperand(0),
3981  MachineNode->getOperand(1));
3982  SelectSwap = true;
3983  }
3984  break;
3985  case PPC::CREQV:
3986  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3987  // eqv(x, x) = 1
3988  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3989  MVT::i1);
3990  else if (Op1Set)
3991  // eqv(1, y) = y
3992  ResNode = MachineNode->getOperand(1).getNode();
3993  else if (Op2Set)
3994  // eqv(x, 1) = x
3995  ResNode = MachineNode->getOperand(0).getNode();
3996  else if (Op1Unset)
3997  // eqv(0, y) = ~y -> nor(y, y)
3998  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3999  MVT::i1, MachineNode->getOperand(1),
4000  MachineNode->getOperand(1));
4001  else if (Op2Unset)
4002  // eqv(x, 0) = ~x
4003  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
4004  MVT::i1, MachineNode->getOperand(0),
4005  MachineNode->getOperand(0));
4006  else if (Op1Not)
4007  // eqv(~x, y) = xor(x, y)
4008  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
4009  MVT::i1, MachineNode->getOperand(0).
4010  getOperand(0),
4011  MachineNode->getOperand(1));
4012  else if (Op2Not)
4013  // eqv(x, ~y) = xor(x, y)
4014  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
4015  MVT::i1, MachineNode->getOperand(0),
4016  MachineNode->getOperand(1).
4017  getOperand(0));
4018  else if (AllUsersSelectZero(MachineNode)) {
4019  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
4020  MVT::i1, MachineNode->getOperand(0),
4021  MachineNode->getOperand(1));
4022  SelectSwap = true;
4023  }
4024  break;
4025  case PPC::CRANDC:
4026  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
4027  // andc(x, x) = 0
4028  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
4029  MVT::i1);
4030  else if (Op1Set)
4031  // andc(1, y) = ~y
4032  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
4033  MVT::i1, MachineNode->getOperand(1),
4034  MachineNode->getOperand(1));
4035  else if (Op1Unset || Op2Set)
4036  // andc(0, y) = andc(x, 1) = 0
4037  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
4038  MVT::i1);
4039  else if (Op2Unset)
4040  // andc(x, 0) = x
4041  ResNode = MachineNode->getOperand(0).getNode();
4042  else if (Op1Not)
4043  // andc(~x, y) = ~(x | y) = nor(x, y)
4044  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
4045  MVT::i1, MachineNode->getOperand(0).
4046  getOperand(0),
4047  MachineNode->getOperand(1));
4048  else if (Op2Not)
4049  // andc(x, ~y) = x & y
4050  ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
4051  MVT::i1, MachineNode->getOperand(0),
4052  MachineNode->getOperand(1).
4053  getOperand(0));
4054  else if (AllUsersSelectZero(MachineNode)) {
4055  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
4056  MVT::i1, MachineNode->getOperand(1),
4057  MachineNode->getOperand(0));
4058  SelectSwap = true;
4059  }
4060  break;
4061  case PPC::CRORC:
4062  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
4063  // orc(x, x) = 1
4064  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
4065  MVT::i1);
4066  else if (Op1Set || Op2Unset)
4067  // orc(1, y) = orc(x, 0) = 1
4068  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
4069  MVT::i1);
4070  else if (Op2Set)
4071  // orc(x, 1) = x
4072  ResNode = MachineNode->getOperand(0).getNode();
4073  else if (Op1Unset)
4074  // orc(0, y) = ~y
4075  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
4076  MVT::i1, MachineNode->getOperand(1),
4077  MachineNode->getOperand(1));
4078  else if (Op1Not)
4079  // orc(~x, y) = ~(x & y) = nand(x, y)
4080  ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
4081  MVT::i1, MachineNode->getOperand(0).
4082  getOperand(0),
4083  MachineNode->getOperand(1));
4084  else if (Op2Not)
4085  // orc(x, ~y) = x | y
4086  ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
4087  MVT::i1, MachineNode->getOperand(0),
4088  MachineNode->getOperand(1).
4089  getOperand(0));
4090  else if (AllUsersSelectZero(MachineNode)) {
4091  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
4092  MVT::i1, MachineNode->getOperand(1),
4093  MachineNode->getOperand(0));
4094  SelectSwap = true;
4095  }
4096  break;
4097  case PPC::SELECT_I4:
4098  case PPC::SELECT_I8:
4099  case PPC::SELECT_F4:
4100  case PPC::SELECT_F8:
4101  case PPC::SELECT_QFRC:
4102  case PPC::SELECT_QSRC:
4103  case PPC::SELECT_QBRC:
4104  case PPC::SELECT_VRRC:
4105  case PPC::SELECT_VSFRC:
4106  case PPC::SELECT_VSSRC:
4107  case PPC::SELECT_VSRC:
4108  if (Op1Set)
4109  ResNode = MachineNode->getOperand(1).getNode();
4110  else if (Op1Unset)
4111  ResNode = MachineNode->getOperand(2).getNode();
4112  else if (Op1Not)
4113  ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
4114  SDLoc(MachineNode),
4115  MachineNode->getValueType(0),
4116  MachineNode->getOperand(0).
4117  getOperand(0),
4118  MachineNode->getOperand(2),
4119  MachineNode->getOperand(1));
4120  break;
4121  case PPC::BC:
4122  case PPC::BCn:
4123  if (Op1Not)
4124  ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
4125  PPC::BC,
4126  SDLoc(MachineNode),
4127  MVT::Other,
4128  MachineNode->getOperand(0).
4129  getOperand(0),
4130  MachineNode->getOperand(1),
4131  MachineNode->getOperand(2));
4132  // FIXME: Handle Op1Set, Op1Unset here too.
4133  break;
4134  }
4135 
4136  // If we're inverting this node because it is used only by selects that
4137  // we'd like to swap, then swap the selects before the node replacement.
4138  if (SelectSwap)
4139  SwapAllSelectUsers(MachineNode);
4140 
4141  if (ResNode != MachineNode) {
4142  DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
4143  DEBUG(MachineNode->dump(CurDAG));
4144  DEBUG(dbgs() << "\nNew: ");
4145  DEBUG(ResNode->dump(CurDAG));
4146  DEBUG(dbgs() << "\n");
4147 
4148  ReplaceUses(MachineNode, ResNode);
4149  IsModified = true;
4150  }
4151  }
4152  if (IsModified)
4153  CurDAG->RemoveDeadNodes();
4154  } while (IsModified);
4155 }
4156 
4157 // Gather the set of 32-bit operations that are known to have their
4158 // higher-order 32 bits zero, where ToPromote contains all such operations.
4160  SmallPtrSetImpl<SDNode *> &ToPromote) {
4161  if (!Op32.isMachineOpcode())
4162  return false;
4163 
4164  // First, check for the "frontier" instructions (those that will clear the
4165  // higher-order 32 bits.
4166 
4167  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
4168  // around. If it does not, then these instructions will clear the
4169  // higher-order bits.
4170  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
4171  Op32.getMachineOpcode() == PPC::RLWNM) &&
4172  Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
4173  ToPromote.insert(Op32.getNode());
4174  return true;
4175  }
4176 
4177  // SLW and SRW always clear the higher-order bits.
4178  if (Op32.getMachineOpcode() == PPC::SLW ||
4179  Op32.getMachineOpcode() == PPC::SRW) {
4180  ToPromote.insert(Op32.getNode());
4181  return true;
4182  }
4183 
4184  // For LI and LIS, we need the immediate to be positive (so that it is not
4185  // sign extended).
4186  if (Op32.getMachineOpcode() == PPC::LI ||
4187  Op32.getMachineOpcode() == PPC::LIS) {
4188  if (!isUInt<15>(Op32.getConstantOperandVal(0)))
4189  return false;
4190 
4191  ToPromote.insert(Op32.getNode());
4192  return true;
4193  }
4194 
4195  // LHBRX and LWBRX always clear the higher-order bits.
4196  if (Op32.getMachineOpcode() == PPC::LHBRX ||
4197  Op32.getMachineOpcode() == PPC::LWBRX) {
4198  ToPromote.insert(Op32.getNode());
4199  return true;
4200  }
4201 
4202  // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
4203  if (Op32.getMachineOpcode() == PPC::CNTLZW ||
4204  Op32.getMachineOpcode() == PPC::CNTTZW) {
4205  ToPromote.insert(Op32.getNode());
4206  return true;
4207  }
4208 
4209  // Next, check for those instructions we can look through.
4210 
4211  // Assuming the mask does not wrap around, then the higher-order bits are
4212  // taken directly from the first operand.
4213  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
4214  Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
4215  SmallPtrSet<SDNode *, 16> ToPromote1;
4216  if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
4217  return false;
4218 
4219  ToPromote.insert(Op32.getNode());
4220  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4221  return true;
4222  }
4223 
4224  // For OR, the higher-order bits are zero if that is true for both operands.
4225  // For SELECT_I4, the same is true (but the relevant operand numbers are
4226  // shifted by 1).
4227  if (Op32.getMachineOpcode() == PPC::OR ||
4228  Op32.getMachineOpcode() == PPC::SELECT_I4) {
4229  unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
4230  SmallPtrSet<SDNode *, 16> ToPromote1;
4231  if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
4232  return false;
4233  if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
4234  return false;
4235 
4236  ToPromote.insert(Op32.getNode());
4237  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4238  return true;
4239  }
4240 
4241  // For ORI and ORIS, we need the higher-order bits of the first operand to be
4242  // zero, and also for the constant to be positive (so that it is not sign
4243  // extended).
4244  if (Op32.getMachineOpcode() == PPC::ORI ||
4245  Op32.getMachineOpcode() == PPC::ORIS) {
4246  SmallPtrSet<SDNode *, 16> ToPromote1;
4247  if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
4248  return false;
4249  if (!isUInt<15>(Op32.getConstantOperandVal(1)))
4250  return false;
4251 
4252  ToPromote.insert(Op32.getNode());
4253  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4254  return true;
4255  }
4256 
4257  // The higher-order bits of AND are zero if that is true for at least one of
4258  // the operands.
4259  if (Op32.getMachineOpcode() == PPC::AND) {
4260  SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
4261  bool Op0OK =
4262  PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
4263  bool Op1OK =
4264  PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
4265  if (!Op0OK && !Op1OK)
4266  return false;
4267 
4268  ToPromote.insert(Op32.getNode());
4269 
4270  if (Op0OK)
4271  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4272 
4273  if (Op1OK)
4274  ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
4275 
4276  return true;
4277  }
4278 
4279  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
4280  // of the first operand, or if the second operand is positive (so that it is
4281  // not sign extended).
4282  if (Op32.getMachineOpcode() == PPC::ANDIo ||
4283  Op32.getMachineOpcode() == PPC::ANDISo) {
4284  SmallPtrSet<SDNode *, 16> ToPromote1;
4285  bool Op0OK =
4286  PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
4287  bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
4288  if (!Op0OK && !Op1OK)
4289  return false;
4290 
4291  ToPromote.insert(Op32.getNode());
4292 
4293  if (Op0OK)
4294  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4295 
4296  return true;
4297  }
4298 
4299  return false;
4300 }
4301 
4302 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
4303  if (!PPCSubTarget->isPPC64())
4304  return;
4305 
4306  // When we zero-extend from i32 to i64, we use a pattern like this:
4307  // def : Pat<(i64 (zext i32:$in)),
4308  // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
4309  // 0, 32)>;
4310  // There are several 32-bit shift/rotate instructions, however, that will
4311  // clear the higher-order bits of their output, rendering the RLDICL
4312  // unnecessary. When that happens, we remove it here, and redefine the
4313  // relevant 32-bit operation to be a 64-bit operation.
4314 
4315  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
4316  ++Position;
4317 
4318  bool MadeChange = false;
4319  while (Position != CurDAG->allnodes_begin()) {
4320  SDNode *N = &*--Position;
4321  // Skip dead nodes and any non-machine opcodes.
4322  if (N->use_empty() || !N->isMachineOpcode())
4323  continue;
4324 
4325  if (N->getMachineOpcode() != PPC::RLDICL)
4326  continue;
4327 
4328  if (N->getConstantOperandVal(1) != 0 ||
4329  N->getConstantOperandVal(2) != 32)
4330  continue;
4331 
4332  SDValue ISR = N->getOperand(0);
4333  if (!ISR.isMachineOpcode() ||
4334  ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
4335  continue;
4336 
4337  if (!ISR.hasOneUse())
4338  continue;
4339 
4340  if (ISR.getConstantOperandVal(2) != PPC::sub_32)
4341  continue;
4342 
4343  SDValue IDef = ISR.getOperand(0);
4344  if (!IDef.isMachineOpcode() ||
4345  IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
4346  continue;
4347 
4348  // We now know that we're looking at a canonical i32 -> i64 zext. See if we
4349  // can get rid of it.
4350 
4351  SDValue Op32 = ISR->getOperand(1);
4352  if (!Op32.isMachineOpcode())
4353  continue;
4354 
4355  // There are some 32-bit instructions that always clear the high-order 32
4356  // bits, there are also some instructions (like AND) that we can look
4357  // through.
4358  SmallPtrSet<SDNode *, 16> ToPromote;
4359  if (!PeepholePPC64ZExtGather(Op32, ToPromote))
4360  continue;
4361 
4362  // If the ToPromote set contains nodes that have uses outside of the set
4363  // (except for the original INSERT_SUBREG), then abort the transformation.
4364  bool OutsideUse = false;
4365  for (SDNode *PN : ToPromote) {
4366  for (SDNode *UN : PN->uses()) {
4367  if (!ToPromote.count(UN) && UN != ISR.getNode()) {
4368  OutsideUse = true;
4369  break;
4370  }
4371  }
4372 
4373  if (OutsideUse)
4374  break;
4375  }
4376  if (OutsideUse)
4377  continue;
4378 
4379  MadeChange = true;
4380 
4381  // We now know that this zero extension can be removed by promoting to
4382  // nodes in ToPromote to 64-bit operations, where for operations in the
4383  // frontier of the set, we need to insert INSERT_SUBREGs for their
4384  // operands.
4385  for (SDNode *PN : ToPromote) {
4386  unsigned NewOpcode;
4387  switch (PN->getMachineOpcode()) {
4388  default:
4389  llvm_unreachable("Don't know the 64-bit variant of this instruction");
4390  case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
4391  case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
4392  case PPC::SLW: NewOpcode = PPC::SLW8; break;
4393  case PPC::SRW: NewOpcode = PPC::SRW8; break;
4394  case PPC::LI: NewOpcode = PPC::LI8; break;
4395  case PPC::LIS: NewOpcode = PPC::LIS8; break;
4396  case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
4397  case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
4398  case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
4399  case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
4400  case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
4401  case PPC::OR: NewOpcode = PPC::OR8; break;
4402  case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
4403  case PPC::ORI: NewOpcode = PPC::ORI8; break;
4404  case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
4405  case PPC::AND: NewOpcode = PPC::AND8; break;
4406  case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break;
4407  case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break;
4408  }
4409 
4410  // Note: During the replacement process, the nodes will be in an
4411  // inconsistent state (some instructions will have operands with values
4412  // of the wrong type). Once done, however, everything should be right
4413  // again.
4414 
4416  for (const SDValue &V : PN->ops()) {
4417  if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
4418  !isa<ConstantSDNode>(V)) {
4419  SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
4420  SDNode *ReplOp =
4421  CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
4422  ISR.getNode()->getVTList(), ReplOpOps);
4423  Ops.push_back(SDValue(ReplOp, 0));
4424  } else {
4425  Ops.push_back(V);
4426  }
4427  }
4428 
4429  // Because all to-be-promoted nodes only have users that are other
4430  // promoted nodes (or the original INSERT_SUBREG), we can safely replace
4431  // the i32 result value type with i64.
4432 
4433  SmallVector<EVT, 2> NewVTs;
4434  SDVTList VTs = PN->getVTList();
4435  for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
4436  if (VTs.VTs[i] == MVT::i32)
4437  NewVTs.push_back(MVT::i64);
4438  else
4439  NewVTs.push_back(VTs.VTs[i]);
4440 
4441  DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
4442  DEBUG(PN->dump(CurDAG));
4443 
4444  CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
4445 
4446  DEBUG(dbgs() << "\nNew: ");
4447  DEBUG(PN->dump(CurDAG));
4448  DEBUG(dbgs() << "\n");
4449  }
4450 
4451  // Now we replace the original zero extend and its associated INSERT_SUBREG
4452  // with the value feeding the INSERT_SUBREG (which has now been promoted to
4453  // return an i64).
4454 
4455  DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
4456  DEBUG(N->dump(CurDAG));
4457  DEBUG(dbgs() << "\nNew: ");
4458  DEBUG(Op32.getNode()->dump(CurDAG));
4459  DEBUG(dbgs() << "\n");
4460 
4461  ReplaceUses(N, Op32.getNode());
4462  }
4463 
4464  if (MadeChange)
4465  CurDAG->RemoveDeadNodes();
4466 }
4467 
4468 void PPCDAGToDAGISel::PeepholePPC64() {
4469  // These optimizations are currently supported only for 64-bit SVR4.
4470  if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
4471  return;
4472 
4473  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
4474  ++Position;
4475 
4476  while (Position != CurDAG->allnodes_begin()) {
4477  SDNode *N = &*--Position;
4478  // Skip dead nodes and any non-machine opcodes.
4479  if (N->use_empty() || !N->isMachineOpcode())
4480  continue;
4481 
4482  unsigned FirstOp;
4483  unsigned StorageOpcode = N->getMachineOpcode();
4484 
4485  switch (StorageOpcode) {
4486  default: continue;
4487 
4488  case PPC::LBZ:
4489  case PPC::LBZ8:
4490  case PPC::LD:
4491  case PPC::LFD:
4492  case PPC::LFS:
4493  case PPC::LHA:
4494  case PPC::LHA8:
4495  case PPC::LHZ:
4496  case PPC::LHZ8:
4497  case PPC::LWA:
4498  case PPC::LWZ:
4499  case PPC::LWZ8:
4500  FirstOp = 0;
4501  break;
4502 
4503  case PPC::STB:
4504  case PPC::STB8:
4505  case PPC::STD:
4506  case PPC::STFD:
4507  case PPC::STFS:
4508  case PPC::STH:
4509  case PPC::STH8:
4510  case PPC::STW:
4511  case PPC::STW8:
4512  FirstOp = 1;
4513  break;
4514  }
4515 
4516  // If this is a load or store with a zero offset, or within the alignment,
4517  // we may be able to fold an add-immediate into the memory operation.
4518  // The check against alignment is below, as it can't occur until we check
4519  // the arguments to N
4520  if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
4521  continue;
4522 
4523  SDValue Base = N->getOperand(FirstOp + 1);
4524  if (!Base.isMachineOpcode())
4525  continue;
4526 
4527  unsigned Flags = 0;
4528  bool ReplaceFlags = true;
4529 
4530  // When the feeding operation is an add-immediate of some sort,
4531  // determine whether we need to add relocation information to the
4532  // target flags on the immediate operand when we fold it into the
4533  // load instruction.
4534  //
4535  // For something like ADDItocL, the relocation information is
4536  // inferred from the opcode; when we process it in the AsmPrinter,
4537  // we add the necessary relocation there. A load, though, can receive
4538  // relocation from various flavors of ADDIxxx, so we need to carry
4539  // the relocation information in the target flags.
4540  switch (Base.getMachineOpcode()) {
4541  default: continue;
4542 
4543  case PPC::ADDI8:
4544  case PPC::ADDI:
4545  // In some cases (such as TLS) the relocation information
4546  // is already in place on the operand, so copying the operand
4547  // is sufficient.
4548  ReplaceFlags = false;
4549  // For these cases, the immediate may not be divisible by 4, in
4550  // which case the fold is illegal for DS-form instructions. (The
4551  // other cases provide aligned addresses and are always safe.)
4552  if ((StorageOpcode == PPC::LWA ||
4553  StorageOpcode == PPC::LD ||
4554  StorageOpcode == PPC::STD) &&
4555  (!isa<ConstantSDNode>(Base.getOperand(1)) ||
4556  Base.getConstantOperandVal(1) % 4 != 0))
4557  continue;
4558  break;
4559  case PPC::ADDIdtprelL:
4560  Flags = PPCII::MO_DTPREL_LO;
4561  break;
4562  case PPC::ADDItlsldL:
4563  Flags = PPCII::MO_TLSLD_LO;
4564  break;
4565  case PPC::ADDItocL:
4566  Flags = PPCII::MO_TOC_LO;
4567  break;
4568  }
4569 
4570  SDValue ImmOpnd = Base.getOperand(1);
4571 
4572  // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
4573  // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
4574  // we might have needed different @ha relocation values for the offset
4575  // pointers).
4576  int MaxDisplacement = 7;
4577  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
4578  const GlobalValue *GV = GA->getGlobal();
4579  MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
4580  }
4581 
4582  bool UpdateHBase = false;
4583  SDValue HBase = Base.getOperand(0);
4584 
4585  int Offset = N->getConstantOperandVal(FirstOp);
4586  if (ReplaceFlags) {
4587  if (Offset < 0 || Offset > MaxDisplacement) {
4588  // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
4589  // one use, then we can do this for any offset, we just need to also
4590  // update the offset (i.e. the symbol addend) on the addis also.
4591  if (Base.getMachineOpcode() != PPC::ADDItocL)
4592  continue;
4593 
4594  if (!HBase.isMachineOpcode() ||
4595  HBase.getMachineOpcode() != PPC::ADDIStocHA)
4596  continue;
4597 
4598  if (!Base.hasOneUse() || !HBase.hasOneUse())
4599  continue;
4600 
4601  SDValue HImmOpnd = HBase.getOperand(1);
4602  if (HImmOpnd != ImmOpnd)
4603  continue;
4604 
4605  UpdateHBase = true;
4606  }
4607  } else {
4608  // If we're directly folding the addend from an addi instruction, then:
4609  // 1. In general, the offset on the memory access must be zero.
4610  // 2. If the addend is a constant, then it can be combined with a
4611  // non-zero offset, but only if the result meets the encoding
4612  // requirements.
4613  if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
4614  Offset += C->getSExtValue();
4615 
4616  if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD ||
4617  StorageOpcode == PPC::STD) && (Offset % 4) != 0)
4618  continue;
4619 
4620  if (!isInt<16>(Offset))
4621  continue;
4622 
4623  ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
4624  ImmOpnd.getValueType());
4625  } else if (Offset != 0) {
4626  continue;
4627  }
4628  }
4629 
4630  // We found an opportunity. Reverse the operands from the add
4631  // immediate and substitute them into the load or store. If
4632  // needed, update the target flags for the immediate operand to
4633  // reflect the necessary relocation information.
4634  DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
4635  DEBUG(Base->dump(CurDAG));
4636  DEBUG(dbgs() << "\nN: ");
4637  DEBUG(N->dump(CurDAG));
4638  DEBUG(dbgs() << "\n");
4639 
4640  // If the relocation information isn't already present on the
4641  // immediate operand, add it now.
4642  if (ReplaceFlags) {
4643  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
4644  SDLoc dl(GA);
4645  const GlobalValue *GV = GA->getGlobal();
4646  // We can't perform this optimization for data whose alignment
4647  // is insufficient for the instruction encoding.
4648  if (GV->getAlignment() < 4 &&
4649  (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
4650  StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
4651  DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
4652  continue;
4653  }
4654  ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
4655  } else if (ConstantPoolSDNode *CP =
4656  dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
4657  const Constant *C = CP->getConstVal();
4658  ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
4659  CP->getAlignment(),
4660  Offset, Flags);
4661  }
4662  }
4663 
4664  if (FirstOp == 1) // Store
4665  (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
4666  Base.getOperand(0), N->getOperand(3));
4667  else // Load
4668  (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
4669  N->getOperand(2));
4670 
4671  if (UpdateHBase)
4672  (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
4673  ImmOpnd);
4674 
4675  // The add-immediate may now be dead, in which case remove it.
4676  if (Base.getNode()->use_empty())
4677  CurDAG->RemoveDeadNode(Base.getNode());
4678  }
4679 }
4680 
4681 /// createPPCISelDag - This pass converts a legalized DAG into a
4682 /// PowerPC-specific DAG, ready for instruction scheduling.
4683 ///
4685  CodeGenOpt::Level OptLevel) {
4686  return new PPCDAGToDAGISel(TM, OptLevel);
4687 }
uint64_t CallInst * C
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:341
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOffset() const
bool isUndef() const
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
T findLastSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the last set bit starting from the least significant bit.
Definition: MathExtras.h:236
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1542
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:617
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
GPRC = address of GLOBAL_OFFSET_TABLE.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:342
static unsigned index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
BasicBlock * getSuccessor(unsigned idx) const
Return the specified successor.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:807
const SDValue & getBasePtr() const
unsigned char classifyGlobalReference(const GlobalValue *GV) const
classifyGlobalReference - Classify a global variable reference for the current subtarget accourding t...
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
SDVTList getVTList() const
static unsigned selectI64ImmInstrCountDirect(int64_t Imm)
bool hasVSX() const
Definition: PPCSubtarget.h:242
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:89
const SDValue & getChain() const
bool hasQPX() const
Definition: PPCSubtarget.h:241
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden)
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
unsigned second
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:298
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
A debug info location.
Definition: DebugLoc.h:34
F(f)
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
unsigned int NumVTs
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:404
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:955
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const HexagonInstrInfo * TII
Shift and rotation operations.
Definition: ISDOpcodes.h:379
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:470
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
uint64_t getConstantOperandVal(unsigned i) const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Reg
All possible values of the reg field in the ModR/M byte.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getID() const
Return the register class ID number.
bool isTargetELF() const
Definition: PPCSubtarget.h:300
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:736
This file implements a class to represent arbitrary precision integral constant values and operations...
This represents a list of ValueType&#39;s that has been intern&#39;d by a SelectionDAG.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:403
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
#define T
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:398
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:916
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:125
unsigned getAlignment() const
Definition: Globals.cpp:97
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:558
ArrayRef< SDUse > ops() const
This class is used to represent ISD::STORE nodes.
TargetInstrInfo - Interface to description of machine instruction set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:629
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static SDNode * selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm)
bool isMachineOpcode() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
MO_NLP_FLAG - If this bit is set, the symbol reference is actually to the non_lazy_ptr for the global...
Definition: PPC.h:83
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
const SDValue & getOperand(unsigned Num) const
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC)
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
This class provides iterator support for SDUse operands that use a specific SDNode.
unsigned getMachineOpcode() const
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:180
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Common code between 32-bit and 64-bit PowerPC targets.
Extended Value Type.
Definition: ValueTypes.h:34
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MachineBasicBlock & front() const
bool isLittleEndian() const
Definition: PPCSubtarget.h:225
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MachineBasicBlock * MBB
MBB - The current block.
bool hasP8Vector() const
Definition: PPCSubtarget.h:243
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge&#39;s probability, relative to other out-edges of the Src.
bool use_empty() const
Return true if there are no uses of this node.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
void dump() const
Dump this node, for debugging.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:186
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode *> &ToPromote)
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:301
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:209
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:179
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
bool hasCMPB() const
Definition: PPCSubtarget.h:252
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
An SDNode that represents everything that will be needed to construct a MachineInstr.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
These values identify relocations on immediates folded into memory operations.
Definition: PPC.h:102
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after execu...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
EVT getMemoryVT() const
Return the type of the in-memory value.
Target - Wrapper for Target specific information.
CodeModel::Model getCodeModel() const
Returns the code model.
iterator_range< use_iterator > uses()
BranchProbabilityInfo * BPI
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:445
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:448
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2...
int getMaskElt(unsigned Idx) const
static SDNode * selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:538
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:362
FunctionPass * createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG, ready for instruction scheduling.
iterator begin() const
Definition: SmallPtrSet.h:397
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
static unsigned selectI64ImmInstrCount(int64_t Imm)
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:581
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
The CMPB instruction (takes two operands of i32 or i64).
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
Definition: PPCSubtarget.h:217
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
unsigned getOpcode() const
SDValue getValue(unsigned R) const
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:338
bool isDarwin() const
isDarwin - True if this is any darwin platform.
Definition: PPCSubtarget.h:296
iterator end() const
Definition: SmallPtrSet.h:402
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:175
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
static int const Threshold
TODO: Write a new FunctionPass AliasAnalysis so that it can keep a cache.
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getNumSuccessors() const
Return the number of successors that this terminator has.
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side...
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:326
static uint64_t Rot64(uint64_t Imm, unsigned R)
static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, const SDValue &DestMBB)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:556
LLVM Value Representation.
Definition: Value.h:73
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
#define DEBUG(X)
Definition: Debug.h:118
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition: Module.cpp:473
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:412
bool isSVR4ABI() const
Definition: PPCSubtarget.h:305
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
Conversion operators.
Definition: ISDOpcodes.h:442
const SDValue & getOperand(unsigned i) const
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:451
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate...
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:350
XXPERMDI - The PPC XXPERMDI instruction.
virtual const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const
Returns a TargetRegisterClass used for pointer values.
BRIND - Indirect branch.
Definition: ISDOpcodes.h:601
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
void resize(size_type N)
Definition: SmallVector.h:355
This class is used to represent ISD::LOAD nodes.