LLVM  7.0.0svn
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a pattern matching instruction selector for PowerPC,
11 // converting from a legalized dag to a PPC dag.
12 //
13 //===----------------------------------------------------------------------===//
14 
17 #include "PPC.h"
18 #include "PPCISelLowering.h"
19 #include "PPCMachineFunctionInfo.h"
20 #include "PPCSubtarget.h"
21 #include "PPCTargetMachine.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/GlobalValue.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CodeGen.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/KnownBits.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <limits>
63 #include <memory>
64 #include <new>
65 #include <tuple>
66 #include <utility>
67 
using namespace llvm;

#define DEBUG_TYPE "ppc-codegen"

// Statistics for the GPR-only integer-compare lowering implemented below.
STATISTIC(NumSextSetcc,
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
          "Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
          "Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
          "Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
          "Number of compares not eliminated as they have non-extending uses.");

// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);
// NOTE(review): the opening of this cl::opt declaration is missing from this
// excerpt (upstream declares a static cl::opt<bool> for stress-testing rotate
// selection); only the argument list survives below — confirm against the
// upstream file before editing.
    "ppc-bit-perm-rewriter-stress-rotates",
    cl::desc("stress rotate selection in aggressive ppc isel for "
             "bit permutations"),
    cl::Hidden);

// NOTE(review): the opening of this cl::opt declaration is missing from this
// excerpt (upstream: a static cl::opt<bool> enabling static branch hints).
    "ppc-use-branch-hint", cl::init(true),
    cl::desc("Enable static hinting of branches on ppc"),
    cl::Hidden);

// NOTE(review): the opening of this cl::opt declaration is missing from this
// excerpt (upstream: a static cl::opt<bool> enabling the TLS peephole).
    "ppc-tls-opt", cl::init(true),
    cl::desc("Enable tls optimization peephole"),
    cl::Hidden);

// NOTE(review): the enum declaring the ICGPR_* values and the opening of this
// cl::opt declaration are missing from this excerpt; the value list below
// references them.
    "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
    cl::desc("Specify the types of comparisons to emit GPR-only code for."),
    cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
               clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
               clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
               clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
               clEnumValN(ICGPR_NonExtIn, "nonextin",
                          "Only comparisons where inputs don't need [sz]ext."),
               clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
               clEnumValN(ICGPR_ZextI32, "zexti32",
                          "Only i32 comparisons with zext result."),
               clEnumValN(ICGPR_ZextI64, "zexti64",
                          "Only i64 comparisons with zext result."),
               clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
               clEnumValN(ICGPR_SextI32, "sexti32",
                          "Only i32 comparisons with sext result."),
               clEnumValN(ICGPR_SextI64, "sexti64",
                          "Only i64 comparisons with sext result.")));
namespace {

  //===--------------------------------------------------------------------===//
  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
  /// instructions for SelectionDAG operations.
  ///
  class PPCDAGToDAGISel : public SelectionDAGISel {
    const PPCTargetMachine &TM;           // Owning target machine.
    const PPCSubtarget *PPCSubTarget;     // Refreshed per function.
    const PPCTargetLowering *PPCLowering; // Refreshed per function.
    unsigned GlobalBaseReg;               // PIC base reg; 0 until materialized.

  public:
    explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), TM(tm) {}

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Make sure we re-emit a set of the global base reg if necessary
      GlobalBaseReg = 0;
      PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
      PPCLowering = PPCSubTarget->getTargetLowering();
      // NOTE(review): a line is missing from this excerpt here (upstream calls
      // SelectionDAGISel::runOnMachineFunction(MF) to run selection itself).

      if (!PPCSubTarget->isSVR4ABI())
        InsertVRSaveCode(MF);

      return true;
    }

    void PreprocessISelDAG() override;
    void PostprocessISelDAG() override;

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
    }

    /// getI64Imm - Return a target constant with the specified value, of type
    /// i64.
    inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    }

    /// getSmallIPtrImm - Return a target constant of pointer type.
    inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(
          Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
    }

    /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
    /// rotate and mask opcode and mask operation.
    static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                                unsigned &SH, unsigned &MB, unsigned &ME);

    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
    /// base register. Return the virtual register that holds this value.
    SDNode *getGlobalBaseReg();

    // Replace a frame-index node with ADDI/ADDI8 of the target frame index.
    void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

    // Select - Convert the specified operand from a target-independent to a
    // target-specific node if it hasn't already been changed.
    void Select(SDNode *N) override;

    bool tryBitfieldInsert(SDNode *N);
    bool tryBitPermutation(SDNode *N);
    bool tryIntCompareInGPR(SDNode *N);

    // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
    // an X-Form load instruction with the offset being a relocation coming from
    // the PPCISD::ADD_TLS.
    bool tryTLSXFormLoad(LoadSDNode *N);
    // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
    // an X-Form store instruction with the offset being a relocation coming from
    // the PPCISD::ADD_TLS.
    bool tryTLSXFormStore(StoreSDNode *N);
    /// SelectCC - Select a comparison of the specified values with the
    /// specified condition code, returning the CR# of the expression.
    SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                     const SDLoc &dl);

    /// SelectAddrImm - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement [r+imm].
    bool SelectAddrImm(SDValue N, SDValue &Disp,
                       SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
    }

    /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
    /// immediate field. Note that the operand at this point is already the
    /// result of a prior SelectAddressRegImm call.
    bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
      if (N.getOpcode() == ISD::TargetConstant ||
          // NOTE(review): the second operand of this || is missing from this
          // excerpt (upstream also accepts ISD::TargetGlobalAddress here);
          // the condition is truncated as written.
        Out = N;
        return true;
      }

      return false;
    }

    /// SelectAddrIdx - Given the specified addressed, check to see if it can be
    /// represented as an indexed [r+r] operation. Returns false if it can
    /// be represented by [r+imm], which are preferred.
    bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
    }

    /// SelectAddrIdxOnly - Given the specified addressed, force it to be
    /// represented as an indexed [r+r] operation.
    bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
    }

    /// SelectAddrImmX4 - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement that is a multiple of 4.
    /// Suitable for use by STD and friends.
    bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
    }

    // Same as above, but the displacement must be a multiple of 16.
    bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
    }

    // Select an address into a single register.
    bool SelectAddr(SDValue N, SDValue &Base) {
      Base = N;
      return true;
    }

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions. It is always correct to compute the value into
    /// a register. The case of adding a (possibly relocatable) constant to a
    /// register can be improved, but it is wrong to substitute Reg+Reg for
    /// Reg in an asm, because the load or store opcode would have to change.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override {
      switch(ConstraintID) {
      default:
        errs() << "ConstraintID: " << ConstraintID << "\n";
        llvm_unreachable("Unexpected asm memory constraint");
      // NOTE(review): the InlineAsm::Constraint_* case labels that introduce
      // the block below are missing from this excerpt; the closing brace and
      // return statements after it belong to that truncated case.
        // We need to make sure that this one operand does not end up in r0
        // (because we might end up lowering this as 0(%op)).
        const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
        const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
        SDLoc dl(Op);
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
        SDValue NewOp =
          SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                         dl, Op.getValueType(),
                                         Op, RC), 0);

        OutOps.push_back(NewOp);
        return false;
      }
      return true;
    }

    // Emit VRSAVE save/restore code in prologue/epilogues (non-SVR4 only).
    void InsertVRSaveCode(MachineFunction &MF);

    StringRef getPassName() const override {
      return "PowerPC DAG->DAG Pattern Instruction Selection";
    }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
    bool trySETCC(SDNode *N);

    void PeepholePPC64();
    void PeepholePPC64ZExt();
    void PeepholeCROps();

    SDValue combineToCMPB(SDNode *N);
    void foldBoolExts(SDValue &Res, SDNode *&N);

    bool AllUsersSelectZero(SDNode *N);
    void SwapAllSelectUsers(SDNode *N);

    bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
    void transferMemOperands(SDNode *N, SDNode *Result);
    MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr);
  };

} // end anonymous namespace
334 
335 /// InsertVRSaveCode - Once the entire function has been instruction selected,
336 /// all virtual registers are created and all machine instructions are built,
337 /// check to see if we need to save/restore VRSAVE. If so, do it.
/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
/// check to see if we need to save/restore VRSAVE. If so, do it.
void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
  // Check to see if this function uses vector registers, which means we have to
  // save and restore the VRSAVE register and update it with the regs we use.
  //
  // In this case, there will be virtual registers of vector type created
  // by the scheduler. Detect them now.
  bool HasVectorVReg = false;
  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
    // NOTE(review): a line is missing from this excerpt here (upstream derives
    // Reg from i via TargetRegisterInfo::index2VirtReg(i)); `Reg` below is
    // otherwise undeclared.
    if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
      HasVectorVReg = true;
      break;
    }
  }
  if (!HasVectorVReg) return; // nothing to do.

  // If we have a vector register, we want to emit code into the entry and exit
  // blocks to save and restore the VRSAVE register. We do this here (instead
  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
  //
  // 1. This (trivially) reduces the load on the register allocator, by not
  //    having to represent the live range of the VRSAVE register.
  // 2. This (more significantly) allows us to create a temporary virtual
  //    register to hold the saved VRSAVE value, allowing this temporary to be
  //    register allocated, instead of forcing it to be spilled to the stack.

  // Create two vregs - one to hold the VRSAVE register that is live-in to the
  // function and one for the value after having bits or'd into it.
  unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
  unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);

  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
  MachineBasicBlock &EntryBB = *Fn.begin();
  DebugLoc dl;
  // Emit the following code into the entry block:
  // InVRSAVE = MFVRSAVE
  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
  // MTVRSAVE UpdatedVRSAVE
  MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
          UpdatedVRSAVE).addReg(InVRSAVE);
  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);

  // Find all return blocks, outputting a restore in each epilog.
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
    if (BB->isReturnBlock()) {
      IP = BB->end(); --IP;

      // Skip over all terminator instructions, which are part of the return
      // sequence.
      // NOTE(review): the declaration of I2 is missing from this excerpt
      // (upstream: MachineBasicBlock::iterator I2 = IP;).
      while (I2 != BB->begin() && (--I2)->isTerminator())
        IP = I2;

      // Emit: MTVRSAVE InVRSave
      BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
    }
  }
}
398 
399 /// getGlobalBaseReg - Output the instructions required to put the
400 /// base address to use for accessing globals into a register.
401 ///
402 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
403  if (!GlobalBaseReg) {
404  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
405  // Insert the set of GlobalBaseReg into the first MBB of the function
406  MachineBasicBlock &FirstMBB = MF->front();
407  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
408  const Module *M = MF->getFunction().getParent();
409  DebugLoc dl;
410 
411  if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
412  if (PPCSubTarget->isTargetELF()) {
413  GlobalBaseReg = PPC::R30;
414  if (M->getPICLevel() == PICLevel::SmallPIC) {
415  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
416  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
417  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
418  } else {
419  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
420  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
421  unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
422  BuildMI(FirstMBB, MBBI, dl,
423  TII.get(PPC::UpdateGBR), GlobalBaseReg)
424  .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
425  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
426  }
427  } else {
428  GlobalBaseReg =
429  RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
430  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
431  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
432  }
433  } else {
434  // We must ensure that this sequence is dominated by the prologue.
435  // FIXME: This is a bit of a big hammer since we don't get the benefits
436  // of shrink-wrapping whenever we emit this instruction. Considering
437  // this is used in any function where we emit a jump table, this may be
438  // a significant limitation. We should consider inserting this in the
439  // block where it is used and then commoning this sequence up if it
440  // appears in multiple places.
441  // Note: on ISA 3.0 cores, we can use lnia (addpcis) insteand of
442  // MovePCtoLR8.
443  MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
444  GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
445  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
446  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
447  }
448  }
449  return CurDAG->getRegister(GlobalBaseReg,
450  PPCLowering->getPointerTy(CurDAG->getDataLayout()))
451  .getNode();
452 }
453 
454 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
455 /// operand. If so Imm will receive the 32-bit value.
456 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
457  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
458  Imm = cast<ConstantSDNode>(N)->getZExtValue();
459  return true;
460  }
461  return false;
462 }
463 
464 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
465 /// operand. If so Imm will receive the 64-bit value.
466 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
467  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
468  Imm = cast<ConstantSDNode>(N)->getZExtValue();
469  return true;
470  }
471  return false;
472 }
473 
// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
479 
/// isInt64Immediate - This method tests to see if the value is a 64-bit
/// constant operand. If so Imm will receive the 64-bit value.
/// Convenience overload that unwraps the SDValue to its node.
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
  return isInt64Immediate(N.getNode(), Imm);
}
485 
486 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
487  const SDValue &DestMBB) {
488  assert(isa<BasicBlockSDNode>(DestMBB));
489 
490  if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
491 
492  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
493  const TerminatorInst *BBTerm = BB->getTerminator();
494 
495  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
496 
497  const BasicBlock *TBB = BBTerm->getSuccessor(0);
498  const BasicBlock *FBB = BBTerm->getSuccessor(1);
499 
500  auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
501  auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
502 
503  // We only want to handle cases which are easy to predict at static time, e.g.
504  // C++ throw statement, that is very likely not taken, or calling never
505  // returned function, e.g. stdlib exit(). So we set Threshold to filter
506  // unwanted cases.
507  //
508  // Below is LLVM branch weight table, we only want to handle case 1, 2
509  //
510  // Case Taken:Nontaken Example
511  // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
512  // 2. Invoke-terminating 1:1048575
513  // 3. Coldblock 4:64 __builtin_expect
514  // 4. Loop Branch 124:4 For loop
515  // 5. PH/ZH/FPH 20:12
516  const uint32_t Threshold = 10000;
517 
518  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
519  return PPC::BR_NO_HINT;
520 
521  DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
522  << BB->getName() << "'\n"
523  << " -> " << TBB->getName() << ": " << TProb << "\n"
524  << " -> " << FBB->getName() << ": " << FProb << "\n");
525 
526  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
527 
528  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
529  // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
530  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
531  std::swap(TProb, FProb);
532 
533  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
534 }
535 
536 // isOpcWithIntImmediate - This method tests to see if the node is a specific
537 // opcode and that it has a immediate integer right operand.
538 // If so Imm will receive the 32 bit value.
539 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
540  return N->getOpcode() == Opc
541  && isInt32Immediate(N->getOperand(1).getNode(), Imm);
542 }
543 
544 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
545  SDLoc dl(SN);
546  int FI = cast<FrameIndexSDNode>(N)->getIndex();
547  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
548  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
549  if (SN->hasOneUse())
550  CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
551  getSmallIPtrImm(Offset, dl));
552  else
553  ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
554  getSmallIPtrImm(Offset, dl)));
555 }
556 
557 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
558  bool isShiftMask, unsigned &SH,
559  unsigned &MB, unsigned &ME) {
560  // Don't even go down this path for i64, since different logic will be
561  // necessary for rldicl/rldicr/rldimi.
562  if (N->getValueType(0) != MVT::i32)
563  return false;
564 
565  unsigned Shift = 32;
566  unsigned Indeterminant = ~0; // bit mask marking indeterminant results
567  unsigned Opcode = N->getOpcode();
568  if (N->getNumOperands() != 2 ||
569  !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
570  return false;
571 
572  if (Opcode == ISD::SHL) {
573  // apply shift left to mask if it comes first
574  if (isShiftMask) Mask = Mask << Shift;
575  // determine which bits are made indeterminant by shift
576  Indeterminant = ~(0xFFFFFFFFu << Shift);
577  } else if (Opcode == ISD::SRL) {
578  // apply shift right to mask if it comes first
579  if (isShiftMask) Mask = Mask >> Shift;
580  // determine which bits are made indeterminant by shift
581  Indeterminant = ~(0xFFFFFFFFu >> Shift);
582  // adjust for the left rotate
583  Shift = 32 - Shift;
584  } else if (Opcode == ISD::ROTL) {
585  Indeterminant = 0;
586  } else {
587  return false;
588  }
589 
590  // if the mask doesn't intersect any Indeterminant bits
591  if (Mask && !(Mask & Indeterminant)) {
592  SH = Shift & 31;
593  // make sure the mask is still a mask (wrap arounds may not be)
594  return isRunOfOnes(Mask, MB, ME);
595  }
596  return false;
597 }
598 
599 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
600  SDValue Base = ST->getBasePtr();
601  if (Base.getOpcode() != PPCISD::ADD_TLS)
602  return false;
603  SDValue Offset = ST->getOffset();
604  if (!Offset.isUndef())
605  return false;
606 
607  SDLoc dl(ST);
608  EVT MemVT = ST->getMemoryVT();
609  EVT RegVT = ST->getValue().getValueType();
610 
611  unsigned Opcode;
612  switch (MemVT.getSimpleVT().SimpleTy) {
613  default:
614  return false;
615  case MVT::i8: {
616  Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
617  break;
618  }
619  case MVT::i16: {
620  Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
621  break;
622  }
623  case MVT::i32: {
624  Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
625  break;
626  }
627  case MVT::i64: {
628  Opcode = PPC::STDXTLS;
629  break;
630  }
631  }
632  SDValue Chain = ST->getChain();
633  SDVTList VTs = ST->getVTList();
634  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
635  Chain};
636  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
637  transferMemOperands(ST, MN);
638  ReplaceNode(ST, MN);
639  return true;
640 }
641 
642 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
643  SDValue Base = LD->getBasePtr();
644  if (Base.getOpcode() != PPCISD::ADD_TLS)
645  return false;
646  SDValue Offset = LD->getOffset();
647  if (!Offset.isUndef())
648  return false;
649 
650  SDLoc dl(LD);
651  EVT MemVT = LD->getMemoryVT();
652  EVT RegVT = LD->getValueType(0);
653  unsigned Opcode;
654  switch (MemVT.getSimpleVT().SimpleTy) {
655  default:
656  return false;
657  case MVT::i8: {
658  Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
659  break;
660  }
661  case MVT::i16: {
662  Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
663  break;
664  }
665  case MVT::i32: {
666  Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
667  break;
668  }
669  case MVT::i64: {
670  Opcode = PPC::LDXTLS;
671  break;
672  }
673  }
674  SDValue Chain = LD->getChain();
675  SDVTList VTs = LD->getVTList();
676  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
677  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
678  transferMemOperands(LD, MN);
679  ReplaceNode(LD, MN);
680  return true;
681 }
682 
/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
/// Returns true (and replaces N) only when the two operands' known-zero masks
/// together cover all 32 bits, i.e. the OR really is a bitfield insert.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDLoc dl(N);

  KnownBits LKnown, RKnown;
  CurDAG->computeKnownBits(Op0, LKnown);
  CurDAG->computeKnownBits(Op1, RKnown);

  // Known-zero bits of each side; Op0 supplies the "target" bits and Op1 the
  // bits being inserted.
  unsigned TargetMask = LKnown.Zero.getZExtValue();
  unsigned InsertMask = RKnown.Zero.getZExtValue();

  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
    unsigned Op0Opc = Op0.getOpcode();
    unsigned Op1Opc = Op1.getOpcode();
    unsigned Value, SH = 0;
    // Flip from known-zero to "bits contributed by this operand".
    TargetMask = ~TargetMask;
    InsertMask = ~InsertMask;

    // If the LHS has a foldable shift and the RHS does not, then swap it to the
    // RHS so that we can fold the shift into the insert.
    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
          Op0.getOperand(0).getOpcode() == ISD::SRL) {
        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
            Op1.getOperand(0).getOpcode() != ISD::SRL) {
          std::swap(Op0, Op1);
          std::swap(Op0Opc, Op1Opc);
          std::swap(TargetMask, InsertMask);
        }
      }
    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
          Op1.getOperand(0).getOpcode() != ISD::SRL) {
        std::swap(Op0, Op1);
        std::swap(Op0Opc, Op1Opc);
        std::swap(TargetMask, InsertMask);
      }
    }

    unsigned MB, ME;
    // The inserted bits must form one contiguous run for rlwimi's MB/ME form.
    if (isRunOfOnes(InsertMask, MB, ME)) {
      // A shift feeding the insert becomes rlwimi's rotate amount SH.
      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
          isInt32Immediate(Op1.getOperand(1), Value)) {
        Op1 = Op1.getOperand(0);
        SH  = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
      }
      if (Op1Opc == ISD::AND) {
       // The AND mask might not be a constant, and we need to make sure that
       // if we're going to fold the masking with the insert, all bits not
       // know to be zero in the mask are known to be one.
        KnownBits MKnown;
        CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

        unsigned SHOpc = Op1.getOperand(0).getOpcode();
        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
            isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
          // Note that Value must be in range here (less than 32) because
          // otherwise there would not be any bits set in InsertMask.
          Op1 = Op1.getOperand(0).getOperand(0);
          SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
        }
      }

      SH &= 31;
      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
                          getI32Imm(ME, dl) };
      ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
      return true;
    }
  }
  return false;
}
759 
760 // Predict the number of instructions that would be generated by calling
761 // selectI64Imm(N).
762 static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
763  // Assume no remaining bits.
764  unsigned Remainder = 0;
765  // Assume no shift required.
766  unsigned Shift = 0;
767 
768  // If it can't be represented as a 32 bit value.
769  if (!isInt<32>(Imm)) {
770  Shift = countTrailingZeros<uint64_t>(Imm);
771  int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
772 
773  // If the shifted value fits 32 bits.
774  if (isInt<32>(ImmSh)) {
775  // Go with the shifted value.
776  Imm = ImmSh;
777  } else {
778  // Still stuck with a 64 bit value.
779  Remainder = Imm;
780  Shift = 32;
781  Imm >>= 32;
782  }
783  }
784 
785  // Intermediate operand.
786  unsigned Result = 0;
787 
788  // Handle first 32 bits.
789  unsigned Lo = Imm & 0xFFFF;
790 
791  // Simple value.
792  if (isInt<16>(Imm)) {
793  // Just the Lo bits.
794  ++Result;
795  } else if (Lo) {
796  // Handle the Hi bits and Lo bits.
797  Result += 2;
798  } else {
799  // Just the Hi bits.
800  ++Result;
801  }
802 
803  // If no shift, we're done.
804  if (!Shift) return Result;
805 
806  // If Hi word == Lo word,
807  // we can use rldimi to insert the Lo word into Hi word.
808  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
809  ++Result;
810  return Result;
811  }
812 
813  // Shift for next step if the upper 32-bits were not zero.
814  if (Imm)
815  ++Result;
816 
817  // Add in the last bits as required.
818  if ((Remainder >> 16) & 0xFFFF)
819  ++Result;
820  if (Remainder & 0xFFFF)
821  ++Result;
822 
823  return Result;
824 }
825 
/// Rotate a 64-bit value left by R bits.
/// The naive form ((Imm << R) | (Imm >> (64 - R))) shifts by 64 when R == 0,
/// which is undefined behavior in C++; guard that case so the helper is safe
/// for the full range of R (callers currently pass 1..62, but this removes a
/// latent trap for any future caller).
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  if (R == 0)
    return Imm;
  return (Imm << R) | (Imm >> (64 - R));
}
829 
830 static unsigned selectI64ImmInstrCount(int64_t Imm) {
831  unsigned Count = selectI64ImmInstrCountDirect(Imm);
832 
833  // If the instruction count is 1 or 2, we do not need further analysis
834  // since rotate + load constant requires at least 2 instructions.
835  if (Count <= 2)
836  return Count;
837 
838  for (unsigned r = 1; r < 63; ++r) {
839  uint64_t RImm = Rot64(Imm, r);
840  unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
841  Count = std::min(Count, RCount);
842 
843  // See comments in selectI64Imm for an explanation of the logic below.
844  unsigned LS = findLastSet(RImm);
845  if (LS != r-1)
846  continue;
847 
848  uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
849  uint64_t RImmWithOnes = RImm | OnesMask;
850 
851  RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
852  Count = std::min(Count, RCount);
853  }
854 
855  return Count;
856 }
857 
// Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
// (above) needs to be kept in sync with this function.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
                                  int64_t Imm) {
  // Assume no remaining bits.
  unsigned Remainder = 0;
  // Assume no shift required.
  unsigned Shift = 0;

  // If it can't be represented as a 32 bit value.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    // If the shifted value fits 32 bits.
    if (isInt<32>(ImmSh)) {
      // Go with the shifted value.
      Imm = ImmSh;
    } else {
      // Still stuck with a 64 bit value.
      // Materialize the high 32 bits first, shift left by 32, then OR in the
      // low 32 bits saved in Remainder.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Intermediate operand.
  SDNode *Result;

  // Handle first 32 bits.
  unsigned Lo = Imm & 0xFFFF;
  unsigned Hi = (Imm >> 16) & 0xFFFF;

  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  // Simple value.
  if (isInt<16>(Imm)) {
    // LI takes a sign-extended 16-bit immediate.
    uint64_t SextImm = SignExtend64(Lo, 16);
    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
    // Just the Lo bits.
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  } else if (Lo) {
    // Handle the Hi bits.
    unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
    // And Lo bits.
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Lo));
  } else {
    // Just the Hi bits.
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
  }

  // If no shift, we're done.
  if (!Shift) return Result;

  // If Hi word == Lo word,
  // we can use rldimi to insert the Lo word into Hi word.
  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
    SDValue Ops[] =
      { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  }

  // Shift for next step if the upper 32-bits were not zero.
  if (Imm) {
    Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
                                    SDValue(Result, 0),
                                    getI32Imm(Shift),
                                    getI32Imm(63 - Shift));
  }

  // Add in the last bits as required.
  // ORIS for the upper 16 bits of the remainder, if any.
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Hi));
  }
  // ORI for the lower 16 bits of the remainder, if any.
  if ((Lo = Remainder & 0xFFFF)) {
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Lo));
  }

  return Result;
}
944 
945 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
946  int64_t Imm) {
947  unsigned Count = selectI64ImmInstrCountDirect(Imm);
948 
949  // If the instruction count is 1 or 2, we do not need further analysis
950  // since rotate + load constant requires at least 2 instructions.
951  if (Count <= 2)
952  return selectI64ImmDirect(CurDAG, dl, Imm);
953 
954  unsigned RMin = 0;
955 
956  int64_t MatImm;
957  unsigned MaskEnd;
958 
959  for (unsigned r = 1; r < 63; ++r) {
960  uint64_t RImm = Rot64(Imm, r);
961  unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
962  if (RCount < Count) {
963  Count = RCount;
964  RMin = r;
965  MatImm = RImm;
966  MaskEnd = 63;
967  }
968 
969  // If the immediate to generate has many trailing zeros, it might be
970  // worthwhile to generate a rotated value with too many leading ones
971  // (because that's free with li/lis's sign-extension semantics), and then
972  // mask them off after rotation.
973 
974  unsigned LS = findLastSet(RImm);
975  // We're adding (63-LS) higher-order ones, and we expect to mask them off
976  // after performing the inverse rotation by (64-r). So we need that:
977  // 63-LS == 64-r => LS == r-1
978  if (LS != r-1)
979  continue;
980 
981  uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
982  uint64_t RImmWithOnes = RImm | OnesMask;
983 
984  RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
985  if (RCount < Count) {
986  Count = RCount;
987  RMin = r;
988  MatImm = RImmWithOnes;
989  MaskEnd = LS;
990  }
991  }
992 
993  if (!RMin)
994  return selectI64ImmDirect(CurDAG, dl, Imm);
995 
996  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
997  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
998  };
999 
1000  SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0);
1001  return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
1002  getI32Imm(64 - RMin), getI32Imm(MaskEnd));
1003 }
1004 
// If every user of N only consumes some low-order portion of its value,
// return the widest such portion (the maximum truncation width in bits).
// Return 0 if any user may observe the full 64-bit value.
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
  unsigned MaxTruncation = 0;
  // Cannot use range-based for loop here as we need the actual use (i.e. we
  // need the operand number corresponding to the use). A range-based for
  // will unbox the use and provide an SDNode*.
  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
       Use != UseEnd; ++Use) {
    unsigned Opc =
      Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
    switch (Opc) {
    default: return 0;
    case ISD::TRUNCATE:
      if (Use->isMachineOpcode())
        return 0;
      // The truncation result width bounds how many bits this user reads.
      MaxTruncation =
        std::max(MaxTruncation, Use->getValueType(0).getSizeInBits());
      continue;
    case ISD::STORE: {
      if (Use->isMachineOpcode())
        return 0;
      StoreSDNode *STN = cast<StoreSDNode>(*Use);
      unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
      // A 64-bit store keeps every bit. If N is not the stored value
      // (operand 0) it is used as an address, which needs the full value.
      if (MemVTSize == 64 || Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, MemVTSize);
      continue;
    }
    // Already-selected narrow stores of a 64-bit register keep only the low
    // bits of the stored value (operand 0); any other operand position means
    // N feeds the address computation.
    case PPC::STW8:
    case PPC::STWX8:
    case PPC::STWU8:
    case PPC::STWUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 32u);
      continue;
    case PPC::STH8:
    case PPC::STHX8:
    case PPC::STHU8:
    case PPC::STHUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 16u);
      continue;
    case PPC::STB8:
    case PPC::STBX8:
    case PPC::STBU8:
    case PPC::STBUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 8u);
      continue;
    }
  }
  return MaxTruncation;
}
1060 
1061 // Select a 64-bit constant.
1062 static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
1063  SDLoc dl(N);
1064 
1065  // Get 64 bit value.
1066  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
1067  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1068  uint64_t SextImm = SignExtend64(Imm, MinSize);
1069  SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1070  if (isInt<16>(SextImm))
1071  return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1072  }
1073  return selectI64Imm(CurDAG, dl, Imm);
1074 }
1075 
1076 namespace {
1077 
1078 class BitPermutationSelector {
  // One bit of the value being built: either a known constant zero, or bit
  // number Idx of some SDValue V.
  struct ValueBit {
    SDValue V;

    // The bit number in the value, using a convention where bit 0 is the
    // lowest-order bit.
    unsigned Idx;

    enum Kind {
      ConstZero,  // The bit is known to be zero.
      Variable    // The bit is bit Idx of V.
    } K;

    ValueBit(SDValue V, unsigned I, Kind K = Variable)
      : V(V), Idx(I), K(K) {}
    ValueBit(Kind K = Variable)
      : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}

    // True if this bit is a known constant zero.
    bool isZero() const {
      return K == ConstZero;
    }

    // True if this bit is taken from an SDValue.
    bool hasValue() const {
      return K == Variable;
    }

    // The SDValue this bit comes from; only valid when hasValue().
    SDValue getValue() const {
      assert(hasValue() && "Cannot get the value of a constant bit");
      return V;
    }

    // The bit index within getValue(); only valid when hasValue().
    unsigned getValueBitIndex() const {
      assert(hasValue() && "Cannot get the value bit index of a constant bit");
      return Idx;
    }
  };
1114 
  // A bit group has the same underlying value and the same rotate factor.
  // StartIdx/EndIdx are inclusive bit positions in the final value; a group
  // may wrap around (StartIdx > EndIdx) after coalescing.
  struct BitGroup {
    SDValue V;
    unsigned RLAmt;
    unsigned StartIdx, EndIdx;

    // This rotation amount assumes that the lower 32 bits of the quantity are
    // replicated in the high 32 bits by the rotation operator (which is done
    // by rlwinm and friends in 64-bit mode).
    bool Repl32;
    // Did converting to Repl32 == true change the rotation factor? If it did,
    // it decreased it by 32.
    bool Repl32CR;
    // Was this group coalesced after setting Repl32 to true?
    bool Repl32Coalesced;

    BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
      : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
        Repl32Coalesced(false) {
      DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
                      " [" << S << ", " << E << "]\n");
    }
  };
1138 
1139  // Information on each (Value, RLAmt) pair (like the number of groups
1140  // associated with each) used to choose the lowering method.
1141  struct ValueRotInfo {
1142  SDValue V;
1143  unsigned RLAmt = std::numeric_limits<unsigned>::max();
1144  unsigned NumGroups = 0;
1145  unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1146  bool Repl32 = false;
1147 
1148  ValueRotInfo() = default;
1149 
1150  // For sorting (in reverse order) by NumGroups, and then by
1151  // FirstGroupStartIdx.
1152  bool operator < (const ValueRotInfo &Other) const {
1153  // We need to sort so that the non-Repl32 come first because, when we're
1154  // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1155  // masking operation.
1156  if (Repl32 < Other.Repl32)
1157  return true;
1158  else if (Repl32 > Other.Repl32)
1159  return false;
1160  else if (NumGroups > Other.NumGroups)
1161  return true;
1162  else if (NumGroups < Other.NumGroups)
1163  return false;
1164  else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1165  return true;
1166  return false;
1167  }
1168  };
1169 
1170  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1171  using ValueBitsMemoizer =
1173  ValueBitsMemoizer Memoizer;
1174 
  // Return a pair of bool and a SmallVector pointer to a memoization entry.
  // The bool is true if something interesting was deduced, otherwise if we're
  // providing only a generic representation of V (or something else likewise
  // uninteresting for instruction selection) through the SmallVector.
  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
                                                            unsigned NumBits) {
    // Memoize per SDValue; a second query for the same value returns the
    // cached decomposition.
    auto &ValueEntry = Memoizer[V];
    if (ValueEntry)
      return std::make_pair(ValueEntry->first, &ValueEntry->second);
    ValueEntry.reset(new ValueBitsMemoizedValue());
    bool &Interesting = ValueEntry->first;
    SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
    Bits.resize(NumBits);

    switch (V.getOpcode()) {
    default: break;
    case ISD::ROTL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned RotAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        // Bit i of the rotate is bit (i - RotAmt) mod NumBits of the input.
        for (unsigned i = 0; i < NumBits; ++i)
          Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SHL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        // High bits come from the input shifted up...
        for (unsigned i = ShiftAmt; i < NumBits; ++i)
          Bits[i] = LHSBits[i - ShiftAmt];

        // ...and the vacated low bits are known zero.
        for (unsigned i = 0; i < ShiftAmt; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SRL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        // Low bits come from the input shifted down...
        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
          Bits[i] = LHSBits[i + ShiftAmt];

        // ...and the vacated high bits are known zero (logical shift).
        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::AND:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = V.getConstantOperandVal(1);

        const SmallVector<ValueBit, 64> *LHSBits;
        // Mark this as interesting, only if the LHS was also interesting. This
        // prevents the overall procedure from matching a single immediate 'and'
        // (which is non-optimal because such an and might be folded with other
        // things if we don't select it here).
        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);

        // Masked-off bits become known zeros; the rest pass through.
        for (unsigned i = 0; i < NumBits; ++i)
          if (((Mask >> i) & 1) == 1)
            Bits[i] = (*LHSBits)[i];
          else
            Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting, &Bits);
      }
      break;
    case ISD::OR: {
      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

      // The OR is only decomposable bit-by-bit if, at every position, at
      // least one side is a known zero.
      bool AllDisjoint = true;
      for (unsigned i = 0; i < NumBits; ++i)
        if (LHSBits[i].isZero())
          Bits[i] = RHSBits[i];
        else if (RHSBits[i].isZero())
          Bits[i] = LHSBits[i];
        else {
          AllDisjoint = false;
          break;
        }

      if (!AllDisjoint)
        break;

      return std::make_pair(Interesting = true, &Bits);
    }
    case ISD::ZERO_EXTEND: {
      // We support only the case with zero extension from i32 to i64 so far.
      if (V.getValueType() != MVT::i64 ||
          V.getOperand(0).getValueType() != MVT::i32)
        break;

      const SmallVector<ValueBit, 64> *LHSBits;
      const unsigned NumOperandBits = 32;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumOperandBits);

      for (unsigned i = 0; i < NumOperandBits; ++i)
        Bits[i] = (*LHSBits)[i];

      // The extended bits are known zero.
      for (unsigned i = NumOperandBits; i < NumBits; ++i)
        Bits[i] = ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
    }
    }

    // Generic fallback: V is opaque, so each result bit is just the
    // corresponding bit of V. Not interesting on its own.
    for (unsigned i = 0; i < NumBits; ++i)
      Bits[i] = ValueBit(V, i);

    return std::make_pair(Interesting = false, &Bits);
  }
1299 
1300  // For each value (except the constant ones), compute the left-rotate amount
1301  // to get it from its original to final position.
1302  void computeRotationAmounts() {
1303  HasZeros = false;
1304  RLAmt.resize(Bits.size());
1305  for (unsigned i = 0; i < Bits.size(); ++i)
1306  if (Bits[i].hasValue()) {
1307  unsigned VBI = Bits[i].getValueBitIndex();
1308  if (i >= VBI)
1309  RLAmt[i] = i - VBI;
1310  else
1311  RLAmt[i] = Bits.size() - (VBI - i);
1312  } else if (Bits[i].isZero()) {
1313  HasZeros = true;
1314  RLAmt[i] = UINT32_MAX;
1315  } else {
1316  llvm_unreachable("Unknown value bit type");
1317  }
1318  }
1319 
  // Collect groups of consecutive bits with the same underlying value and
  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
  // they break up groups.
  void collectBitGroups(bool LateMask) {
    BitGroups.clear();

    unsigned LastRLAmt = RLAmt[0];
    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
    unsigned LastGroupStartIdx = 0;
    for (unsigned i = 1; i < Bits.size(); ++i) {
      unsigned ThisRLAmt = RLAmt[i];
      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
      if (LateMask && !ThisValue) {
        // A constant-zero bit is absorbed into the current group; the zeros
        // will be applied by a mask afterwards.
        ThisValue = LastValue;
        ThisRLAmt = LastRLAmt;
        // If we're doing late masking, then the first bit group always starts
        // at zero (even if the first bits were zero).
        if (BitGroups.empty())
          LastGroupStartIdx = 0;
      }

      // If this bit has the same underlying value and the same rotate factor as
      // the last one, then they're part of the same group.
      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
        continue;

      // Close the previous group (a null LastValue means it was all zeros
      // and produces no group).
      if (LastValue.getNode())
        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                     i-1));
      LastRLAmt = ThisRLAmt;
      LastValue = ThisValue;
      LastGroupStartIdx = i;
    }
    // Close the final open group, if any.
    if (LastValue.getNode())
      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                   Bits.size()-1));

    if (BitGroups.empty())
      return;

    // We might be able to combine the first and last groups.
    if (BitGroups.size() > 1) {
      // If the first and last groups are the same, then remove the first group
      // in favor of the last group, making the ending index of the last group
      // equal to the ending index of the to-be-removed first group.
      if (BitGroups[0].StartIdx == 0 &&
          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
        DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
        BitGroups.erase(BitGroups.begin());
      }
    }
  }
1375 
1376  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1377  // associated with each. If there is a degeneracy, pick the one that occurs
1378  // first (in the final value).
1379  void collectValueRotInfo() {
1380  ValueRots.clear();
1381 
1382  for (auto &BG : BitGroups) {
1383  unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1384  ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1385  VRI.V = BG.V;
1386  VRI.RLAmt = BG.RLAmt;
1387  VRI.Repl32 = BG.Repl32;
1388  VRI.NumGroups += 1;
1389  VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1390  }
1391 
1392  // Now that we've collected the various ValueRotInfo instances, we need to
1393  // sort them.
1394  ValueRotsVec.clear();
1395  for (auto &I : ValueRots) {
1396  ValueRotsVec.push_back(I.second);
1397  }
1398  llvm::sort(ValueRotsVec.begin(), ValueRotsVec.end());
1399  }
1400 
  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32-bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible. Returns true if any of the bit groups were
  // converted.
  void assignRepl32BitGroups() {
    // If we have bits like this:
    //
    // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
    // V bits: ...   7  6  5  4  3  2 1 0 31 30 29 28 27 26 25 24
    // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
    //
    // But, making use of a 32-bit operation that replicates the low-order 32
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
    // of 8.

    // A group is Repl32-eligible only if every variable bit it covers comes
    // from the low 32 bits of its source value; handle the wrapped
    // (StartIdx > EndIdx) case by scanning the two pieces separately.
    auto IsAllLow32 = [this](BitGroup & BG) {
      if (BG.StartIdx <= BG.EndIdx) {
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      } else {
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      }

      return true;
    };

    for (auto &BG : BitGroups) {
      // Only groups whose mask lies entirely in the low 32 bits can be
      // represented by rlwinm-style instructions.
      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
        if (IsAllLow32(BG)) {
          // The replication makes a rotation by r and by r+32 equivalent;
          // canonicalize to the amount below 32 and remember we did so.
          if (BG.RLAmt >= 32) {
            BG.RLAmt -= 32;
            BG.Repl32CR = true;
          }

          BG.Repl32 = true;

          DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
                          BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
                          " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
        }
      }
    }

    // Now walk through the bit groups, consolidating where possible.
    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
      // We might want to remove this bit group by merging it with the previous
      // group (which might be the ending group).
      auto IP = (I == BitGroups.begin()) ?
                std::prev(BitGroups.end()) : std::prev(I);
      // Two adjacent Repl32 groups for the same value and rotation can be
      // merged into one (possibly wrapped) group.
      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

        DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
                        I->V.getNode() << " RLAmt = " << I->RLAmt <<
                        " [" << I->StartIdx << ", " << I->EndIdx <<
                        "] with group with range [" <<
                        IP->StartIdx << ", " << IP->EndIdx << "]\n");

        IP->EndIdx = I->EndIdx;
        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
        IP->Repl32Coalesced = true;
        I = BitGroups.erase(I);
        continue;
      } else {
        // There is a special case worth handling: If there is a single group
        // covering the entire upper 32 bits, and it can be merged with both
        // the next and previous groups (which might be the same group), then
        // do so. If it is the same group (so there will be only one group in
        // total), then we need to reverse the order of the range so that it
        // covers the entire 64 bits.
        if (I->StartIdx == 32 && I->EndIdx == 63) {
          assert(std::next(I) == BitGroups.end() &&
                 "bit group ends at index 63 but there is another?");
          auto IN = BitGroups.begin();

          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
              IsAllLow32(*I)) {

            DEBUG(dbgs() << "\tcombining bit group for " <<
                            I->V.getNode() << " RLAmt = " << I->RLAmt <<
                            " [" << I->StartIdx << ", " << I->EndIdx <<
                            "] with 32-bit replicated groups with ranges [" <<
                            IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
                            IN->StartIdx << ", " << IN->EndIdx << "]\n");

            if (IP == IN) {
              // There is only one other group; change it to cover the whole
              // range (backward, so that it can still be Repl32 but cover the
              // whole 64-bit range).
              IP->StartIdx = 31;
              IP->EndIdx = 30;
              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
            } else {
              // There are two separate groups, one before this group and one
              // after us (at the beginning). We're going to remove this group,
              // but also the group at the very beginning.
              IP->EndIdx = IN->EndIdx;
              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
              BitGroups.erase(BitGroups.begin());
            }

            // This must be the last group in the vector (and we might have
            // just invalidated the iterator above), so break here.
            break;
          }
        }
      }

      ++I;
    }
  }
1537 
  // Wrap Imm in an i32 target-constant node (instruction operand form).
  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }
1541 
1542  uint64_t getZerosMask() {
1543  uint64_t Mask = 0;
1544  for (unsigned i = 0; i < Bits.size(); ++i) {
1545  if (Bits[i].hasValue())
1546  continue;
1547  Mask |= (UINT64_C(1) << i);
1548  }
1549 
1550  return ~Mask;
1551  }
1552 
1553  // This method extends an input value to 64 bit if input is 32-bit integer.
1554  // While selecting instructions in BitPermutationSelector in 64-bit mode,
1555  // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1556  // In such case, we extend it to 64 bit to be consistent with other values.
1557  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1558  if (V.getValueSizeInBits() == 64)
1559  return V;
1560 
1561  assert(V.getValueSizeInBits() == 32);
1562  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1563  SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1564  MVT::i64), 0);
1565  SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1566  MVT::i64, ImDef, V,
1567  SubRegIdx), 0);
1568  return ExtVal;
1569  }
1570 
1571  // Depending on the number of groups for a particular value, it might be
1572  // better to rotate, mask explicitly (using andi/andis), and then or the
1573  // result. Select this part of the result first.
1574  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1576  return;
1577 
1578  for (ValueRotInfo &VRI : ValueRotsVec) {
1579  unsigned Mask = 0;
1580  for (unsigned i = 0; i < Bits.size(); ++i) {
1581  if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1582  continue;
1583  if (RLAmt[i] != VRI.RLAmt)
1584  continue;
1585  Mask |= (1u << i);
1586  }
1587 
1588  // Compute the masks for andi/andis that would be necessary.
1589  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1590  assert((ANDIMask != 0 || ANDISMask != 0) &&
1591  "No set bits in mask for value bit groups");
1592  bool NeedsRotate = VRI.RLAmt != 0;
1593 
1594  // We're trying to minimize the number of instructions. If we have one
1595  // group, using one of andi/andis can break even. If we have three
1596  // groups, we can use both andi and andis and break even (to use both
1597  // andi and andis we also need to or the results together). We need four
1598  // groups if we also need to rotate. To use andi/andis we need to do more
1599  // than break even because rotate-and-mask instructions tend to be easier
1600  // to schedule.
1601 
1602  // FIXME: We've biased here against using andi/andis, which is right for
1603  // POWER cores, but not optimal everywhere. For example, on the A2,
1604  // andi/andis have single-cycle latency whereas the rotate-and-mask
1605  // instructions take two cycles, and it would be better to bias toward
1606  // andi/andis in break-even cases.
1607 
1608  unsigned NumAndInsts = (unsigned) NeedsRotate +
1609  (unsigned) (ANDIMask != 0) +
1610  (unsigned) (ANDISMask != 0) +
1611  (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
1612  (unsigned) (bool) Res;
1613 
1614  DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1615  " RL: " << VRI.RLAmt << ":" <<
1616  "\n\t\t\tisel using masking: " << NumAndInsts <<
1617  " using rotates: " << VRI.NumGroups << "\n");
1618 
1619  if (NumAndInsts >= VRI.NumGroups)
1620  continue;
1621 
1622  DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1623 
1624  if (InstCnt) *InstCnt += NumAndInsts;
1625 
1626  SDValue VRot;
1627  if (VRI.RLAmt) {
1628  SDValue Ops[] =
1629  { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1630  getI32Imm(31, dl) };
1631  VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
1632  Ops), 0);
1633  } else {
1634  VRot = VRI.V;
1635  }
1636 
1637  SDValue ANDIVal, ANDISVal;
1638  if (ANDIMask != 0)
1639  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1640  VRot, getI32Imm(ANDIMask, dl)), 0);
1641  if (ANDISMask != 0)
1642  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1643  VRot, getI32Imm(ANDISMask, dl)), 0);
1644 
1645  SDValue TotalVal;
1646  if (!ANDIVal)
1647  TotalVal = ANDISVal;
1648  else if (!ANDISVal)
1649  TotalVal = ANDIVal;
1650  else
1651  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1652  ANDIVal, ANDISVal), 0);
1653 
1654  if (!Res)
1655  Res = TotalVal;
1656  else
1657  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1658  Res, TotalVal), 0);
1659 
1660  // Now, remove all groups with this underlying value and rotation
1661  // factor.
1662  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1663  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1664  });
1665  }
1666  }
1667 
  // Instruction selection for the 32-bit case. Emits the andi/andis-masked
  // contributions first, then a rotate for the highest-priority value, then
  // one rlwimi insert per remaining group, and finally the late zero mask
  // when LateMask is set. Returns the node computing the permuted value;
  // *InstCnt (if non-null) receives the number of instructions emitted.
  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts32(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!HasZeros || LateMask) && !Res) {
      ValueRotInfo &VRI = ValueRotsVec[0];
      if (VRI.RLAmt) {
        if (InstCnt) *InstCnt += 1;
        SDValue Ops[] =
          { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
            getI32Imm(31, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                      0);
      } else {
        Res = VRI.V;
      }

      // Now, remove all groups with this underlying value and rotation factor.
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
      });
    }

    if (InstCnt) *InstCnt += BitGroups.size();

    // Insert the other groups (one at a time). Note that rlwinm/rlwimi mask
    // operands count bits from the high-order end, hence the 31-minus-index
    // conversions below.
    for (auto &BG : BitGroups) {
      if (!Res) {
        SDValue Ops[] =
          { BG.V, getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
      } else {
        SDValue Ops[] =
          { Res, BG.V, getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
      }
    }

    if (LateMask) {
      // Clear the constant-zero positions that were absorbed into groups
      // during collection.
      unsigned Mask = (unsigned) getZerosMask();

      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in zeros mask?");

      if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                               (unsigned) (ANDISMask != 0) +
                               (unsigned) (ANDIMask != 0 && ANDISMask != 0);

      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
                          Res, getI32Imm(ANDIMask, dl)), 0);
      if (ANDISMask != 0)
        ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
                          Res, getI32Imm(ANDISMask, dl)), 0);

      if (!ANDIVal)
        Res = ANDISVal;
      else if (!ANDISVal)
        Res = ANDIVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                      ANDIVal, ANDISVal), 0);
    }

    return Res.getNode();
  }
1749 
1750  unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
1751  unsigned MaskStart, unsigned MaskEnd,
1752  bool IsIns) {
1753  // In the notation used by the instructions, 'start' and 'end' are reversed
1754  // because bits are counted from high to low order.
1755  unsigned InstMaskStart = 64 - MaskEnd - 1,
1756  InstMaskEnd = 64 - MaskStart - 1;
1757 
1758  if (Repl32)
1759  return 1;
1760 
1761  if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
1762  InstMaskEnd == 63 - RLAmt)
1763  return 1;
1764 
1765  return 2;
1766  }
1767 
  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce one if it is available. Falls back to a two-rotate
  // sequence when no single instruction covers the combination; the
  // instruction count must agree with SelectRotMask64Count.
  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
                          bool Repl32, unsigned MaskStart, unsigned MaskEnd,
                          unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
                                            Ops), 0);
    }

    // Mask runs to the low-order end: rotate-left-and-clear-left (rldicl).
    if (InstMaskEnd == 63) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
    }

    // Mask starts at the high-order end: rotate-left-and-clear-right (rldicr).
    if (InstMaskStart == 0) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskEnd, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
    }

    // Mask end is tied to the rotation amount: rotate-left-and-clear (rldic).
    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }
1831 
  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce a rotate-mask-and-insert (into Base) if one is
  // available; otherwise pre-rotate V and then insert. If InstCnt is
  // non-null, the number of instructions emitted is accumulated into it.
  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
                             unsigned RLAmt, bool Repl32, unsigned MaskStart,
                             unsigned MaskEnd, unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      // rlwimi's mask operands use 32-bit bit numbering, hence the -32.
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
                                            Ops), 0);
    }

    // rldimi requires the mask to end exactly at bit 63 - RLAmt.
    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }
1881 
  // Select, for each (value, rotation-amount) pair, whichever of the masking
  // (rotate + andi./andis./64-bit AND) or pure-rotate techniques is cheaper,
  // accumulating the masked pieces into Res. Bit groups handled here are
  // erased from BitGroups.
  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
    // NOTE(review): the condition guarding this early return was lost when
    // this file was extracted — verify against the original source.
      return;

    // The idea here is the same as in the 32-bit version, but with additional
    // complications from the fact that Repl32 might be true. Because we
    // aggressively convert bit groups to Repl32 form (which, for small
    // rotation factors, involves no other change), and then coalesce, it might
    // be the case that a single 64-bit masking operation could handle both
    // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
    // form allowed coalescing, then we must use a 32-bit rotation in order to
    // completely capture the new combined bit group.

    for (ValueRotInfo &VRI : ValueRotsVec) {
      uint64_t Mask = 0;

      // We need to add to the mask all bits from the associated bit groups.
      // If Repl32 is false, we need to add bits from bit groups that have
      // Repl32 true, but are trivially convertible to Repl32 false. Such a
      // group is trivially convertible if it overlaps only with the lower 32
      // bits, and the group has not been coalesced.
      auto MatchingBG = [VRI](const BitGroup &BG) {
        if (VRI.V != BG.V)
          return false;

        unsigned EffRLAmt = BG.RLAmt;
        if (!VRI.Repl32 && BG.Repl32) {
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
              !BG.Repl32Coalesced) {
            if (BG.Repl32CR)
              EffRLAmt += 32;
          } else {
            return false;
          }
        } else if (VRI.Repl32 != BG.Repl32) {
          return false;
        }

        return VRI.RLAmt == EffRLAmt;
      };

      for (auto &BG : BitGroups) {
        if (!MatchingBG(BG))
          continue;

        // Accumulate this group's bits into Mask; a wrapped group
        // (StartIdx > EndIdx) contributes two runs.
        if (BG.StartIdx <= BG.EndIdx) {
          for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
            Mask |= (UINT64_C(1) << i);
        } else {
          for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
            Mask |= (UINT64_C(1) << i);
          for (unsigned i = 0; i <= BG.EndIdx; ++i)
            Mask |= (UINT64_C(1) << i);
        }
      }

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));

      // Count the instructions the masking technique would take...
      unsigned NumAndInsts = (unsigned) NeedsRotate +
                             (unsigned) (bool) Res;
      if (Use32BitInsts)
        NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
                       (unsigned) (ANDIMask != 0 && ANDISMask != 0);
      else
        NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;

      // ...versus the instructions the rotate-based technique would take.
      unsigned NumRLInsts = 0;
      bool FirstBG = true;
      bool MoreBG = false;
      for (auto &BG : BitGroups) {
        if (!MatchingBG(BG)) {
          MoreBG = true;
          continue;
        }
        NumRLInsts +=
          SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
                               !FirstBG);
        FirstBG = false;
      }

      DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
                      " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") <<
                      "\n\t\t\tisel using masking: " << NumAndInsts <<
                      " using rotates: " << NumRLInsts << "\n");

      // When we'd use andi/andis, we bias toward using the rotates (andi only
      // has a record form, and is cracked on POWER cores). However, when using
      // general 64-bit constant formation, bias toward the constant form,
      // because that exposes more opportunities for CSE.
      if (NumAndInsts > NumRLInsts)
        continue;
      // When merging multiple bit groups, instruction or is used.
      // But when rotate is used, rldimi can insert the rotated value into any
      // register, so instruction or can be avoided.
      if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
        continue;

      DEBUG(dbgs() << "\t\t\t\tusing masking\n");

      if (InstCnt) *InstCnt += NumAndInsts;

      SDValue VRot;
      // We actually need to generate a rotation if we have a non-zero rotation
      // factor or, in the Repl32 case, if we care about any of the
      // higher-order replicated bits. In the latter case, we generate a mask
      // backward so that it actually includes the entire 64 bits.
      if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
        VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                               VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
      else
        VRot = VRI.V;

      SDValue TotalVal;
      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        // AND the low and high halfwords separately, then OR the pieces
        // together if both were needed.
        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
                                                   ExtendToInt64(VRot, dl),
                                                   getI32Imm(ANDIMask, dl)),
                            0);
        if (ANDISMask != 0)
          ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
                                                    ExtendToInt64(VRot, dl),
                                                    getI32Imm(ANDISMask, dl)),
                             0);

        if (!ANDIVal)
          TotalVal = ANDISVal;
        else if (!ANDISVal)
          TotalVal = ANDIVal;
        else
          TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                               ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // Materialize the full 64-bit constant mask and AND with it.
        TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
        TotalVal =
          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                         ExtendToInt64(VRot, dl), TotalVal),
                  0);
      }

      // Merge this piece into the accumulated result.
      if (!Res)
        Res = TotalVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                             ExtendToInt64(Res, dl), TotalVal),
                      0);

      // Now, remove all groups with this underlying value and rotation
      // factor.
      eraseMatchingBitGroups(MatchingBG);
    }
  }
2046 
  // Instruction selection for the 64-bit case: emit the masking ("and")
  // parts first, then the rotate/insert parts, and finally (for late
  // masking) AND in the zeros mask. Returns the selected node.
  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts64(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!HasZeros || LateMask) && !Res) {
      // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
      // groups will come first, and so the VRI representing the largest number
      // of groups might not be first (it might be the first Repl32 groups).
      unsigned MaxGroupsIdx = 0;
      if (!ValueRotsVec[0].Repl32) {
        // Find the first Repl32 entry and prefer it when it covers more
        // groups than the first (non-Repl32) entry.
        for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
          if (ValueRotsVec[i].Repl32) {
            if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
              MaxGroupsIdx = i;
            break;
          }
      }

      ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
      bool NeedsRotate = false;
      if (VRI.RLAmt) {
        NeedsRotate = true;
      } else if (VRI.Repl32) {
        for (auto &BG : BitGroups) {
          if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
              BG.Repl32 != VRI.Repl32)
            continue;

          // We don't need a rotate if the bit group is confined to the lower
          // 32 bits.
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
            continue;

          NeedsRotate = true;
          break;
        }
      }

      if (NeedsRotate)
        Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                              VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
                              InstCnt);
      else
        Res = VRI.V;

      // Now, remove all groups with this underlying value and rotation factor.
      if (Res)
        eraseMatchingBitGroups([VRI](const BitGroup &BG) {
          return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
                 BG.Repl32 == VRI.Repl32;
        });
    }

    // Because 64-bit rotates are more flexible than inserts, we might have a
    // preference regarding which one we do first (to save one instruction).
    if (!Res)
      for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
        if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                 false) <
            SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                 true)) {
          // Move this group to the front so it is selected as the initial
          // rotate rather than as an insert.
          if (I != BitGroups.begin()) {
            BitGroup BG = *I;
            BitGroups.erase(I);
            BitGroups.insert(BitGroups.begin(), BG);
          }

          break;
        }
      }

    // Insert the other groups (one at a time).
    for (auto &BG : BitGroups) {
      if (!Res)
        Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
                              BG.EndIdx, InstCnt);
      else
        Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
                                 BG.StartIdx, BG.EndIdx, InstCnt);
    }

    if (LateMask) {
      // With late masking, zeros were ignored above; mask them in now.
      uint64_t Mask = getZerosMask();

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                                 (unsigned) (ANDISMask != 0) +
                                 (unsigned) (ANDIMask != 0 && ANDISMask != 0);

        // AND the low and high halfwords separately, then OR the pieces
        // together if both were needed.
        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
                              ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0);
        if (ANDISMask != 0)
          ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
                               ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0);

        if (!ANDIVal)
          Res = ANDISVal;
        else if (!ANDISVal)
          Res = ANDIVal;
        else
          Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                          ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;

        // Materialize the full 64-bit constant mask and AND with it.
        SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
        Res =
          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                         ExtendToInt64(Res, dl), MaskVal), 0);
      }
    }

    return Res.getNode();
  }
2183 
2184  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2185  // Fill in BitGroups.
2186  collectBitGroups(LateMask);
2187  if (BitGroups.empty())
2188  return nullptr;
2189 
2190  // For 64-bit values, figure out when we can use 32-bit instructions.
2191  if (Bits.size() == 64)
2192  assignRepl32BitGroups();
2193 
2194  // Fill in ValueRotsVec.
2195  collectValueRotInfo();
2196 
2197  if (Bits.size() == 32) {
2198  return Select32(N, LateMask, InstCnt);
2199  } else {
2200  assert(Bits.size() == 64 && "Not 64 bits here?");
2201  return Select64(N, LateMask, InstCnt);
2202  }
2203 
2204  return nullptr;
2205  }
2206 
  // Erase every bit group for which the predicate F returns true.
  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
    BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
  }
2210 

  // True when some bits of the result are constant zero (set by
  // computeRotationAmounts()); such results need early or late masking.
  bool HasZeros;

  // The contiguous (value, rotation-amount) bit groups making up the result.
  SmallVector<BitGroup, 16> BitGroups;

  // Per-(value, rotation amount) summary information: a lookup map plus a
  // vector kept in selection-priority order.
  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
  SmallVector<ValueRotInfo, 16> ValueRotsVec;

  // The DAG into which selected machine nodes are created.
  SelectionDAG *CurDAG;

public:
  BitPermutationSelector(SelectionDAG *DAG)
    : CurDAG(DAG) {}
2226 
  // Here we try to match complex bit permutations into a set of
  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
  // known to produce optimal code for common cases (like i32 byte swapping).
  // Returns the selected replacement node, or nullptr when the node's bit
  // provenance cannot be computed.
  SDNode *Select(SDNode *N) {
    Memoizer.clear();
    auto Result =
      getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
    // Bail out if the per-bit provenance could not be determined.
    if (!Result.first)
      return nullptr;
    Bits = std::move(*Result.second);

    DEBUG(dbgs() << "Considering bit-permutation-based instruction"
                    " selection for: ");
    DEBUG(N->dump(CurDAG));

    // Fill in RLAmt and set HasZeros.
    computeRotationAmounts();

    if (!HasZeros)
      return Select(N, false);

    // We currently have two techniques for handling results with zeros: early
    // masking (the default) and late masking. Late masking is sometimes more
    // efficient, but because the structure of the bit groups is different, it
    // is hard to tell without generating both and comparing the results. With
    // late masking, we ignore zeros in the resulting value when inserting each
    // set of bit groups, and then mask in the zeros at the end. With early
    // masking, we only insert the non-zero parts of the result at every step.

    unsigned InstCnt, InstCntLateMask;
    DEBUG(dbgs() << "\tEarly masking:\n");
    SDNode *RN = Select(N, false, &InstCnt);
    DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");

    DEBUG(dbgs() << "\tLate masking:\n");
    SDNode *RNLM = Select(N, true, &InstCntLateMask);
    DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
                    " instructions\n");

    // Prefer early masking on a tie.
    if (InstCnt <= InstCntLateMask) {
      DEBUG(dbgs() << "\tUsing early-masking for isel\n");
      return RN;
    }

    DEBUG(dbgs() << "\tUsing late-masking for isel\n");
    return RNLM;
  }
2274 };
2275 
// Computes integer comparison results directly in GPRs (rather than via CR
// fields), as directed by the CmpInGPR option. Only used on 64-bit targets
// (see the constructor assertion).
class IntegerCompareEliminator {
  SelectionDAG *CurDAG;
  PPCDAGToDAGISel *S;
  // Conversion type for interpreting results of a 32-bit instruction as
  // a 64-bit value or vice versa.
  enum ExtOrTruncConversion { Ext, Trunc };

  // Modifiers to guide how an ISD::SETCC node's result is to be computed
  // in a GPR.
  // ZExtOrig - use the original condition code, zero-extend value
  // ZExtInvert - invert the condition code, zero-extend value
  // SExtOrig - use the original condition code, sign-extend value
  // SExtInvert - invert the condition code, sign-extend value
  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };

  // Comparisons against zero to emit GPR code sequences for. Each of these
  // sequences may need to be emitted for two or more equivalent patterns.
  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
  // matters as well as the extension type: sext (-1/0), zext (1/0).
  // GEZExt - (zext (LHS >= 0))
  // GESExt - (sext (LHS >= 0))
  // LEZExt - (zext (LHS <= 0))
  // LESExt - (sext (LHS <= 0))
  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };

  // Each helper returns nullptr / an empty SDValue when its pattern cannot
  // be handled.
  SDNode *tryEXTEND(SDNode *N);
  SDNode *tryLogicOpOfCompares(SDNode *N);
  SDValue computeLogicOpInGPR(SDValue LogicOp);
  SDValue signExtendInputIfNeeded(SDValue Input);
  SDValue zeroExtendInputIfNeeded(SDValue Input);
  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                         ZeroCompare CmpTy);
  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);

public:
  IntegerCompareEliminator(SelectionDAG *DAG,
                           PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
    assert(CurDAG->getTargetLoweringInfo()
            .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
           "Only expecting to use this on 64 bit targets.");
  }
  // Dispatch on the node's opcode; returns the replacement node or nullptr.
  // NOTE(review): two lines of this method appear to have been lost in
  // extraction (a fall-through marker after the ZERO_EXTEND case and the
  // final clause of the SIGN_EXTEND condition) — verify against the
  // original source.
  SDNode *Select(SDNode *N) {
    if (CmpInGPR == ICGPR_None)
      return nullptr;
    switch (N->getOpcode()) {
    default: break;
    case ISD::ZERO_EXTEND:
      if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
          CmpInGPR == ICGPR_SextI64)
        return nullptr;
    case ISD::SIGN_EXTEND:
      if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
        return nullptr;
      return tryEXTEND(N);
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      return tryLogicOpOfCompares(N);
    }
    return nullptr;
  }
};
2349 
2350 static bool isLogicOp(unsigned Opc) {
2351  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
2352 }
// The obvious case for wanting to keep the value in a GPR. Namely, the
// result of the comparison is actually needed in a GPR.
// Returns the replacement node, or nullptr if the extend's input is not a
// pattern we can compute in a GPR.
SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
         N->getOpcode() == ISD::SIGN_EXTEND) &&
        "Expecting a zero/sign extend node!");
  SDValue WideRes;
  // If we are zero-extending the result of a logical operation on i1
  // values, we can keep the values in GPRs.
  if (isLogicOp(N->getOperand(0).getOpcode()) &&
      N->getOperand(0).getValueType() == MVT::i1 &&
      N->getOpcode() == ISD::ZERO_EXTEND)
    WideRes = computeLogicOpInGPR(N->getOperand(0));
  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
    return nullptr;
  else
    // Otherwise compute the comparison itself in a GPR, matching the
    // extension kind of this node.
    WideRes =
      getSETCCInGPR(N->getOperand(0),
                    N->getOpcode() == ISD::SIGN_EXTEND ?
                    SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);

  if (!WideRes)
    return nullptr;

  SDLoc dl(N);
  bool Input32Bit = WideRes.getValueType() == MVT::i32;
  bool Output32Bit = N->getValueType(0) == MVT::i32;

  // Statistics bookkeeping.
  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;

  // Reinterpret the result at the width this node's users expect.
  SDValue ConvOp = WideRes;
  if (Input32Bit != Output32Bit)
    ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
                           ExtOrTruncConversion::Trunc);
  return ConvOp.getNode();
}
2390 
// Attempt to perform logical operations on the results of comparisons while
// keeping the values in GPRs. Without doing so, these would end up being
// lowered to CR-logical operations which suffer from significant latency and
// low ILP. Returns the replacement node (a single CR0 bit extracted from a
// record-form instruction), or nullptr if the tree cannot be lowered to GPRs.
SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
  if (N->getValueType(0) != MVT::i1)
    return nullptr;
  assert(isLogicOp(N->getOpcode()) &&
         "Expected a logic operation on setcc results.");
  // First lower the whole logic tree into a GPR computation.
  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
  if (!LoweredLogical)
    return nullptr;

  SDLoc dl(N);
  // computeLogicOpInGPR emits bitwise negation as XORI8 with immediate 1.
  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  SDValue LHS = LoweredLogical.getOperand(0);
  SDValue RHS = LoweredLogical.getOperand(1);
  SDValue WideOp;
  SDValue OpToConvToRecForm;

  // Look through any 32-bit to 64-bit implicit extend nodes to find the
  // opcode that is input to the XORI.
  if (IsBitwiseNegate &&
      LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
    OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
  else if (IsBitwiseNegate)
    // If the input to the XORI isn't an extension, that's what we're after.
    OpToConvToRecForm = LoweredLogical.getOperand(0);
  else
    // If this is not an XORI, it is a reg-reg logical op and we can convert
    // it to record-form.
    OpToConvToRecForm = LoweredLogical;

  // Get the record-form version of the node we're looking to use to get the
  // CR result from.
  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);

  // Convert the right node to record-form. This is either the logical we're
  // looking at or it is the input node to the negation (if we're looking at
  // a bitwise negation).
  if (NewOpc != -1 && IsBitwiseNegate) {
    // The input to the XORI has a record-form. Use it.
    assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
           "Expected a PPC::XORI8 only for bitwise negation.");
    // Emit the record-form instruction.
    std::vector<SDValue> Ops;
    for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
      Ops.push_back(OpToConvToRecForm.getOperand(i));

    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc, dl,
                                     OpToConvToRecForm.getValueType(),
                                     MVT::Glue, Ops), 0);
  } else {
    assert((NewOpc != -1 || !IsBitwiseNegate) &&
           "No record form available for AND8/OR8/XOR8?");
    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
                                     MVT::i64, MVT::Glue, LHS, RHS), 0);
  }

  // Select this node to a single bit from CR0 set by the record-form node
  // just created. For bitwise negation, use the EQ bit which is the equivalent
  // of negating the result (i.e. it is a bit set when the result of the
  // operation is zero).
  SDValue SRIdxVal =
    CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
  SDValue CRBit =
    SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                   MVT::i1, CR0Reg, SRIdxVal,
                                   WideOp.getValue(1)), 0);
  return CRBit.getNode();
}
2467 
// Lower a logical operation on i1 values into a GPR sequence if possible.
// The result can be kept in a GPR if requested.
// Three types of inputs can be handled:
// - SETCC
// - TRUNCATE
// - Logical operation (AND/OR/XOR)
// There is also a special case that is handled (namely a complement operation
// achieved with xor %a, -1).
// Returns the lowered 64-bit value, or an empty SDValue on failure.
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  assert(isLogicOp(LogicOp.getOpcode()) &&
        "Can only handle logic operations here.");
  assert(LogicOp.getValueType() == MVT::i1 &&
         "Can only handle logic operations on i1 values here.");
  SDLoc dl(LogicOp);
  SDValue LHS, RHS;

 // Special case: xor %a, -1
  bool IsBitwiseNegation = isBitwiseNot(LogicOp);

  // Produces a GPR sequence for each operand of the binary logic operation.
  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  // the value in a GPR and for logic operations, it will recursively produce
  // a GPR sequence for the operation.
  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
    unsigned OperandOpcode = Operand.getOpcode();
    if (OperandOpcode == ISD::SETCC)
      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
    else if (OperandOpcode == ISD::TRUNCATE) {
      SDValue InputOp = Operand.getOperand(0);
      EVT InVT = InputOp.getValueType();
      // Keep only the low-order bit (rldicl with shift 0, mask begin 63).
      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
                                            PPC::RLDICL, dl, InVT, InputOp,
                                            S->getI64Imm(0, dl),
                                            S->getI64Imm(63, dl)), 0);
    } else if (isLogicOp(OperandOpcode))
      return computeLogicOpInGPR(Operand);
    return SDValue();
  };
  LHS = getLogicOperand(LogicOp.getOperand(0));
  RHS = getLogicOperand(LogicOp.getOperand(1));

  // If a GPR sequence can't be produced for the LHS we can't proceed.
  // Not producing a GPR sequence for the RHS is only a problem if this isn't
  // a bitwise negation operation.
  if (!LHS || (!RHS && !IsBitwiseNegation))
    return SDValue();

  NumLogicOpsOnComparison++;

  // We will use the inputs as 64-bit values.
  if (LHS.getValueType() == MVT::i32)
    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);

  unsigned NewOpc;
  switch (LogicOp.getOpcode()) {
  default: llvm_unreachable("Unknown logic operation.");
  case ISD::AND: NewOpc = PPC::AND8; break;
  case ISD::OR:  NewOpc = PPC::OR8;  break;
  case ISD::XOR: NewOpc = PPC::XOR8; break;
  }

  if (IsBitwiseNegation) {
    // Complement of an i1 value: xori with immediate 1.
    RHS = S->getI64Imm(1, dl);
    NewOpc = PPC::XORI8;
  }

  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);

}
2539 
/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
/// Otherwise just reinterpret it as a 64-bit value.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
  assert(Input.getValueType() == MVT::i32 &&
         "Can only sign-extend 32-bit values here.");
  unsigned Opc = Input.getOpcode();

  // The value was sign extended and then truncated to 32-bits. No need to
  // sign extend it again.
  if (Opc == ISD::TRUNCATE &&
      (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
       Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  // The input is a sign-extending load. All ppc sign-extending loads
  // sign-extend to the full 64-bits.
  if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  // We don't sign-extend constants.
  if (InputConst)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  // None of the above applies, so emit an explicit 32->64-bit sign extend.
  SDLoc dl(Input);
  SignExtensionsAdded++;
  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
                                        MVT::i64, Input), 0);
}
2572 
/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
/// Otherwise just reinterpret it as a 64-bit value.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
  assert(Input.getValueType() == MVT::i32 &&
         "Can only zero-extend 32-bit values here.");
  unsigned Opc = Input.getOpcode();

  // The only condition under which we can omit the actual extend instruction:
  // - The value is a positive constant
  // - The value comes from a load that isn't a sign-extending load
  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
    (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
     Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
  if (IsTruncateOfZExt)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  // Non-negative constants already have zero high bits.
  if (InputConst && InputConst->getSExtValue() >= 0)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  // The input is a load that doesn't sign-extend (it will be zero-extended).
  if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  // None of the above, need to zero-extend: rldicl with shift 0 and mask
  // begin 32 clears the upper 32 bits.
  SDLoc dl(Input);
  ZeroExtensionsAdded++;
  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
                                        S->getI64Imm(0, dl),
                                        S->getI64Imm(32, dl)), 0);
}
2608 
2609 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2610 // course not actual zero/sign extensions that will generate machine code,
2611 // they're just a way to reinterpret a 32 bit value in a register as a
2612 // 64 bit value and vice-versa.
2613 SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
2614  ExtOrTruncConversion Conv) {
2615  SDLoc dl(NatWidthRes);
2616 
2617  // For reinterpreting 32-bit values as 64 bit values, we generate
2618  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2619  if (Conv == ExtOrTruncConversion::Ext) {
2620  SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
2621  SDValue SubRegIdx =
2622  CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2623  return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
2624  ImDef, NatWidthRes, SubRegIdx), 0);
2625  }
2626 
2627  assert(Conv == ExtOrTruncConversion::Trunc &&
2628  "Unknown convertion between 32 and 64 bit values.");
2629  // For reinterpreting 64-bit values as 32-bit values, we just need to
2630  // EXTRACT_SUBREG (i.e. extract the low word).
2631  SDValue SubRegIdx =
2632  CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2633  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
2634  NatWidthRes, SubRegIdx), 0);
2635 }
2636 
// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
// Handle both zero-extensions and sign-extensions.
// LHS is the value being compared against zero; CmpTy selects which of the
// four compare-against-zero forms (GE/LE, zero-/sign-extended result) to emit.
SDValue
IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                                         ZeroCompare CmpTy) {
  EVT InVT = LHS.getValueType();
  bool Is32Bit = InVT == MVT::i32;
  SDValue ToExtend;

  // Produce the value that needs to be either zero or sign extended.
  switch (CmpTy) {
  case ZeroCompare::GEZExt:
  case ZeroCompare::GESExt:
    // For x >= 0, start from ~x (nor x, x): the sign bit of ~x is set
    // exactly when x is non-negative.
    ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
                                              dl, InVT, LHS, LHS), 0);
    break;
  case ZeroCompare::LEZExt:
  case ZeroCompare::LESExt: {
    if (Is32Bit) {
      // Upper 32 bits cannot be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      // After sign-extension, -x cannot overflow, so (-x) >> 63 is 1 iff
      // x > 0; the LE results below invert/adjust this bit.
      ToExtend =
        SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                       Neg, S->getI64Imm(1, dl),
                                       S->getI64Imm(63, dl)), 0);
    } else {
      // For 64-bit x <= 0, compute (x - 1) | x: its sign bit is set exactly
      // when x <= 0.
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                                Addi, LHS), 0);
    }
    break;
  }
  }

  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
    // Zero-extended result: logical shift right by 63 isolates the sign bit.
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          ToExtend, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
    // Sign-extended result: arithmetic shift right by 63 smears the sign bit.
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
                                          S->getI64Imm(63, dl)), 0);

  // NOTE(review): the message reads as if 32-bit sequences were handled
  // above, but it is the 64-bit sequences that were — message looks
  // inverted; verify against upstream intent.
  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
  // For 32-bit sequences, the extensions differ between GE/LE cases.
  switch (CmpTy) {
  case ZeroCompare::GEZExt: {
    // rlwinm 1,31,31 == logical shift right by 31: isolate the sign bit.
    SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
                           S->getI32Imm(31, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                          ShiftOps), 0);
  }
  case ZeroCompare::GESExt:
    // Arithmetic shift right by 31 smears the sign bit across the word.
    return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
                                          S->getI32Imm(31, dl)), 0);
  case ZeroCompare::LEZExt:
    // ToExtend here is (-x) >> 63 (i.e. x > 0); xor with 1 gives x <= 0.
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(1, dl)), 0);
  case ZeroCompare::LESExt:
    // (x > 0) - 1 yields 0 for x > 0 and -1 for x <= 0 (sign-extended form).
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(-1, dl)), 0);
  }

  // The above case covers all the enumerators so it can't have a default clause
  // to avoid compiler warnings.
  llvm_unreachable("Unknown zero-comparison type.");
}
2710 
2711 /// Produces a zero-extended result of comparing two 32-bit values according to
2712 /// the passed condition code.
2713 SDValue
2714 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
2715  ISD::CondCode CC,
2716  int64_t RHSValue, SDLoc dl) {
2717  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
2719  return SDValue();
2720  bool IsRHSZero = RHSValue == 0;
2721  bool IsRHSOne = RHSValue == 1;
2722  bool IsRHSNegOne = RHSValue == -1LL;
2723  switch (CC) {
2724  default: return SDValue();
2725  case ISD::SETEQ: {
2726  // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2727  // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
2728  SDValue Xor = IsRHSZero ? LHS :
2729  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2730  SDValue Clz =
2731  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2732  SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2733  S->getI32Imm(31, dl) };
2734  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2735  ShiftOps), 0);
2736  }
2737  case ISD::SETNE: {
2738  // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2739  // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
2740  SDValue Xor = IsRHSZero ? LHS :
2741  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2742  SDValue Clz =
2743  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2744  SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2745  S->getI32Imm(31, dl) };
2746  SDValue Shift =
2747  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
2748  return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
2749  S->getI32Imm(1, dl)), 0);
2750  }
2751  case ISD::SETGE: {
2752  // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2753  // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
2754  if(IsRHSZero)
2755  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2756 
2757  // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2758  // by swapping inputs and falling through.
2759  std::swap(LHS, RHS);
2760  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2761  IsRHSZero = RHSConst && RHSConst->isNullValue();
2763  }
2764  case ISD::SETLE: {
2765  if (CmpInGPR == ICGPR_NonExtIn)
2766  return SDValue();
2767  // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2768  // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
2769  if(IsRHSZero) {
2770  if (CmpInGPR == ICGPR_NonExtIn)
2771  return SDValue();
2772  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2773  }
2774 
2775  // The upper 32-bits of the register can't be undefined for this sequence.
2776  LHS = signExtendInputIfNeeded(LHS);
2777  RHS = signExtendInputIfNeeded(RHS);
2778  SDValue Sub =
2779  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
2780  SDValue Shift =
2781  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
2782  S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
2783  0);
2784  return
2785  SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
2786  MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
2787  }
2788  case ISD::SETGT: {
2789  // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2790  // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2791  // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
2792  // Handle SETLT -1 (which is equivalent to SETGE 0).
2793  if (IsRHSNegOne)
2794  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2795 
2796  if (IsRHSZero) {
2797  if (CmpInGPR == ICGPR_NonExtIn)
2798  return SDValue();
2799  // The upper 32-bits of the register can't be undefined for this sequence.
2800  LHS = signExtendInputIfNeeded(LHS);
2801  RHS = signExtendInputIfNeeded(RHS);
2802  SDValue Neg =
2803  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2804  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2805  Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
2806  }
2807  // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2808  // (%b < %a) by swapping inputs and falling through.
2809  std::swap(LHS, RHS);
2810  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2811  IsRHSZero = RHSConst && RHSConst->isNullValue();
2812  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
2814  }
2815  case ISD::SETLT: {
2816  // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
2817  // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
2818  // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
2819  // Handle SETLT 1 (which is equivalent to SETLE 0).
2820  if (IsRHSOne) {
2821  if (CmpInGPR == ICGPR_NonExtIn)
2822  return SDValue();
2823  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2824  }
2825 
2826  if (IsRHSZero) {
2827  SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
2828  S->getI32Imm(31, dl) };
2829  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2830  ShiftOps), 0);
2831  }
2832 
2833  if (CmpInGPR == ICGPR_NonExtIn)
2834  return SDValue();
2835  // The upper 32-bits of the register can't be undefined for this sequence.
2836  LHS = signExtendInputIfNeeded(LHS);
2837  RHS = signExtendInputIfNeeded(RHS);
2838  SDValue SUBFNode =
2839  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
2840  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2841  SUBFNode, S->getI64Imm(1, dl),
2842  S->getI64Imm(63, dl)), 0);
2843  }
2844  case ISD::SETUGE:
2845  // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
2846  // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
2847  std::swap(LHS, RHS);
2849  case ISD::SETULE: {
2850  if (CmpInGPR == ICGPR_NonExtIn)
2851  return SDValue();
2852  // The upper 32-bits of the register can't be undefined for this sequence.
2853  LHS = zeroExtendInputIfNeeded(LHS);
2854  RHS = zeroExtendInputIfNeeded(RHS);
2855  SDValue Subtract =
2856  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
2857  SDValue SrdiNode =
2858  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2859  Subtract, S->getI64Imm(1, dl),
2860  S->getI64Imm(63, dl)), 0);
2861  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
2862  S->getI32Imm(1, dl)), 0);
2863  }
2864  case ISD::SETUGT:
2865  // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
2866  // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
2867  std::swap(LHS, RHS);
2869  case ISD::SETULT: {
2870  if (CmpInGPR == ICGPR_NonExtIn)
2871  return SDValue();
2872  // The upper 32-bits of the register can't be undefined for this sequence.
2873  LHS = zeroExtendInputIfNeeded(LHS);
2874  RHS = zeroExtendInputIfNeeded(RHS);
2875  SDValue Subtract =
2876  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
2877  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2878  Subtract, S->getI64Imm(1, dl),
2879  S->getI64Imm(63, dl)), 0);
2880  }
2881  }
2882 }
2883 
2884 /// Produces a sign-extended result of comparing two 32-bit values according to
2885 /// the passed condition code.
2886 SDValue
2887 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
2888  ISD::CondCode CC,
2889  int64_t RHSValue, SDLoc dl) {
2890  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
2892  return SDValue();
2893  bool IsRHSZero = RHSValue == 0;
2894  bool IsRHSOne = RHSValue == 1;
2895  bool IsRHSNegOne = RHSValue == -1LL;
2896 
2897  switch (CC) {
2898  default: return SDValue();
2899  case ISD::SETEQ: {
2900  // (sext (setcc %a, %b, seteq)) ->
2901  // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
2902  // (sext (setcc %a, 0, seteq)) ->
2903  // (ashr (shl (ctlz %a), 58), 63)
2904  SDValue CountInput = IsRHSZero ? LHS :
2905  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2906  SDValue Cntlzw =
2907  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
2908  SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
2909  S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
2910  SDValue Slwi =
2911  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
2912  return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
2913  }
2914  case ISD::SETNE: {
2915  // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
2916  // flip the bit, finally take 2's complement.
2917  // (sext (setcc %a, %b, setne)) ->
2918  // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
2919  // Same as above, but the first xor is not needed.
2920  // (sext (setcc %a, 0, setne)) ->
2921  // (neg (xor (lshr (ctlz %a), 5), 1))
2922  SDValue Xor = IsRHSZero ? LHS :
2923  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2924  SDValue Clz =
2925  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2926  SDValue ShiftOps[] =
2927  { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
2928  SDValue Shift =
2929  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
2930  SDValue Xori =
2931  SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
2932  S->getI32Imm(1, dl)), 0);
2933  return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
2934  }
2935  case ISD::SETGE: {
2936  // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
2937  // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
2938  if (IsRHSZero)
2939  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
2940 
2941  // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2942  // by swapping inputs and falling through.
2943  std::swap(LHS, RHS);
2944  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2945  IsRHSZero = RHSConst && RHSConst->isNullValue();
2947  }
2948  case ISD::SETLE: {
2949  if (CmpInGPR == ICGPR_NonExtIn)
2950  return SDValue();
2951  // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
2952  // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
2953  if (IsRHSZero)
2954  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
2955 
2956  // The upper 32-bits of the register can't be undefined for this sequence.
2957  LHS = signExtendInputIfNeeded(LHS);
2958  RHS = signExtendInputIfNeeded(RHS);
2959  SDValue SUBFNode =
2960  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
2961  LHS, RHS), 0);
2962  SDValue Srdi =
2963  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2964  SUBFNode, S->getI64Imm(1, dl),
2965  S->getI64Imm(63, dl)), 0);
2966  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
2967  S->getI32Imm(-1, dl)), 0);
2968  }
2969  case ISD::SETGT: {
2970  // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
2971  // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
2972  // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
2973  if (IsRHSNegOne)
2974  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
2975  if (IsRHSZero) {
2976  if (CmpInGPR == ICGPR_NonExtIn)
2977  return SDValue();
2978  // The upper 32-bits of the register can't be undefined for this sequence.
2979  LHS = signExtendInputIfNeeded(LHS);
2980  RHS = signExtendInputIfNeeded(RHS);
2981  SDValue Neg =
2982  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2983  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
2984  S->getI64Imm(63, dl)), 0);
2985  }
2986  // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2987  // (%b < %a) by swapping inputs and falling through.
2988  std::swap(LHS, RHS);
2989  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2990  IsRHSZero = RHSConst && RHSConst->isNullValue();
2991  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
2993  }
2994  case ISD::SETLT: {
2995  // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
2996  // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
2997  // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
2998  if (IsRHSOne) {
2999  if (CmpInGPR == ICGPR_NonExtIn)
3000  return SDValue();
3001  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3002  }
3003  if (IsRHSZero)
3004  return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3005  S->getI32Imm(31, dl)), 0);
3006 
3007  if (CmpInGPR == ICGPR_NonExtIn)
3008  return SDValue();
3009  // The upper 32-bits of the register can't be undefined for this sequence.
3010  LHS = signExtendInputIfNeeded(LHS);
3011  RHS = signExtendInputIfNeeded(RHS);
3012  SDValue SUBFNode =
3013  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3014  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3015  SUBFNode, S->getI64Imm(63, dl)), 0);
3016  }
3017  case ISD::SETUGE:
3018  // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3019  // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3020  std::swap(LHS, RHS);
3022  case ISD::SETULE: {
3023  if (CmpInGPR == ICGPR_NonExtIn)
3024  return SDValue();
3025  // The upper 32-bits of the register can't be undefined for this sequence.
3026  LHS = zeroExtendInputIfNeeded(LHS);
3027  RHS = zeroExtendInputIfNeeded(RHS);
3028  SDValue Subtract =
3029  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3030  SDValue Shift =
3031  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3032  S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3033  0);
3034  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3035  S->getI32Imm(-1, dl)), 0);
3036  }
3037  case ISD::SETUGT:
3038  // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3039  // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3040  std::swap(LHS, RHS);
3042  case ISD::SETULT: {
3043  if (CmpInGPR == ICGPR_NonExtIn)
3044  return SDValue();
3045  // The upper 32-bits of the register can't be undefined for this sequence.
3046  LHS = zeroExtendInputIfNeeded(LHS);
3047  RHS = zeroExtendInputIfNeeded(RHS);
3048  SDValue Subtract =
3049  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3050  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3051  Subtract, S->getI64Imm(63, dl)), 0);
3052  }
3053  }
3054 }
3055 
3056 /// Produces a zero-extended result of comparing two 64-bit values according to
3057 /// the passed condition code.
3058 SDValue
3059 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3060  ISD::CondCode CC,
3061  int64_t RHSValue, SDLoc dl) {
3062  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3064  return SDValue();
3065  bool IsRHSZero = RHSValue == 0;
3066  bool IsRHSOne = RHSValue == 1;
3067  bool IsRHSNegOne = RHSValue == -1LL;
3068  switch (CC) {
3069  default: return SDValue();
3070  case ISD::SETEQ: {
3071  // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3072  // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3073  SDValue Xor = IsRHSZero ? LHS :
3074  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3075  SDValue Clz =
3076  SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3077  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3078  S->getI64Imm(58, dl),
3079  S->getI64Imm(63, dl)), 0);
3080  }
3081  case ISD::SETNE: {
3082  // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3083  // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3084  // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3085  // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3086  SDValue Xor = IsRHSZero ? LHS :
3087  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3088  SDValue AC =
3089  SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3090  Xor, S->getI32Imm(~0U, dl)), 0);
3091  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3092  Xor, AC.getValue(1)), 0);
3093  }
3094  case ISD::SETGE: {
3095  // {subc.reg, subc.CA} = (subcarry %a, %b)
3096  // (zext (setcc %a, %b, setge)) ->
3097  // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3098  // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3099  if (IsRHSZero)
3100  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3101  std::swap(LHS, RHS);
3102  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3103  IsRHSZero = RHSConst && RHSConst->isNullValue();
3105  }
3106  case ISD::SETLE: {
3107  // {subc.reg, subc.CA} = (subcarry %b, %a)
3108  // (zext (setcc %a, %b, setge)) ->
3109  // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3110  // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3111  if (IsRHSZero)
3112  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3113  SDValue ShiftL =
3114  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3115  S->getI64Imm(1, dl),
3116  S->getI64Imm(63, dl)), 0);
3117  SDValue ShiftR =
3118  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3119  S->getI64Imm(63, dl)), 0);
3120  SDValue SubtractCarry =
3121  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3122  LHS, RHS), 1);
3123  return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3124  ShiftR, ShiftL, SubtractCarry), 0);
3125  }
3126  case ISD::SETGT: {
3127  // {subc.reg, subc.CA} = (subcarry %b, %a)
3128  // (zext (setcc %a, %b, setgt)) ->
3129  // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3130  // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3131  if (IsRHSNegOne)
3132  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3133  if (IsRHSZero) {
3134  SDValue Addi =
3135  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3136  S->getI64Imm(~0ULL, dl)), 0);
3137  SDValue Nor =
3138  SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3139  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3140  S->getI64Imm(1, dl),
3141  S->getI64Imm(63, dl)), 0);
3142  }
3143  std::swap(LHS, RHS);
3144  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3145  IsRHSZero = RHSConst && RHSConst->isNullValue();
3146  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3148  }
3149  case ISD::SETLT: {
3150  // {subc.reg, subc.CA} = (subcarry %a, %b)
3151  // (zext (setcc %a, %b, setlt)) ->
3152  // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3153  // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3154  if (IsRHSOne)
3155  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3156  if (IsRHSZero)
3157  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3158  S->getI64Imm(1, dl),
3159  S->getI64Imm(63, dl)), 0);
3160  SDValue SRADINode =
3161  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3162  LHS, S->getI64Imm(63, dl)), 0);
3163  SDValue SRDINode =
3164  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3165  RHS, S->getI64Imm(1, dl),
3166  S->getI64Imm(63, dl)), 0);
3167  SDValue SUBFC8Carry =
3168  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3169  RHS, LHS), 1);
3170  SDValue ADDE8Node =
3171  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3172  SRDINode, SRADINode, SUBFC8Carry), 0);
3173  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3174  ADDE8Node, S->getI64Imm(1, dl)), 0);
3175  }
3176  case ISD::SETUGE:
3177  // {subc.reg, subc.CA} = (subcarry %a, %b)
3178  // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3179  std::swap(LHS, RHS);
3181  case ISD::SETULE: {
3182  // {subc.reg, subc.CA} = (subcarry %b, %a)
3183  // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3184  SDValue SUBFC8Carry =
3185  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3186  LHS, RHS), 1);
3187  SDValue SUBFE8Node =
3188  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3189  LHS, LHS, SUBFC8Carry), 0);
3190  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3191  SUBFE8Node, S->getI64Imm(1, dl)), 0);
3192  }
3193  case ISD::SETUGT:
3194  // {subc.reg, subc.CA} = (subcarry %b, %a)
3195  // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3196  std::swap(LHS, RHS);
3198  case ISD::SETULT: {
3199  // {subc.reg, subc.CA} = (subcarry %a, %b)
3200  // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3201  SDValue SubtractCarry =
3202  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3203  RHS, LHS), 1);
3204  SDValue ExtSub =
3205  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3206  LHS, LHS, SubtractCarry), 0);
3207  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3208  ExtSub), 0);
3209  }
3210  }
3211 }
3212 
3213 /// Produces a sign-extended result of comparing two 64-bit values according to
3214 /// the passed condition code.
3215 SDValue
3216 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3217  ISD::CondCode CC,
3218  int64_t RHSValue, SDLoc dl) {
3219  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3221  return SDValue();
3222  bool IsRHSZero = RHSValue == 0;
3223  bool IsRHSOne = RHSValue == 1;
3224  bool IsRHSNegOne = RHSValue == -1LL;
3225  switch (CC) {
3226  default: return SDValue();
3227  case ISD::SETEQ: {
3228  // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3229  // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3230  // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3231  // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3232  SDValue AddInput = IsRHSZero ? LHS :
3233  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3234  SDValue Addic =
3235  SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3236  AddInput, S->getI32Imm(~0U, dl)), 0);
3237  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3238  Addic, Addic.getValue(1)), 0);
3239  }
3240  case ISD::SETNE: {
3241  // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3242  // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3243  // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3244  // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3245  SDValue Xor = IsRHSZero ? LHS :
3246  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3247  SDValue SC =
3248  SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3249  Xor, S->getI32Imm(0, dl)), 0);
3250  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3251  SC, SC.getValue(1)), 0);
3252  }
3253  case ISD::SETGE: {
3254  // {subc.reg, subc.CA} = (subcarry %a, %b)
3255  // (zext (setcc %a, %b, setge)) ->
3256  // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3257  // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3258  if (IsRHSZero)
3259  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3260  std::swap(LHS, RHS);
3261  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3262  IsRHSZero = RHSConst && RHSConst->isNullValue();
3264  }
3265  case ISD::SETLE: {
3266  // {subc.reg, subc.CA} = (subcarry %b, %a)
3267  // (zext (setcc %a, %b, setge)) ->
3268  // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3269  // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3270  if (IsRHSZero)
3271  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3272  SDValue ShiftR =
3273  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3274  S->getI64Imm(63, dl)), 0);
3275  SDValue ShiftL =
3276  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3277  S->getI64Imm(1, dl),
3278  S->getI64Imm(63, dl)), 0);
3279  SDValue SubtractCarry =
3280  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3281  LHS, RHS), 1);
3282  SDValue Adde =
3283  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3284  ShiftR, ShiftL, SubtractCarry), 0);
3285  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3286  }
3287  case ISD::SETGT: {
3288  // {subc.reg, subc.CA} = (subcarry %b, %a)
3289  // (zext (setcc %a, %b, setgt)) ->
3290  // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3291  // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3292  if (IsRHSNegOne)
3293  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3294  if (IsRHSZero) {
3295  SDValue Add =
3296  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3297  S->getI64Imm(-1, dl)), 0);
3298  SDValue Nor =
3299  SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3300  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3301  S->getI64Imm(63, dl)), 0);
3302  }
3303  std::swap(LHS, RHS);
3304  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3305  IsRHSZero = RHSConst && RHSConst->isNullValue();
3306  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3308  }
3309  case ISD::SETLT: {
3310  // {subc.reg, subc.CA} = (subcarry %a, %b)
3311  // (zext (setcc %a, %b, setlt)) ->
3312  // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3313  // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3314  if (IsRHSOne)
3315  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3316  if (IsRHSZero) {
3317  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3318  S->getI64Imm(63, dl)), 0);
3319  }
3320  SDValue SRADINode =
3321  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3322  LHS, S->getI64Imm(63, dl)), 0);
3323  SDValue SRDINode =
3324  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3325  RHS, S->getI64Imm(1, dl),
3326  S->getI64Imm(63, dl)), 0);
3327  SDValue SUBFC8Carry =
3328  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3329  RHS, LHS), 1);
3330  SDValue ADDE8Node =
3331  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3332  SRDINode, SRADINode, SUBFC8Carry), 0);
3333  SDValue XORI8Node =
3334  SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3335  ADDE8Node, S->getI64Imm(1, dl)), 0);
3336  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3337  XORI8Node), 0);
3338  }
3339  case ISD::SETUGE:
3340  // {subc.reg, subc.CA} = (subcarry %a, %b)
3341  // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3342  std::swap(LHS, RHS);
3344  case ISD::SETULE: {
3345  // {subc.reg, subc.CA} = (subcarry %b, %a)
3346  // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3347  SDValue SubtractCarry =
3348  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3349  LHS, RHS), 1);
3350  SDValue ExtSub =
3351  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3352  LHS, SubtractCarry), 0);
3353  return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3354  ExtSub, ExtSub), 0);
3355  }
3356  case ISD::SETUGT:
3357  // {subc.reg, subc.CA} = (subcarry %b, %a)
3358  // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3359  std::swap(LHS, RHS);
3361  case ISD::SETULT: {
3362  // {subc.reg, subc.CA} = (subcarry %a, %b)
3363  // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3364  SDValue SubCarry =
3365  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3366  RHS, LHS), 1);
3367  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3368  LHS, LHS, SubCarry), 0);
3369  }
3370  }
3371 }
3372 
3373 /// Do all uses of this SDValue need the result in a GPR?
3374 /// This is meant to be used on values that have type i1 since
3375 /// it is somewhat meaningless to ask if values of other types
3376 /// should be kept in GPR's.
3377 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3378  assert(Compare.getOpcode() == ISD::SETCC &&
3379  "An ISD::SETCC node required here.");
3380 
3381  // For values that have a single use, the caller should obviously already have
3382  // checked if that use is an extending use. We check the other uses here.
3383  if (Compare.hasOneUse())
3384  return true;
3385  // We want the value in a GPR if it is being extended, used for a select, or
3386  // used in logical operations.
3387  for (auto CompareUse : Compare.getNode()->uses())
3388  if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3389  CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3390  CompareUse->getOpcode() != ISD::SELECT &&
3391  !isLogicOp(CompareUse->getOpcode())) {
3392  OmittedForNonExtendUses++;
3393  return false;
3394  }
3395  return true;
3396 }
3397 
/// Returns an equivalent of a SETCC node but with the result the same width as
/// the inputs. This can also be used for SELECT_CC if either the true or false
/// values is a power of two while the other is zero.
SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
                                                SetccInGPROpts ConvOpts) {
  assert((Compare.getOpcode() == ISD::SETCC ||
          Compare.getOpcode() == ISD::SELECT_CC) &&
         "An ISD::SETCC node required here.");

  // Don't convert this comparison to a GPR sequence because there are uses
  // of the i1 result (i.e. uses that require the result in the CR).
  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
    return SDValue();

  SDValue LHS = Compare.getOperand(0);
  SDValue RHS = Compare.getOperand(1);

  // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
  int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
  ISD::CondCode CC =
    cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
  EVT InputVT = LHS.getValueType();
  // Only i32 and i64 comparisons are handled by the GPR expansion helpers.
  if (InputVT != MVT::i32 && InputVT != MVT::i64)
    return SDValue();

  // The *Invert options ask for the logically negated comparison, which we
  // obtain by inverting the condition code up front.
  if (ConvOpts == SetccInGPROpts::ZExtInvert ||
      ConvOpts == SetccInGPROpts::SExtInvert)
    CC = ISD::getSetCCInverse(CC, true);

  bool Inputs32Bit = InputVT == MVT::i32;

  SDLoc dl(Compare);
  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  // INT64_MAX is used as a "no constant RHS" sentinel for the helpers below.
  int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
  bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
    ConvOpts == SetccInGPROpts::SExtInvert;

  // Dispatch on input width and desired extension kind.
  if (IsSext && Inputs32Bit)
    return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  else if (Inputs32Bit)
    return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
  else if (IsSext)
    return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
}
3443 
3444 } // end anonymous namespace
3445 
3446 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3447  if (N->getValueType(0) != MVT::i32 &&
3448  N->getValueType(0) != MVT::i64)
3449  return false;
3450 
3451  // This optimization will emit code that assumes 64-bit registers
3452  // so we don't want to run it in 32-bit mode. Also don't run it
3453  // on functions that are not to be optimized.
3454  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
3455  return false;
3456 
3457  switch (N->getOpcode()) {
3458  default: break;
3459  case ISD::ZERO_EXTEND:
3460  case ISD::SIGN_EXTEND:
3461  case ISD::AND:
3462  case ISD::OR:
3463  case ISD::XOR: {
3464  IntegerCompareEliminator ICmpElim(CurDAG, this);
3465  if (SDNode *New = ICmpElim.Select(N)) {
3466  ReplaceNode(N, New);
3467  return true;
3468  }
3469  }
3470  }
3471  return false;
3472 }
3473 
3474 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3475  if (N->getValueType(0) != MVT::i32 &&
3476  N->getValueType(0) != MVT::i64)
3477  return false;
3478 
3479  if (!UseBitPermRewriter)
3480  return false;
3481 
3482  switch (N->getOpcode()) {
3483  default: break;
3484  case ISD::ROTL:
3485  case ISD::SHL:
3486  case ISD::SRL:
3487  case ISD::AND:
3488  case ISD::OR: {
3489  BitPermutationSelector BPS(CurDAG);
3490  if (SDNode *New = BPS.Select(N)) {
3491  ReplaceNode(N, New);
3492  return true;
3493  }
3494  return false;
3495  }
3496  }
3497 
3498  return false;
3499 }
3500 
/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
///
/// Immediate forms of the compare instructions are used whenever the RHS
/// constant fits (cmpwi/cmplwi/cmpdi/cmpldi); for equality against larger
/// constants an xoris+cmplwi (or xoris8+cmpldi) pair is emitted instead of
/// materializing the constant.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                                  const SDLoc &dl) {
  // Always select the LHS.
  unsigned Opc;

  if (LHS.getValueType() == MVT::i32) {
    unsigned Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt32Immediate(RHS, Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>((int)Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);

        // For non-equality comparisons, the default code would materialize the
        // constant, then compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpw cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmplwi cr0,r0,0x5678
        //   beq cr0,L6
        SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
                                           getI32Imm(Imm >> 16, dl)), 0);
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
      }
      Opc = PPC::CMPLW;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: use the logical (unsigned) compare instructions.
      if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
      Opc = PPC::CMPLW;
    } else {
      // Signed compare: a signed 16-bit RHS folds into cmpwi.
      int16_t SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                              getI32Imm((int)SImm & 0xFFFF,
                                                        dl)),
                       0);
      Opc = PPC::CMPW;
    }
  } else if (LHS.getValueType() == MVT::i64) {
    uint64_t Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt64Immediate(RHS.getNode(), Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);

        // For non-equality comparisons, the default code would materialize the
        // constant, then compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpd cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmpldi cr0,r0,0x5678
        //   beq cr0,L6
        // The xoris trick only covers constants that fit in 32 bits; larger
        // 64-bit constants fall through to a register-register cmpld.
        if (isUInt<32>(Imm)) {
          SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
                                             getI64Imm(Imm >> 16, dl)), 0);
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
                                                getI64Imm(Imm & 0xFFFF, dl)),
                         0);
        }
      }
      Opc = PPC::CMPLD;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned 64-bit compare.
      if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                              getI64Imm(Imm & 0xFFFF, dl)), 0);
      Opc = PPC::CMPLD;
    } else {
      // Signed 64-bit compare.
      int16_t SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                              getI64Imm(SImm & 0xFFFF, dl)),
                       0);
      Opc = PPC::CMPD;
    }
  } else if (LHS.getValueType() == MVT::f32) {
    Opc = PPC::FCMPUS;
  } else {
    assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
    // Prefer the VSX scalar compare when the subtarget has VSX.
    Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
  }
  // Register-register compare; the result is the CR field value.
  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
3606 
3608  switch (CC) {
3609  case ISD::SETUEQ:
3610  case ISD::SETONE:
3611  case ISD::SETOLE:
3612  case ISD::SETOGE:
3613  llvm_unreachable("Should be lowered by legalize!");
3614  default: llvm_unreachable("Unknown condition!");
3615  case ISD::SETOEQ:
3616  case ISD::SETEQ: return PPC::PRED_EQ;
3617  case ISD::SETUNE:
3618  case ISD::SETNE: return PPC::PRED_NE;
3619  case ISD::SETOLT:
3620  case ISD::SETLT: return PPC::PRED_LT;
3621  case ISD::SETULE:
3622  case ISD::SETLE: return PPC::PRED_LE;
3623  case ISD::SETOGT:
3624  case ISD::SETGT: return PPC::PRED_GT;
3625  case ISD::SETUGE:
3626  case ISD::SETGE: return PPC::PRED_GE;
3627  case ISD::SETO: return PPC::PRED_NU;
3628  case ISD::SETUO: return PPC::PRED_UN;
3629  // These two are invalid for floating point. Assume we have int.
3630  case ISD::SETULT: return PPC::PRED_LT;
3631  case ISD::SETUGT: return PPC::PRED_GT;
3632  }
3633 }
3634 
/// getCRIdxForSetCC - Return the index of the condition register field
/// associated with the SetCC condition, and whether or not the field is
/// treated as inverted. That is, lt = 0; ge = 0 inverted.
///
/// The four CR-field bits are LT(0), GT(1), EQ(2), SO/UN(3); conditions
/// without their own bit are expressed as the inversion of another bit.
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
  Invert = false;
  switch (CC) {
  default: llvm_unreachable("Unknown condition!");
  case ISD::SETOLT:
  case ISD::SETLT:  return 0;                  // Bit #0 = SETOLT
  case ISD::SETOGT:
  case ISD::SETGT:  return 1;                  // Bit #1 = SETOGT
  case ISD::SETOEQ:
  case ISD::SETEQ:  return 2;                  // Bit #2 = SETOEQ
  case ISD::SETUO:  return 3;                  // Bit #3 = SETUO
  case ISD::SETUGE:
  case ISD::SETGE:  Invert = true; return 0;   // !Bit #0 = SETUGE
  case ISD::SETULE:
  case ISD::SETLE:  Invert = true; return 1;   // !Bit #1 = SETULE
  case ISD::SETUNE:
  case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
  case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
  case ISD::SETUEQ:
  case ISD::SETOGE:
  case ISD::SETOLE:
  case ISD::SETONE:
    llvm_unreachable("Invalid branch code: should be expanded by legalize");
  // These are invalid for floating point.  Assume integer.
  case ISD::SETULT: return 0;
  case ISD::SETUGT: return 1;
  }
}
3666 
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
//
// Only EQ/GT/GE (FP) and EQ/GT/GTU (integer) compares exist in hardware, so
// the remaining condition codes are reduced to those by swapping the
// operands (Swap) and/or negating the compare result (Negate); both flags
// are reported back to the caller, which must apply them.
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
                                bool HasVSX, bool &Swap, bool &Negate) {
  Swap = false;
  Negate = false;

  if (VecVT.isFloatingPoint()) {
    /* Handle some cases by swapping input operands. */
    switch (CC) {
      case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
      case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
      case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
      case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
      default: break;
    }
    /* Handle some cases by negating the result. */
    switch (CC) {
      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
      case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
      case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
      case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
      default: break;
    }
    /* We have instructions implementing the remaining cases. */
    switch (CC) {
      case ISD::SETEQ:
      case ISD::SETOEQ:
        if (VecVT == MVT::v4f32)
          return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
        else if (VecVT == MVT::v2f64)
          return PPC::XVCMPEQDP;
        break;
      case ISD::SETGT:
      case ISD::SETOGT:
        if (VecVT == MVT::v4f32)
          return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
        else if (VecVT == MVT::v2f64)
          return PPC::XVCMPGTDP;
        break;
      case ISD::SETGE:
      case ISD::SETOGE:
        if (VecVT == MVT::v4f32)
          return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
        else if (VecVT == MVT::v2f64)
          return PPC::XVCMPGEDP;
        break;
      default:
        break;
    }
    llvm_unreachable("Invalid floating-point vector compare condition");
  } else {
    /* Handle some cases by swapping input operands. */
    switch (CC) {
      case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
      case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
      default: break;
    }
    /* Handle some cases by negating the result. */
    switch (CC) {
      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
      case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
      case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
      case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
      default: break;
    }
    /* We have instructions implementing the remaining cases. */
    switch (CC) {
      case ISD::SETEQ:
      case ISD::SETUEQ:
        if (VecVT == MVT::v16i8)
          return PPC::VCMPEQUB;
        else if (VecVT == MVT::v8i16)
          return PPC::VCMPEQUH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPEQUW;
        else if (VecVT == MVT::v2i64)
          return PPC::VCMPEQUD;
        break;
      case ISD::SETGT:
        // Signed greater-than.
        if (VecVT == MVT::v16i8)
          return PPC::VCMPGTSB;
        else if (VecVT == MVT::v8i16)
          return PPC::VCMPGTSH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPGTSW;
        else if (VecVT == MVT::v2i64)
          return PPC::VCMPGTSD;
        break;
      case ISD::SETUGT:
        // Unsigned greater-than.
        if (VecVT == MVT::v16i8)
          return PPC::VCMPGTUB;
        else if (VecVT == MVT::v8i16)
          return PPC::VCMPGTUH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPGTUW;
        else if (VecVT == MVT::v2i64)
          return PPC::VCMPGTUD;
        break;
      default:
        break;
    }
    llvm_unreachable("Invalid integer vector compare condition");
  }
}
3777 
/// Try to select a SETCC node directly. Handles three situations:
///   1. Without CR bits, setcc against the constants 0 or -1 via short
///      branch-free GPR sequences (cntlzw/carry tricks).
///   2. Vector compares via the AltiVec/VSX vector compare instructions
///      (negating with vnor/xxlnor where needed).
///   3. Otherwise (still without CR bits), a real compare whose CR field is
///      copied to a GPR with mfocrf and the desired bit extracted.
/// Returns true if the node was replaced.
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc dl(N);
  unsigned Imm;
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  EVT PtrVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);

  if (!PPCSubTarget->useCRBits() &&
      isInt32Immediate(N->getOperand(1), Imm)) {
    // We can codegen setcc op, imm very efficiently compared to a brcond.
    // Check for those cases here.
    // setcc op, 0
    if (Imm == 0) {
      SDValue Op = N->getOperand(0);
      switch (CC) {
      default: break;
      case ISD::SETEQ: {
        // (op == 0) via cntlzw: the count is 32 only when op is zero, so
        // extracting bit 26 (via rlwinm) yields the boolean.
        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
        SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETNE: {
        if (isPPC64) break;
        // (op != 0) via the carry bit of op + (-1).
        SDValue AD =
          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                         Op, getI32Imm(~0U, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
        return true;
      }
      case ISD::SETLT: {
        // (op < 0) is just the sign bit.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (op > 0): sign bit of (-op & ~op).
        SDValue T =
          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
        SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      }
    } else if (Imm == ~0U) {        // setcc op, -1
      SDValue Op = N->getOperand(0);
      switch (CC) {
      default: break;
      case ISD::SETEQ:
        if (isPPC64) break;
        // (op == -1): carry out of op + 1, added to zero via addze.
        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(1, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                             SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                            MVT::i32,
                                                            getI32Imm(0, dl)),
                                     0), Op.getValue(1));
        return true;
      case ISD::SETNE: {
        if (isPPC64) break;
        // (op != -1): same carry trick as (x != 0) applied to ~op.
        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(~0U, dl));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
                             SDValue(AD, 1));
        return true;
      }
      case ISD::SETLT: {
        // (op < -1): sign bit of ((op + 1) & op).
        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
                                                    getI32Imm(1, dl)), 0);
        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
                                                    Op), 0);
        SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (op > -1): inverted sign bit.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
        return true;
      }
      }
    }
  }

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Altivec Vector compare instructions do not set any CR register by default and
  // vector compare operations return the same type as the operands.
  if (LHS.getValueType().isVector()) {
    // QPX vector compares are handled elsewhere.
    if (PPCSubTarget->hasQPX())
      return false;

    EVT VecVT = LHS.getValueType();
    bool Swap, Negate;
    unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
                                        PPCSubTarget->hasVSX(), Swap, Negate);
    if (Swap)
      std::swap(LHS, RHS);

    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (Negate) {
      // Negate the mask by NOR'ing the compare result with itself.
      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
      CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
                           ResVT, VCmp, VCmp);
      return true;
    }

    CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
    return true;
  }

  // With CR bits enabled, scalar setcc is handled by the generic patterns.
  if (PPCSubTarget->useCRBits())
    return false;

  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
  SDValue IntCR;

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);

  SDValue InFlag(nullptr, 0);  // Null incoming flag value.
  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                               InFlag).getValue(1);

  // Move CR7 into a GPR so the desired bit can be extracted with rlwinm.
  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                         CCReg), 0);

  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
                    getI32Imm(31, dl), getI32Imm(31, dl) };
  if (!Inv) {
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // Get the specified bit, then invert it since the CR field bit has the
  // opposite sense of the requested condition.
  SDValue Tmp =
    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  return true;
}
3929 
3930 /// Does this node represent a load/store node whose address can be represented
3931 /// with a register plus an immediate that's a multiple of \p Val:
3932 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
3933  LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
3934  StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
3935  SDValue AddrOp;
3936  if (LDN)
3937  AddrOp = LDN->getOperand(1);
3938  else if (STN)
3939  AddrOp = STN->getOperand(2);
3940 
3941  // If the address points a frame object or a frame object with an offset,
3942  // we need to check the object alignment.
3943  short Imm = 0;
3944  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
3945  AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
3946  AddrOp)) {
3947  // If op0 is a frame index that is under aligned, we can't do it either,
3948  // because it is translated to r31 or r1 + slot + offset. We won't know the
3949  // slot number until the stack frame is finalized.
3950  const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
3951  unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
3952  if ((SlotAlign % Val) != 0)
3953  return false;
3954 
3955  // If we have an offset, we need further check on the offset.
3956  if (AddrOp.getOpcode() != ISD::ADD)
3957  return true;
3958  }
3959 
3960  if (AddrOp.getOpcode() == ISD::ADD)
3961  return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
3962 
3963  // If the address comes from the outside, the offset will be zero.
3964  return AddrOp.getOpcode() == ISD::CopyFromReg;
3965 }
3966 
3967 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
3968  // Transfer memoperands.
3970  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
3971  cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
3972 }
3973 
3974 /// This method returns a node after flipping the MSB of each element
3975 /// of vector integer type. Additionally, if SignBitVec is non-null,
3976 /// this method sets a node with one at MSB of all elements
3977 /// and zero at other bits in SignBitVec.
3978 MachineSDNode *
3979 PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) {
3980  SDLoc dl(N);
3981  EVT VecVT = N.getValueType();
3982  if (VecVT == MVT::v4i32) {
3983  if (SignBitVec) {
3984  SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32);
3985  *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT,
3986  SDValue(ZV, 0));
3987  }
3988  return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N);
3989  }
3990  else if (VecVT == MVT::v8i16) {
3991  SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32,
3992  getI32Imm(0x8000, dl));
3993  SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32,
3994  SDValue(Hi, 0),
3995  getI32Imm(0x8000, dl));
3996  SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,
3997  SDValue(ScaImm, 0));
3998  /*
3999  Alternatively, we can do this as follow to use VRF instead of GPR.
4000  vspltish 5, 1
4001  vspltish 6, 15
4002  vslh 5, 6, 5
4003  */
4004  if (SignBitVec) *SignBitVec = VecImm;
4005  return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N,
4006  SDValue(VecImm, 0));
4007  }
4008  else if (VecVT == MVT::v16i8) {
4009  SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32,
4010  getI32Imm(0x80, dl));
4011  if (SignBitVec) *SignBitVec = VecImm;
4012  return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N,
4013  SDValue(VecImm, 0));
4014  }
4015  else
4016  llvm_unreachable("Unsupported vector data type for flipSignBit");
4017 }
4018 
4019 // Select - Convert the specified operand from a target-independent to a
4020 // target-specific node if it hasn't already been changed.
4022  SDLoc dl(N);
4023  if (N->isMachineOpcode()) {
4024  N->setNodeId(-1);
4025  return; // Already selected.
4026  }
4027 
4028  // In case any misguided DAG-level optimizations form an ADD with a
4029  // TargetConstant operand, crash here instead of miscompiling (by selecting
4030  // an r+r add instead of some kind of r+i add).
4031  if (N->getOpcode() == ISD::ADD &&
4033  llvm_unreachable("Invalid ADD with TargetConstant operand");
4034 
4035  // Try matching complex bit permutations before doing anything else.
4036  if (tryBitPermutation(N))
4037  return;
4038 
4039  // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4040  if (tryIntCompareInGPR(N))
4041  return;
4042 
4043  switch (N->getOpcode()) {
4044  default: break;
4045 
4046  case ISD::Constant:
4047  if (N->getValueType(0) == MVT::i64) {
4048  ReplaceNode(N, selectI64Imm(CurDAG, N));
4049  return;
4050  }
4051  break;
4052 
4053  case ISD::SETCC:
4054  if (trySETCC(N))
4055  return;
4056  break;
4057 
4058  case PPCISD::CALL: {
4059  const Module *M = MF->getFunction().getParent();
4060 
4061  if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
4062  !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
4064  break;
4065 
4066  SDValue Op = N->getOperand(1);
4067 
4068  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4069  if (GA->getTargetFlags() == PPCII::MO_PLT)
4070  getGlobalBaseReg();
4071  }
4072  else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
4073  if (ES->getTargetFlags() == PPCII::MO_PLT)
4074  getGlobalBaseReg();
4075  }
4076  }
4077  break;
4078 
4079  case PPCISD::GlobalBaseReg:
4080  ReplaceNode(N, getGlobalBaseReg());
4081  return;
4082 
4083  case ISD::FrameIndex:
4084  selectFrameIndex(N, N);
4085  return;
4086 
4087  case PPCISD::MFOCRF: {
4088  SDValue InFlag = N->getOperand(1);
4089  ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
4090  N->getOperand(0), InFlag));
4091  return;
4092  }
4093 
4095  ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
4096  MVT::Other, N->getOperand(0)));
4097  return;
4098 
4099  case PPCISD::SRA_ADDZE: {
4100  SDValue N0 = N->getOperand(0);
4101  SDValue ShiftAmt =
4102  CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
4103  getConstantIntValue(), dl,
4104  N->getValueType(0));
4105  if (N->getValueType(0) == MVT::i64) {
4106  SDNode *Op =
4107  CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
4108  N0, ShiftAmt);
4109  CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
4110  SDValue(Op, 1));
4111  return;
4112  } else {
4113  assert(N->getValueType(0) == MVT::i32 &&
4114  "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
4115  SDNode *Op =
4116  CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
4117  N0, ShiftAmt);
4118  CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
4119  SDValue(Op, 1));
4120  return;
4121  }
4122  }
4123 
4124  case ISD::STORE: {
4125  // Change TLS initial-exec D-form stores to X-form stores.
4126  StoreSDNode *ST = cast<StoreSDNode>(N);
4127  if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
4129  if (tryTLSXFormStore(ST))
4130  return;
4131  break;
4132  }
4133  case ISD::LOAD: {
4134  // Handle preincrement loads.
4135  LoadSDNode *LD = cast<LoadSDNode>(N);
4136  EVT LoadedVT = LD->getMemoryVT();
4137 
4138  // Normal loads are handled by code generated from the .td file.
4139  if (LD->getAddressingMode() != ISD::PRE_INC) {
4140  // Change TLS initial-exec D-form loads to X-form loads.
4141  if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
4142  if (tryTLSXFormLoad(LD))
4143  return;
4144  break;
4145  }
4146 
4147  SDValue Offset = LD->getOffset();
4148  if (Offset.getOpcode() == ISD::TargetConstant ||
4149  Offset.getOpcode() == ISD::TargetGlobalAddress) {
4150 
4151  unsigned Opcode;
4152  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4153  if (LD->getValueType(0) != MVT::i64) {
4154  // Handle PPC32 integer and normal FP loads.
4155  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4156  switch (LoadedVT.getSimpleVT().SimpleTy) {
4157  default: llvm_unreachable("Invalid PPC load type!");
4158  case MVT::f64: Opcode = PPC::LFDU; break;
4159  case MVT::f32: Opcode = PPC::LFSU; break;
4160  case MVT::i32: Opcode = PPC::LWZU; break;
4161  case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
4162  case MVT::i1:
4163  case MVT::i8: Opcode = PPC::LBZU; break;
4164  }
4165  } else {
4166  assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4167  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4168  switch (LoadedVT.getSimpleVT().SimpleTy) {
4169  default: llvm_unreachable("Invalid PPC load type!");
4170  case MVT::i64: Opcode = PPC::LDU; break;
4171  case MVT::i32: Opcode = PPC::LWZU8; break;
4172  case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
4173  case MVT::i1:
4174  case MVT::i8: Opcode = PPC::LBZU8; break;
4175  }
4176  }
4177 
4178  SDValue Chain = LD->getChain();
4179  SDValue Base = LD->getBasePtr();
4180  SDValue Ops[] = { Offset, Base, Chain };
4181  SDNode *MN = CurDAG->getMachineNode(
4182  Opcode, dl, LD->getValueType(0),
4183  PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4184  transferMemOperands(N, MN);
4185  ReplaceNode(N, MN);
4186  return;
4187  } else {
4188  unsigned Opcode;
4189  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4190  if (LD->getValueType(0) != MVT::i64) {
4191  // Handle PPC32 integer and normal FP loads.
4192  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4193  switch (LoadedVT.getSimpleVT().SimpleTy) {
4194  default: llvm_unreachable("Invalid PPC load type!");
4195  case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
4196  case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
4197  case MVT::f64: Opcode = PPC::LFDUX; break;
4198  case MVT::f32: Opcode = PPC::LFSUX; break;
4199  case MVT::i32: Opcode = PPC::LWZUX; break;
4200  case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
4201  case MVT::i1:
4202  case MVT::i8: Opcode = PPC::LBZUX; break;
4203  }
4204  } else {
4205  assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4206  assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
4207  "Invalid sext update load");
4208  switch (LoadedVT.getSimpleVT().SimpleTy) {
4209  default: llvm_unreachable("Invalid PPC load type!");
4210  case MVT::i64: Opcode = PPC::LDUX; break;
4211  case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
4212  case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
4213  case MVT::i1:
4214  case MVT::i8: Opcode = PPC::LBZUX8; break;
4215  }
4216  }
4217 
4218  SDValue Chain = LD->getChain();
4219  SDValue Base = LD->getBasePtr();
4220  SDValue Ops[] = { Base, Offset, Chain };
4221  SDNode *MN = CurDAG->getMachineNode(
4222  Opcode, dl, LD->getValueType(0),
4223  PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4224  transferMemOperands(N, MN);
4225  ReplaceNode(N, MN);
4226  return;
4227  }
4228  }
4229 
4230  case ISD::AND: {
4231  unsigned Imm, Imm2, SH, MB, ME;
4232  uint64_t Imm64;
4233 
4234  // If this is an and of a value rotated between 0 and 31 bits and then and'd
4235  // with a mask, emit rlwinm
4236  if (isInt32Immediate(N->getOperand(1), Imm) &&
4237  isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
4238  SDValue Val = N->getOperand(0).getOperand(0);
4239  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4240  getI32Imm(ME, dl) };
4241  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4242  return;
4243  }
4244  // If this is just a masked value where the input is not handled above, and
4245  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4246  if (isInt32Immediate(N->getOperand(1), Imm) &&
4247  isRunOfOnes(Imm, MB, ME) &&
4248  N->getOperand(0).getOpcode() != ISD::ROTL) {
4249  SDValue Val = N->getOperand(0);
4250  SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4251  getI32Imm(ME, dl) };
4252  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4253  return;
4254  }
4255  // If this is a 64-bit zero-extension mask, emit rldicl.
4256  if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4257  isMask_64(Imm64)) {
4258  SDValue Val = N->getOperand(0);
4259  MB = 64 - countTrailingOnes(Imm64);
4260  SH = 0;
4261 
4262  if (Val.getOpcode() == ISD::ANY_EXTEND) {
4263  auto Op0 = Val.getOperand(0);
4264  if ( Op0.getOpcode() == ISD::SRL &&
4265  isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
4266 
4267  auto ResultType = Val.getNode()->getValueType(0);
4268  auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
4269  ResultType);
4270  SDValue IDVal (ImDef, 0);
4271 
4272  Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
4273  ResultType, IDVal, Op0.getOperand(0),
4274  getI32Imm(1, dl)), 0);
4275  SH = 64 - Imm;
4276  }
4277  }
4278 
4279  // If the operand is a logical right shift, we can fold it into this
4280  // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4281  // for n <= mb. The right shift is really a left rotate followed by a
4282  // mask, and this mask is a more-restrictive sub-mask of the mask implied
4283  // by the shift.
4284  if (Val.getOpcode() == ISD::SRL &&
4285  isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
4286  assert(Imm < 64 && "Illegal shift amount");
4287  Val = Val.getOperand(0);
4288  SH = 64 - Imm;
4289  }
4290 
4291  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4292  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
4293  return;
4294  }
4295  // If this is a negated 64-bit zero-extension mask,
4296  // i.e. the immediate is a sequence of ones from most significant side
4297  // and all zero for reminder, we should use rldicr.
4298  if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4299  isMask_64(~Imm64)) {
4300  SDValue Val = N->getOperand(0);
4301  MB = 63 - countTrailingOnes(~Imm64);
4302  SH = 0;
4303  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4304  CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
4305  return;
4306  }
4307 
4308  // AND X, 0 -> 0, not "rlwinm 32".
4309  if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
4310  ReplaceUses(SDValue(N, 0), N->getOperand(1));
4311  return;
4312  }
4313  // ISD::OR doesn't get all the bitfield insertion fun.
4314  // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4315  // bitfield insert.
4316  if (isInt32Immediate(N->getOperand(1), Imm) &&
4317  N->getOperand(0).getOpcode() == ISD::OR &&
4318  isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
4319  // The idea here is to check whether this is equivalent to:
4320  // (c1 & m) | (x & ~m)
4321  // where m is a run-of-ones mask. The logic here is that, for each bit in
4322  // c1 and c2:
4323  // - if both are 1, then the output will be 1.
4324  // - if both are 0, then the output will be 0.
4325  // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4326  // come from x.
4327  // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4328  // be 0.
4329  // If that last condition is never the case, then we can form m from the
4330  // bits that are the same between c1 and c2.
4331  unsigned MB, ME;
4332  if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
4333  SDValue Ops[] = { N->getOperand(0).getOperand(0),
4334  N->getOperand(0).getOperand(1),
4335  getI32Imm(0, dl), getI32Imm(MB, dl),
4336  getI32Imm(ME, dl) };
4337  ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
4338  return;
4339  }
4340  }
4341 
4342  // Other cases are autogenerated.
4343  break;
4344  }
4345  case ISD::OR: {
4346  if (N->getValueType(0) == MVT::i32)
4347  if (tryBitfieldInsert(N))
4348  return;
4349 
4350  int16_t Imm;
4351  if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4352  isIntS16Immediate(N->getOperand(1), Imm)) {
4353  KnownBits LHSKnown;
4354  CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
4355 
4356  // If this is equivalent to an add, then we can fold it with the
4357  // FrameIndex calculation.
4358  if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
4359  selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4360  return;
4361  }
4362  }
4363 
4364  // OR with a 32-bit immediate can be handled by ori + oris
4365  // without creating an immediate in a GPR.
4366  uint64_t Imm64 = 0;
4367  bool IsPPC64 = PPCSubTarget->isPPC64();
4368  if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4369  (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4370  // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later.
4371  uint64_t ImmHi = Imm64 >> 16;
4372  uint64_t ImmLo = Imm64 & 0xFFFF;
4373  if (ImmHi != 0 && ImmLo != 0) {
4374  SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
4375  N->getOperand(0),
4376  getI16Imm(ImmLo, dl));
4377  SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4378  CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
4379  return;
4380  }
4381  }
4382 
4383  // Other cases are autogenerated.
4384  break;
4385  }
4386  case ISD::XOR: {
4387  // XOR with a 32-bit immediate can be handled by xori + xoris
4388  // without creating an immediate in a GPR.
4389  uint64_t Imm64 = 0;
4390  bool IsPPC64 = PPCSubTarget->isPPC64();
4391  if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4392  (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4393  // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later.
4394  uint64_t ImmHi = Imm64 >> 16;
4395  uint64_t ImmLo = Imm64 & 0xFFFF;
4396  if (ImmHi != 0 && ImmLo != 0) {
4397  SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
4398  N->getOperand(0),
4399  getI16Imm(ImmLo, dl));
4400  SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4401  CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
4402  return;
4403  }
4404  }
4405 
4406  break;
4407  }
4408  case ISD::ADD: {
4409  int16_t Imm;
4410  if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4411  isIntS16Immediate(N->getOperand(1), Imm)) {
4412  selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4413  return;
4414  }
4415 
4416  break;
4417  }
4418  case ISD::SHL: {
4419  unsigned Imm, SH, MB, ME;
4420  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4421  isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4422  SDValue Ops[] = { N->getOperand(0).getOperand(0),
4423  getI32Imm(SH, dl), getI32Imm(MB, dl),
4424  getI32Imm(ME, dl) };
4425  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4426  return;
4427  }
4428 
4429  // Other cases are autogenerated.
4430  break;
4431  }
4432  case ISD::SRL: {
4433  unsigned Imm, SH, MB, ME;
4434  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4435  isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4436  SDValue Ops[] = { N->getOperand(0).getOperand(0),
4437  getI32Imm(SH, dl), getI32Imm(MB, dl),
4438  getI32Imm(ME, dl) };
4439  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4440  return;
4441  }
4442 
4443  // Other cases are autogenerated.
4444  break;
4445  }
4446  // FIXME: Remove this once the ANDI glue bug is fixed:
4448  case PPCISD::ANDIo_1_GT_BIT: {
4449  if (!ANDIGlueBug)
4450  break;
4451 
4452  EVT InVT = N->getOperand(0).getValueType();
4453  assert((InVT == MVT::i64 || InVT == MVT::i32) &&
4454  "Invalid input type for ANDIo_1_EQ_BIT");
4455 
4456  unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
4457  SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
4458  N->getOperand(0),
4459  CurDAG->getTargetConstant(1, dl, InVT)),
4460  0);
4461  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
4462  SDValue SRIdxVal =
4463  CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
4464  PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
4465 
4466  CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
4467  SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
4468  return;
4469  }
4470  case ISD::SELECT_CC: {
4471  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4472  EVT PtrVT =
4473  CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
4474  bool isPPC64 = (PtrVT == MVT::i64);
4475 
4476  // If this is a select of i1 operands, we'll pattern match it.
4477  if (PPCSubTarget->useCRBits() &&
4478  N->getOperand(0).getValueType() == MVT::i1)
4479  break;
4480 
4481  // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
4482  if (!isPPC64)
4483  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
4484  if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
4485  if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
4486  if (N1C->isNullValue() && N3C->isNullValue() &&
4487  N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
4488  // FIXME: Implement this optzn for PPC64.
4489  N->getValueType(0) == MVT::i32) {
4490  SDNode *Tmp =
4491  CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4492  N->getOperand(0), getI32Imm(~0U, dl));
4493  CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
4494  N->getOperand(0), SDValue(Tmp, 1));
4495  return;
4496  }
4497 
4498  SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
4499 
4500  if (N->getValueType(0) == MVT::i1) {
4501  // An i1 select is: (c & t) | (!c & f).
4502  bool Inv;
4503  unsigned Idx = getCRIdxForSetCC(CC, Inv);
4504 
4505  unsigned SRI;
4506  switch (Idx) {
4507  default: llvm_unreachable("Invalid CC index");
4508  case 0: SRI = PPC::sub_lt; break;
4509  case 1: SRI = PPC::sub_gt; break;
4510  case 2: SRI = PPC::sub_eq; break;
4511  case 3: SRI = PPC::sub_un; break;
4512  }
4513 
4514  SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
4515 
4516  SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
4517  CCBit, CCBit), 0);
4518  SDValue C = Inv ? NotCCBit : CCBit,
4519  NotC = Inv ? CCBit : NotCCBit;
4520 
4521  SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4522  C, N->getOperand(2)), 0);
4523  SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4524  NotC, N->getOperand(3)), 0);
4525 
4526  CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
4527  return;
4528  }
4529 
4530  unsigned BROpc = getPredicateForSetCC(CC);
4531 
4532  unsigned SelectCCOp;
4533  if (N->getValueType(0) == MVT::i32)
4534  SelectCCOp = PPC::SELECT_CC_I4;
4535  else if (N->getValueType(0) == MVT::i64)
4536  SelectCCOp = PPC::SELECT_CC_I8;
4537  else if (N->getValueType(0) == MVT::f32)
4538  if (PPCSubTarget->hasP8Vector())
4539  SelectCCOp = PPC::SELECT_CC_VSSRC;
4540  else
4541  SelectCCOp = PPC::SELECT_CC_F4;
4542  else if (N->getValueType(0) == MVT::f64)
4543  if (PPCSubTarget->hasVSX())
4544  SelectCCOp = PPC::SELECT_CC_VSFRC;
4545  else
4546  SelectCCOp = PPC::SELECT_CC_F8;
4547  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
4548  SelectCCOp = PPC::SELECT_CC_QFRC;
4549  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
4550  SelectCCOp = PPC::SELECT_CC_QSRC;
4551  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
4552  SelectCCOp = PPC::SELECT_CC_QBRC;
4553  else if (N->getValueType(0) == MVT::v2f64 ||
4554  N->getValueType(0) == MVT::v2i64)
4555  SelectCCOp = PPC::SELECT_CC_VSRC;
4556  else
4557  SelectCCOp = PPC::SELECT_CC_VRRC;
4558 
4559  SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
4560  getI32Imm(BROpc, dl) };
4561  CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
4562  return;
4563  }
4564  case ISD::VSELECT:
4565  if (PPCSubTarget->hasVSX()) {
4566  SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
4567  CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
4568  return;
4569  }
4570  break;
4571 
4572  case ISD::VECTOR_SHUFFLE:
4573  if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
4574  N->getValueType(0) == MVT::v2i64)) {
4575  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
4576 
4577  SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
4578  Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
4579  unsigned DM[2];
4580 
4581  for (int i = 0; i < 2; ++i)
4582  if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
4583  DM[i] = 0;
4584  else
4585  DM[i] = 1;
4586 
4587  if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
4588  Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
4589  isa<LoadSDNode>(Op1.getOperand(0))) {
4590  LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
4591  SDValue Base, Offset;
4592 
4593  if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
4594  (LD->getMemoryVT() == MVT::f64 ||
4595  LD->getMemoryVT() == MVT::i64) &&
4596  SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
4597  SDValue Chain = LD->getChain();
4598  SDValue Ops[] = { Base, Offset, Chain };
4600  MemOp[0] = LD->getMemOperand();
4601  SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
4602  N->getValueType(0), Ops);
4603  cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
4604  return;
4605  }
4606  }
4607 
4608  // For little endian, we must swap the input operands and adjust
4609  // the mask elements (reverse and invert them).
4610  if (PPCSubTarget->isLittleEndian()) {
4611  std::swap(Op1, Op2);
4612  unsigned tmp = DM[0];
4613  DM[0] = 1 - DM[1];
4614  DM[1] = 1 - tmp;
4615  }
4616 
4617  SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
4618  MVT::i32);
4619  SDValue Ops[] = { Op1, Op2, DMV };
4620  CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
4621  return;
4622  }
4623 
4624  break;
4625  case PPCISD::BDNZ:
4626  case PPCISD::BDZ: {
4627  bool IsPPC64 = PPCSubTarget->isPPC64();
4628  SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
4629  CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
4630  ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
4631  : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
4632  MVT::Other, Ops);
4633  return;
4634  }
4635  case PPCISD::COND_BRANCH: {
4636  // Op #0 is the Chain.
4637  // Op #1 is the PPC::PRED_* number.
4638  // Op #2 is the CR#
4639  // Op #3 is the Dest MBB
4640  // Op #4 is the Flag.
4641  // Prevent PPC::PRED_* from being selected into LI.
4642  unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4643  if (EnableBranchHint)
4644  PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
4645 
4646  SDValue Pred = getI32Imm(PCC, dl);
4647  SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
4648  N->getOperand(0), N->getOperand(4) };
4649  CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4650  return;
4651  }
4652  case ISD::BR_CC: {
4653  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4654  unsigned PCC = getPredicateForSetCC(CC);
4655 
4656  if (N->getOperand(2).getValueType() == MVT::i1) {
4657  unsigned Opc;
4658  bool Swap;
4659  switch (PCC) {
4660  default: llvm_unreachable("Unexpected Boolean-operand predicate");
4661  case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
4662  case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
4663  case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
4664  case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
4665  case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
4666  case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
4667  }
4668 
4669  SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
4670  N->getOperand(Swap ? 3 : 2),
4671  N->getOperand(Swap ? 2 : 3)), 0);
4672  CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
4673  N->getOperand(0));
4674  return;
4675  }
4676 
4677  if (EnableBranchHint)
4678  PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
4679 
4680  SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
4681  SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
4682  N->getOperand(4), N->getOperand(0) };
4683  CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4684  return;
4685  }
4686  case ISD::BRIND: {
4687  // FIXME: Should custom lower this.
4688  SDValue Chain = N->getOperand(0);
4689  SDValue Target = N->getOperand(1);
4690  unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
4691  unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
4692  Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
4693  Chain), 0);
4694  CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
4695  return;
4696  }
4697  case PPCISD::TOC_ENTRY: {
4698  assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
4699  "Only supported for 64-bit ABI and 32-bit SVR4");
4700  if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
4701  SDValue GA = N->getOperand(0);
4702  SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
4703  N->getOperand(1));
4704  transferMemOperands(N, MN);
4705  ReplaceNode(N, MN);
4706  return;
4707  }
4708 
4709  // For medium and large code model, we generate two instructions as
4710  // described below. Otherwise we allow SelectCodeCommon to handle this,
4711  // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
4712  CodeModel::Model CModel = TM.getCodeModel();
4713  if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
4714  break;
4715 
4716  // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
4717  // If it must be toc-referenced according to PPCSubTarget, we generate:
4718  // LDtocL(@sym, ADDIStocHA(%x2, @sym))
4719  // Otherwise we generate:
4720  // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
4721  SDValue GA = N->getOperand(0);
4722  SDValue TOCbase = N->getOperand(1);
4723  SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
4724  TOCbase, GA);
4725 
4726  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
4727  CModel == CodeModel::Large) {
4728  SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
4729  SDValue(Tmp, 0));
4730  transferMemOperands(N, MN);
4731  ReplaceNode(N, MN);
4732  return;
4733  }
4734 
4735  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
4736  const GlobalValue *GV = G->getGlobal();
4737  unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
4738  if (GVFlags & PPCII::MO_NLP_FLAG) {
4739  SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
4740  SDValue(Tmp, 0));
4741  transferMemOperands(N, MN);
4742  ReplaceNode(N, MN);
4743  return;
4744  }
4745  }
4746 
4747  ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
4748  SDValue(Tmp, 0), GA));
4749  return;
4750  }
4751  case PPCISD::PPC32_PICGOT:
4752  // Generate a PIC-safe GOT reference.
4753  assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
4754  "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
4755  CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
4756  PPCLowering->getPointerTy(CurDAG->getDataLayout()),
4757  MVT::i32);
4758  return;
4759 
4760  case PPCISD::VADD_SPLAT: {
4761  // This expands into one of three sequences, depending on whether
4762  // the first operand is odd or even, positive or negative.
4763  assert(isa<ConstantSDNode>(N->getOperand(0)) &&
4764  isa<ConstantSDNode>(N->getOperand(1)) &&
4765  "Invalid operand on VADD_SPLAT!");
4766 
4767  int Elt = N->getConstantOperandVal(0);
4768  int EltSize = N->getConstantOperandVal(1);
4769  unsigned Opc1, Opc2, Opc3;
4770  EVT VT;
4771 
4772  if (EltSize == 1) {
4773  Opc1 = PPC::VSPLTISB;
4774  Opc2 = PPC::VADDUBM;
4775  Opc3 = PPC::VSUBUBM;
4776  VT = MVT::v16i8;
4777  } else if (EltSize == 2) {
4778  Opc1 = PPC::VSPLTISH;
4779  Opc2 = PPC::VADDUHM;
4780  Opc3 = PPC::VSUBUHM;
4781  VT = MVT::v8i16;
4782  } else {
4783  assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
4784  Opc1 = PPC::VSPLTISW;
4785  Opc2 = PPC::VADDUWM;
4786  Opc3 = PPC::VSUBUWM;
4787  VT = MVT::v4i32;
4788  }
4789 
4790  if ((Elt & 1) == 0) {
4791  // Elt is even, in the range [-32,-18] + [16,30].
4792  //
4793  // Convert: VADD_SPLAT elt, size
4794  // Into: tmp = VSPLTIS[BHW] elt
4795  // VADDU[BHW]M tmp, tmp
4796  // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
4797  SDValue EltVal = getI32Imm(Elt >> 1, dl);
4798  SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4799  SDValue TmpVal = SDValue(Tmp, 0);
4800  ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
4801  return;
4802  } else if (Elt > 0) {
4803  // Elt is odd and positive, in the range [17,31].
4804  //
4805  // Convert: VADD_SPLAT elt, size
4806  // Into: tmp1 = VSPLTIS[BHW] elt-16
4807  // tmp2 = VSPLTIS[BHW] -16
4808  // VSUBU[BHW]M tmp1, tmp2
4809  SDValue EltVal = getI32Imm(Elt - 16, dl);
4810  SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4811  EltVal = getI32Imm(-16, dl);
4812  SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4813  ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
4814  SDValue(Tmp2, 0)));
4815  return;
4816  } else {
4817  // Elt is odd and negative, in the range [-31,-17].
4818  //
4819  // Convert: VADD_SPLAT elt, size
4820  // Into: tmp1 = VSPLTIS[BHW] elt+16
4821  // tmp2 = VSPLTIS[BHW] -16
4822  // VADDU[BHW]M tmp1, tmp2
4823  SDValue EltVal = getI32Imm(Elt + 16, dl);
4824  SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4825  EltVal = getI32Imm(-16, dl);
4826  SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4827  ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
4828  SDValue(Tmp2, 0)));
4829  return;
4830  }
4831  }
4832  case ISD::ABS: {
4833  assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector");
4834 
4835  // For vector absolute difference, we use VABSDUW instruction of POWER9.
4836  // Since VABSDU instructions are for unsigned integers, we need adjustment
4837  // for signed integers.
4838  // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).
4839  // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
4840  // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).
4841  EVT VecVT = N->getOperand(0).getValueType();
4842  SDNode *AbsOp = nullptr;
4843  unsigned AbsOpcode;
4844 
4845  if (VecVT == MVT::v4i32)
4846  AbsOpcode = PPC::VABSDUW;
4847  else if (VecVT == MVT::v8i16)
4848  AbsOpcode = PPC::VABSDUH;
4849  else if (VecVT == MVT::v16i8)
4850  AbsOpcode = PPC::VABSDUB;
4851  else
4852  llvm_unreachable("Unsupported vector data type for ISD::ABS");
4853 
4854  // Even for signed integers, we can skip adjustment if all values are
4855  // known to be positive (as signed integer) due to zero-extended inputs.
4856  if (N->getOperand(0).getOpcode() == ISD::SUB &&
4859  AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
4860  SDValue(N->getOperand(0)->getOperand(0)),
4861  SDValue(N->getOperand(0)->getOperand(1)));
4862  ReplaceNode(N, AbsOp);
4863  return;
4864  }
4865  if (N->getOperand(0).getOpcode() == ISD::SUB) {
4866  SDValue SubVal = N->getOperand(0);
4867  SDNode *Op0 = flipSignBit(SubVal->getOperand(0));
4868  SDNode *Op1 = flipSignBit(SubVal->getOperand(1));
4869  AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
4870  SDValue(Op0, 0), SDValue(Op1, 0));
4871  }
4872  else {
4873  SDNode *Op1 = nullptr;
4874  SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1);
4875  AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0),
4876  SDValue(Op1, 0));
4877  }
4878  ReplaceNode(N, AbsOp);
4879  return;
4880  }
4881  }
4882 
4883  SelectCode(N);
4884 }
4885 
4886 // If the target supports the cmpb instruction, do the idiom recognition here.
4887 // We don't do this as a DAG combine because we don't want to do it as nodes
4888 // are being combined (because we might miss part of the eventual idiom). We
4889 // don't want to do it during instruction selection because we want to reuse
4890 // the logic for lowering the masking operations already part of the
4891 // instruction selector.
4892 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
4893  SDLoc dl(N);
4894 
4895  assert(N->getOpcode() == ISD::OR &&
4896  "Only OR nodes are supported for CMPB");
4897 
4898  SDValue Res;
4899  if (!PPCSubTarget->hasCMPB())
4900  return Res;
4901 
4902  if (N->getValueType(0) != MVT::i32 &&
4903  N->getValueType(0) != MVT::i64)
4904  return Res;
4905 
4906  EVT VT = N->getValueType(0);
4907 
4908  SDValue RHS, LHS;
4909  bool BytesFound[8] = {false, false, false, false, false, false, false, false};
4910  uint64_t Mask = 0, Alt = 0;
4911 
4912  auto IsByteSelectCC = [this](SDValue O, unsigned &b,
4913  uint64_t &Mask, uint64_t &Alt,
4914  SDValue &LHS, SDValue &RHS) {
4915  if (O.getOpcode() != ISD::SELECT_CC)
4916  return false;
4917  ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
4918 
4919  if (!isa<ConstantSDNode>(O.getOperand(2)) ||
4920  !isa<ConstantSDNode>(O.getOperand(3)))
4921  return false;
4922 
4923  uint64_t PM = O.getConstantOperandVal(2);
4924  uint64_t PAlt = O.getConstantOperandVal(3);
4925  for (b = 0; b < 8; ++b) {
4926  uint64_t Mask = UINT64_C(0xFF) << (8*b);
4927  if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
4928  break;
4929  }
4930 
4931  if (b == 8)
4932  return false;
4933  Mask |= PM;
4934  Alt |= PAlt;
4935 
4936  if (!isa<ConstantSDNode>(O.getOperand(1)) ||
4937  O.getConstantOperandVal(1) != 0) {
4938  SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
4939  if (Op0.getOpcode() == ISD::TRUNCATE)
4940  Op0 = Op0.getOperand(0);
4941  if (Op1.getOpcode() == ISD::TRUNCATE)
4942  Op1 = Op1.getOperand(0);
4943 
4944  if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
4945  Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
4946  isa<ConstantSDNode>(Op0.getOperand(1))) {
4947 
4948  unsigned Bits = Op0.getValueSizeInBits();
4949  if (b != Bits/8-1)
4950  return false;
4951  if (Op0.getConstantOperandVal(1) != Bits-8)
4952  return false;
4953 
4954  LHS = Op0.getOperand(0);
4955  RHS = Op1.getOperand(0);
4956  return true;
4957  }
4958 
4959  // When we have small integers (i16 to be specific), the form present
4960  // post-legalization uses SETULT in the SELECT_CC for the
4961  // higher-order byte, depending on the fact that the
4962  // even-higher-order bytes are known to all be zero, for example:
4963  // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
4964  // (so when the second byte is the same, because all higher-order
4965  // bits from bytes 3 and 4 are known to be zero, the result of the
4966  // xor can be at most 255)
4967  if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
4968  isa<ConstantSDNode>(O.getOperand(1))) {
4969 
4970  uint64_t ULim = O.getConstantOperandVal(1);
4971  if (ULim != (UINT64_C(1) << b*8))
4972  return false;
4973 
4974  // Now we need to make sure that the upper bytes are known to be
4975  // zero.
4976  unsigned Bits = Op0.getValueSizeInBits();
4977  if (!CurDAG->MaskedValueIsZero(
4978  Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
4979  return false;
4980 
4981  LHS = Op0.getOperand(0);
4982  RHS = Op0.getOperand(1);
4983  return true;
4984  }
4985 
4986  return false;
4987  }
4988 
4989  if (CC != ISD::SETEQ)
4990  return false;
4991 
4992  SDValue Op = O.getOperand(0);
4993  if (Op.getOpcode() == ISD::AND) {
4994  if (!isa<ConstantSDNode>(Op.getOperand(1)))
4995  return false;
4996  if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
4997  return false;
4998 
4999  SDValue XOR = Op.getOperand(0);
5000  if (XOR.getOpcode() == ISD::TRUNCATE)
5001  XOR = XOR.getOperand(0);
5002  if (XOR.getOpcode() != ISD::XOR)
5003  return false;
5004 
5005  LHS = XOR.getOperand(0);
5006  RHS = XOR.getOperand(1);
5007  return true;
5008  } else if (Op.getOpcode() == ISD::SRL) {
5009  if (!isa<ConstantSDNode>(Op.getOperand(1)))
5010  return false;
5011  unsigned Bits = Op.getValueSizeInBits();
5012  if (b != Bits/8-1)
5013  return false;
5014  if (Op.getConstantOperandVal(1) != Bits-8)
5015  return false;
5016 
5017  SDValue XOR = Op.getOperand(0);
5018  if (XOR.getOpcode() == ISD::TRUNCATE)
5019  XOR = XOR.getOperand(0);
5020  if (XOR.getOpcode() != ISD::XOR)
5021  return false;
5022 
5023  LHS = XOR.getOperand(0);
5024  RHS = XOR.getOperand(1);
5025  return true;
5026  }
5027 
5028  return false;
5029  };
5030 
5031  SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
5032  while (!Queue.empty()) {
5033  SDValue V = Queue.pop_back_val();
5034 
5035  for (const SDValue &O : V.getNode()->ops()) {
5036  unsigned b;
5037  uint64_t M = 0, A = 0;
5038  SDValue OLHS, ORHS;
5039  if (O.getOpcode() == ISD::OR) {
5040  Queue.push_back(O);
5041  } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
5042  if (!LHS) {
5043  LHS = OLHS;
5044  RHS = ORHS;
5045  BytesFound[b] = true;
5046  Mask |= M;
5047  Alt |= A;
5048  } else if ((LHS == ORHS && RHS == OLHS) ||
5049  (RHS == ORHS && LHS == OLHS)) {
5050  BytesFound[b] = true;
5051  Mask |= M;
5052  Alt |= A;
5053  } else {
5054  return Res;
5055  }
5056  } else {
5057  return Res;
5058  }
5059  }
5060  }
5061 
5062  unsigned LastB = 0, BCnt = 0;
5063  for (unsigned i = 0; i < 8; ++i)
5064  if (BytesFound[LastB]) {
5065  ++BCnt;
5066  LastB = i;
5067  }
5068 
5069  if (!LastB || BCnt < 2)
5070  return Res;
5071 
5072  // Because we'll be zero-extending the output anyway if don't have a specific
5073  // value for each input byte (via the Mask), we can 'anyext' the inputs.
5074  if (LHS.getValueType() != VT) {
5075  LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
5076  RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
5077  }
5078 
5079  Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
5080 
5081  bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
5082  if (NonTrivialMask && !Alt) {
5083  // Res = Mask & CMPB
5084  Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
5085  CurDAG->getConstant(Mask, dl, VT));
5086  } else if (Alt) {
5087  // Res = (CMPB & Mask) | (~CMPB & Alt)
5088  // Which, as suggested here:
5089  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
5090  // can be written as:
5091  // Res = Alt ^ ((Alt ^ Mask) & CMPB)
5092  // useful because the (Alt ^ Mask) can be pre-computed.
5093  Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
5094  CurDAG->getConstant(Mask ^ Alt, dl, VT));
5095  Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
5096  CurDAG->getConstant(Alt, dl, VT));
5097  }
5098 
5099  return Res;
5100 }
5101 
5102 // When CR bit registers are enabled, an extension of an i1 variable to a i32
5103 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
5104 // involves constant materialization of a 0 or a 1 or both. If the result of
5105 // the extension is then operated upon by some operator that can be constant
5106 // folded with a constant 0 or 1, and that constant can be materialized using
5107 // only one instruction (like a zero or one), then we should fold in those
5108 // operations with the select.
5109 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
5110  if (!PPCSubTarget->useCRBits())
5111  return;
5112 
5113  if (N->getOpcode() != ISD::ZERO_EXTEND &&
5114  N->getOpcode() != ISD::SIGN_EXTEND &&
5115  N->getOpcode() != ISD::ANY_EXTEND)
5116  return;
5117 
5118  if (N->getOperand(0).getValueType() != MVT::i1)
5119  return;
5120 
5121  if (!N->hasOneUse())
5122  return;
5123 
5124  SDLoc dl(N);
5125  EVT VT = N->getValueType(0);
5126  SDValue Cond = N->getOperand(0);
5127  SDValue ConstTrue =
5128  CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
5129  SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
5130 
5131  do {
5132  SDNode *User = *N->use_begin();
5133  if (User->getNumOperands() != 2)
5134  break;
5135 
5136  auto TryFold = [this, N, User, dl](SDValue Val) {
5137  SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
5138  SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
5139  SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
5140 
5141  return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
5142  User->getValueType(0),
5143  O0.getNode(), O1.getNode());
5144  };
5145 
5146  // FIXME: When the semantics of the interaction between select and undef
5147  // are clearly defined, it may turn out to be unnecessary to break here.
5148  SDValue TrueRes = TryFold(ConstTrue);
5149  if (!TrueRes || TrueRes.isUndef())
5150  break;
5151  SDValue FalseRes = TryFold(ConstFalse);
5152  if (!FalseRes || FalseRes.isUndef())
5153  break;
5154 
5155  // For us to materialize these using one instruction, we must be able to
5156  // represent them as signed 16-bit integers.
5157  uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
5158  False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
5159  if (!isInt<16>(True) || !isInt<16>(False))
5160  break;
5161 
5162  // We can replace User with a new SELECT node, and try again to see if we
5163  // can fold the select with its user.
5164  Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
5165  N = User;
5166  ConstTrue = TrueRes;
5167  ConstFalse = FalseRes;
5168  } while (N->hasOneUse());
5169 }
5170 
5171 void PPCDAGToDAGISel::PreprocessISelDAG() {
5172  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
5173  ++Position;
5174 
5175  bool MadeChange = false;
5176  while (Position != CurDAG->allnodes_begin()) {
5177  SDNode *N = &*--Position;
5178  if (N->use_empty())
5179  continue;
5180 
5181  SDValue Res;
5182  switch (N->getOpcode()) {
5183  default: break;
5184  case ISD::OR:
5185  Res = combineToCMPB(N);
5186  break;
5187  }
5188 
5189  if (!Res)
5190  foldBoolExts(Res, N);
5191 
5192  if (Res) {
5193  DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
5194  DEBUG(N->dump(CurDAG));
5195  DEBUG(dbgs() << "\nNew: ");
5196  DEBUG(Res.getNode()->dump(CurDAG));
5197  DEBUG(dbgs() << "\n");
5198 
5199  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
5200  MadeChange = true;
5201  }
5202  }
5203 
5204  if (MadeChange)
5205  CurDAG->RemoveDeadNodes();
5206 }
5207 
5208 /// PostprocessISelDAG - Perform some late peephole optimizations
5209 /// on the DAG representation.
5210 void PPCDAGToDAGISel::PostprocessISelDAG() {
5211  // Skip peepholes at -O0.
5212  if (TM.getOptLevel() == CodeGenOpt::None)
5213  return;
5214 
5215  PeepholePPC64();
5216  PeepholeCROps();
5217  PeepholePPC64ZExt();
5218 }
5219 
5220 // Check if all users of this node will become isel where the second operand
5221 // is the constant zero. If this is so, and if we can negate the condition,
5222 // then we can flip the true and false operands. This will allow the zero to
5223 // be folded with the isel so that we don't need to materialize a register
5224 // containing zero.
5225 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
5226  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5227  UI != UE; ++UI) {
5228  SDNode *User = *UI;
5229  if (!User->isMachineOpcode())
5230  return false;
5231  if (User->getMachineOpcode() != PPC::SELECT_I4 &&
5232  User->getMachineOpcode() != PPC::SELECT_I8)
5233  return false;
5234 
5235  SDNode *Op2 = User->getOperand(2).getNode();
5236  if (!Op2->isMachineOpcode())
5237  return false;
5238 
5239  if (Op2->getMachineOpcode() != PPC::LI &&
5240  Op2->getMachineOpcode() != PPC::LI8)
5241  return false;
5242 
5244  if (!C)
5245  return false;
5246 
5247  if (!C->isNullValue())
5248  return false;
5249  }
5250 
5251  return true;
5252 }
5253 
5254 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
5255  SmallVector<SDNode *, 4> ToReplace;
5256  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5257  UI != UE; ++UI) {
5258  SDNode *User = *UI;
5259  assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
5260  User->getMachineOpcode() == PPC::SELECT_I8) &&
5261  "Must have all select users");
5262  ToReplace.push_back(User);
5263  }
5264 
5265  for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
5266  UE = ToReplace.end(); UI != UE; ++UI) {
5267  SDNode *User = *UI;
5268  SDNode *ResNode =
5269  CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
5270  User->getValueType(0), User->getOperand(0),
5271  User->getOperand(2),
5272  User->getOperand(1));
5273 
5274  DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
5275  DEBUG(User->dump(CurDAG));
5276  DEBUG(dbgs() << "\nNew: ");
5277  DEBUG(ResNode->dump(CurDAG));
5278  DEBUG(dbgs() << "\n");
5279 
5280  ReplaceUses(User, ResNode);
5281  }
5282 }
5283 
5284 void PPCDAGToDAGISel::PeepholeCROps() {
5285  bool IsModified;
5286  do {
5287  IsModified = false;
5288  for (SDNode &Node : CurDAG->allnodes()) {
5289  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
5290  if (!MachineNode || MachineNode->use_empty())
5291  continue;
5292  SDNode *ResNode = MachineNode;
5293 
5294  bool Op1Set = false, Op1Unset = false,
5295  Op1Not = false,
5296  Op2Set = false, Op2Unset = false,
5297  Op2Not = false;
5298 
5299  unsigned Opcode = MachineNode->getMachineOpcode();
5300  switch (Opcode) {
5301  default: break;
5302  case PPC::CRAND:
5303  case PPC::CRNAND:
5304  case PPC::CROR:
5305  case PPC::CRXOR:
5306  case PPC::CRNOR:
5307  case PPC::CREQV:
5308  case PPC::CRANDC:
5309  case PPC::CRORC: {
5310  SDValue Op = MachineNode->getOperand(1);
5311  if (Op.isMachineOpcode()) {
5312  if (Op.getMachineOpcode() == PPC::CRSET)
5313  Op2Set = true;
5314  else if (Op.getMachineOpcode() == PPC::CRUNSET)
5315  Op2Unset = true;
5316  else if (Op.getMachineOpcode() == PPC::CRNOR &&
5317  Op.getOperand(0) == Op.getOperand(1))
5318  Op2Not = true;
5319  }
5321  }
5322  case PPC::BC:
5323  case PPC::BCn:
5324  case PPC::SELECT_I4:
5325  case PPC::SELECT_I8:
5326  case PPC::SELECT_F4:
5327  case PPC::SELECT_F8:
5328  case PPC::SELECT_QFRC:
5329  case PPC::SELECT_QSRC:
5330  case PPC::SELECT_QBRC:
5331  case PPC::SELECT_VRRC:
5332  case PPC::SELECT_VSFRC:
5333  case PPC::SELECT_VSSRC:
5334  case PPC::SELECT_VSRC: {
5335  SDValue Op = MachineNode->getOperand(0);
5336  if (Op.isMachineOpcode()) {
5337  if (Op.getMachineOpcode() == PPC::CRSET)
5338  Op1Set = true;
5339  else if (Op.getMachineOpcode() == PPC::CRUNSET)
5340  Op1Unset = true;
5341  else if (Op.getMachineOpcode() == PPC::CRNOR &&
5342  Op.getOperand(0) == Op.getOperand(1))
5343  Op1Not = true;
5344  }
5345  }
5346  break;
5347  }
5348 
5349  bool SelectSwap = false;
5350  switch (Opcode) {
5351  default: break;
5352  case PPC::CRAND:
5353  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5354  // x & x = x
5355  ResNode = MachineNode->getOperand(0).getNode();
5356  else if (Op1Set)
5357  // 1 & y = y
5358  ResNode = MachineNode->getOperand(1).getNode();
5359  else if (Op2Set)
5360  // x & 1 = x
5361  ResNode = MachineNode->getOperand(0).getNode();
5362  else if (Op1Unset || Op2Unset)
5363  // x & 0 = 0 & y = 0
5364  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5365  MVT::i1);
5366  else if (Op1Not)
5367  // ~x & y = andc(y, x)
5368  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5369  MVT::i1, MachineNode->getOperand(1),
5370  MachineNode->getOperand(0).
5371  getOperand(0));
5372  else if (Op2Not)
5373  // x & ~y = andc(x, y)
5374  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5375  MVT::i1, MachineNode->getOperand(0),
5376  MachineNode->getOperand(1).
5377  getOperand(0));
5378  else if (AllUsersSelectZero(MachineNode)) {
5379  ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
5380  MVT::i1, MachineNode->getOperand(0),
5381  MachineNode->getOperand(1));
5382  SelectSwap = true;
5383  }
5384  break;
5385  case PPC::CRNAND:
5386  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5387  // nand(x, x) -> nor(x, x)
5388  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5389  MVT::i1, MachineNode->getOperand(0),
5390  MachineNode->getOperand(0));
5391  else if (Op1Set)
5392  // nand(1, y) -> nor(y, y)
5393  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5394  MVT::i1, MachineNode->getOperand(1),
5395  MachineNode->getOperand(1));
5396  else if (Op2Set)
5397  // nand(x, 1) -> nor(x, x)
5398  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5399  MVT::i1, MachineNode->getOperand(0),
5400  MachineNode->getOperand(0));
5401  else if (Op1Unset || Op2Unset)
5402  // nand(x, 0) = nand(0, y) = 1
5403  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5404  MVT::i1);
5405  else if (Op1Not)
5406  // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
5407  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5408