LLVM  13.0.0git
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a pattern matching instruction selector for PowerPC,
10 // converting from a legalized dag to a PPC dag.
11 //
12 //===----------------------------------------------------------------------===//
13 
16 #include "PPC.h"
17 #include "PPCISelLowering.h"
18 #include "PPCMachineFunctionInfo.h"
19 #include "PPCSubtarget.h"
20 #include "PPCTargetMachine.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
40 #include "llvm/IR/BasicBlock.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/GlobalValue.h"
44 #include "llvm/IR/InlineAsm.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/IntrinsicsPowerPC.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CodeGen.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/KnownBits.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <limits>
63 #include <memory>
64 #include <new>
65 #include <tuple>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define DEBUG_TYPE "ppc-codegen"
71 
// Statistics reported by this pass under -stats; all relate to the
// GPR-based integer-compare expansion and setb lowering below.
STATISTIC(NumSextSetcc,
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
          "Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
          "Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
          "Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
          "Number of compares not eliminated as they have non-extending uses.");
STATISTIC(NumP9Setb,
          "Number of compares lowered to setb.");

// FIXME: Remove this once the bug has been fixed!
// Debug knob: deliberately exposes the ANDI glue bug for reproduction.
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
                          cl::desc("expose the ANDI glue bug on PPC"),
                          cl::Hidden);

// When true (the default), use the aggressive bit-permutation rewriter
// (tryBitPermutation) during instruction selection.
static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);
96  "ppc-bit-perm-rewriter-stress-rotates",
97  cl::desc("stress rotate selection in aggressive ppc isel for "
98  "bit permutations"),
99  cl::Hidden);
100 
102  "ppc-use-branch-hint", cl::init(true),
103  cl::desc("Enable static hinting of branches on ppc"),
104  cl::Hidden);
105 
107  "ppc-tls-opt", cl::init(true),
108  cl::desc("Enable tls optimization peephole"),
109  cl::Hidden);
110 
114 
116  "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
117  cl::desc("Specify the types of comparisons to emit GPR-only code for."),
118  cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
119  clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
120  clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
121  clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
122  clEnumValN(ICGPR_NonExtIn, "nonextin",
123  "Only comparisons where inputs don't need [sz]ext."),
124  clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
125  clEnumValN(ICGPR_ZextI32, "zexti32",
126  "Only i32 comparisons with zext result."),
127  clEnumValN(ICGPR_ZextI64, "zexti64",
128  "Only i64 comparisons with zext result."),
129  clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
130  clEnumValN(ICGPR_SextI32, "sexti32",
131  "Only i32 comparisons with sext result."),
132  clEnumValN(ICGPR_SextI64, "sexti64",
133  "Only i64 comparisons with sext result.")));
134 namespace {
135 
136  //===--------------------------------------------------------------------===//
137  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
138  /// instructions for SelectionDAG operations.
139  ///
class PPCDAGToDAGISel : public SelectionDAGISel {
  // Target machine this selector was created for.
  const PPCTargetMachine &TM;
  // Per-function subtarget; refreshed in runOnMachineFunction().
  const PPCSubtarget *Subtarget = nullptr;
  // Target lowering for the current subtarget; used by all the address
  // selection helpers below.
  const PPCTargetLowering *PPCLowering = nullptr;
  // Register holding the PIC base for the current function; 0 means it has
  // not been materialized yet (see getGlobalBaseReg()).
  unsigned GlobalBaseReg = 0;

public:
  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm) {}

  // Per-function setup: reset cached per-function state and, when ROP
  // protection is enabled, reserve a stack slot for the protection hash.
  bool runOnMachineFunction(MachineFunction &MF) override {
    // Make sure we re-emit a set of the global base reg if necessary
    GlobalBaseReg = 0;
    Subtarget = &MF.getSubtarget<PPCSubtarget>();
    PPCLowering = Subtarget->getTargetLowering();
    if (Subtarget->hasROPProtect()) {
      // Create a place on the stack for the ROP Protection Hash.
      // The ROP Protection Hash will always be 8 bytes and aligned to 8
      // bytes.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      // NOTE(review): the declaration of `FI` (presumably
      // `PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();`) appears to
      // have been dropped from this extract — confirm against upstream.
      const int Result = MFI.CreateStackObject(8, Align(8), false);
      FI->setROPProtectionHashSaveIndex(Result);
    }
    // NOTE(review): a call to SelectionDAGISel::runOnMachineFunction(MF)
    // appears to have been dropped here — confirm against upstream.

    return true;
  }

  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
  }

  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  /// getI64Imm - Return a target constant with the specified value, of type
  /// i64.
  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  }

  /// getSmallIPtrImm - Return a target constant of pointer type.
  inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(
        Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
  }

  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
  /// rotate and mask opcode and mask operation.
  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                              unsigned &SH, unsigned &MB, unsigned &ME);

  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register. Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();

  // Replace SN with an ADDI/ADDI8 of the target frame index in N plus Offset.
  void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

  // Select - Convert the specified operand from a target-independent to a
  // target-specific node if it hasn't already been changed.
  void Select(SDNode *N) override;

  // Pattern-combining entry points; each returns true if it replaced N.
  bool tryBitfieldInsert(SDNode *N);
  bool tryBitPermutation(SDNode *N);
  bool tryIntCompareInGPR(SDNode *N);

  // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
  // an X-Form load instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormLoad(LoadSDNode *N);
  // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
  // an X-Form store instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormStore(StoreSDNode *N);
  /// SelectCC - Select a comparison of the specified values with the
  /// specified condition code, returning the CR# of the expression.
  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                   const SDLoc &dl, SDValue Chain = SDValue());

  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
  /// immediate field. Note that the operand at this point is already the
  /// result of a prior SelectAddressRegImm call.
  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
    if (N.getOpcode() == ISD::TargetConstant ||
        N.getOpcode() == ISD::TargetGlobalAddress) {
      Out = N;
      return true;
    }

    return false;
  }

  /// SelectDSForm - Returns true if address N can be represented by the
  /// addressing mode of DSForm instructions (a base register, plus a signed
  /// 16-bit displacement that is a multiple of 4.
  bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              Align(4)) == PPC::AM_DSForm;
  }

  /// SelectDQForm - Returns true if address N can be represented by the
  /// addressing mode of DQForm instructions (a base register, plus a signed
  /// 16-bit displacement that is a multiple of 16.
  bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              Align(16)) == PPC::AM_DQForm;
  }

  /// SelectDForm - Returns true if address N can be represented by
  /// the addressing mode of DForm instructions (a base register, plus a
  /// signed 16-bit immediate.
  bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              None) == PPC::AM_DForm;
  }

  /// SelectXForm - Returns true if address N can be represented by the
  /// addressing mode of XForm instructions (an indexed [r+r] operation).
  bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              None) == PPC::AM_XForm;
  }

  /// SelectForceXForm - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation (an XForm instruction).
  bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
                        SDValue &Base) {
    // NOTE(review): the right-hand side of this comparison (presumably
    // `PPC::AM_XForm;`) appears to have been dropped from this extract.
    return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
  }

  /// SelectAddrIdx - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is D.
  /// The last parameter \p 0 means associated D form has no requirement for 16
  /// bit signed displacement.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
  }

  /// SelectAddrIdx4 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DS.
  /// The last parameter \p 4 means associated DS form 16 bit signed
  /// displacement must be a multiple of 4.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(4));
  }

  /// SelectAddrIdx16 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DQ.
  /// The last parameter \p 16 means associated DQ form 16 bit signed
  /// displacement must be a multiple of 16.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrIdxOnly - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation.
  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
  }

  /// SelectAddrImm - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement [r+imm].
  /// The last parameter \p 0 means D form has no requirement for 16 bit signed
  /// displacement.
  bool SelectAddrImm(SDValue N, SDValue &Disp,
                     SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
  }

  /// SelectAddrImmX4 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 4 (last parameter). Suitable for use by STD and friends.
  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
  }

  /// SelectAddrImmX16 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 16(last parameter). Suitable for use by STXV and friends.
  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrImmX34 - Returns true if the address N can be represented by
  /// a base register plus a signed 34-bit displacement. Suitable for use by
  /// PSTXVP and friends.
  bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
  }

  // Select an address into a single register.
  bool SelectAddr(SDValue N, SDValue &Base) {
    Base = N;
    return true;
  }

  // Select a PC-relative address (Power10 prefixed-instruction addressing).
  bool SelectAddrPCRel(SDValue N, SDValue &Base) {
    return PPCLowering->SelectAddressPCRel(N, Base);
  }

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions. It is always correct to compute the value into
  /// a register. The case of adding a (possibly relocatable) constant to a
  /// register can be improved, but it is wrong to substitute Reg+Reg for
  /// Reg in an asm, because the load or store opcode would have to change.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override {
    switch(ConstraintID) {
    default:
      errs() << "ConstraintID: " << ConstraintID << "\n";
      llvm_unreachable("Unexpected asm memory constraint");
    // NOTE(review): the `case InlineAsm::Constraint_*:` labels for the
    // memory constraints handled below appear to have been dropped from
    // this extract — confirm against upstream.
      // We need to make sure that this one operand does not end up in r0
      // (because we might end up lowering this as 0(%op)).
      const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
      const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
      SDLoc dl(Op);
      SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
      SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);

      OutOps.push_back(NewOp);
      return false;
    }
    return true;
  }

  StringRef getPassName() const override {
    return "PowerPC DAG->DAG Pattern Instruction Selection";
  }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
  // Helpers used by Select(); each returns true if it replaced N with a
  // machine node.
  bool trySETCC(SDNode *N);
  bool tryFoldSWTestBRCC(SDNode *N);
  bool tryAsSingleRLDICL(SDNode *N);
  bool tryAsSingleRLDICR(SDNode *N);
  bool tryAsSingleRLWINM(SDNode *N);
  bool tryAsSingleRLWINM8(SDNode *N);
  bool tryAsSingleRLWIMI(SDNode *N);
  bool tryAsPairOfRLDICL(SDNode *N);
  bool tryAsSingleRLDIMI(SDNode *N);

  // Post-selection peephole passes run from PostprocessISelDAG().
  void PeepholePPC64();
  void PeepholePPC64ZExt();
  void PeepholeCROps();

  SDValue combineToCMPB(SDNode *N);
  void foldBoolExts(SDValue &Res, SDNode *&N);

  bool AllUsersSelectZero(SDNode *N);
  void SwapAllSelectUsers(SDNode *N);

  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
  // Copy memory operands from N to the selected machine node Result.
  void transferMemOperands(SDNode *N, SDNode *Result);
};
425 
426 } // end anonymous namespace
427 
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
  if (!GlobalBaseReg) {
    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = MF->front();
    // NOTE(review): the declaration of `MBBI` (presumably
    // `MachineBasicBlock::iterator MBBI = FirstMBB.begin();`) appears to
    // have been dropped from this extract — confirm against upstream.
    const Module *M = MF->getFunction().getParent();
    DebugLoc dl;

    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
      if (Subtarget->isTargetELF()) {
        // 32-bit ELF PIC: the base lives in the fixed register R30.
        GlobalBaseReg = PPC::R30;
        if (!Subtarget->isSecurePlt() &&
            M->getPICLevel() == PICLevel::SmallPIC) {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        } else {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
          BuildMI(FirstMBB, MBBI, dl,
                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
          // NOTE(review): the operand list for this BuildMI (register
          // operands referencing TempReg and GlobalBaseReg) appears to have
          // been dropped from this extract — confirm against upstream.
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        }
      } else {
        // 32-bit non-ELF: use a virtual register that may not be R0.
        GlobalBaseReg =
          RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
      }
    } else {
      // We must ensure that this sequence is dominated by the prologue.
      // FIXME: This is a bit of a big hammer since we don't get the benefits
      // of shrink-wrapping whenever we emit this instruction. Considering
      // this is used in any function where we emit a jump table, this may be
      // a significant limitation. We should consider inserting this in the
      // block where it is used and then commoning this sequence up if it
      // appears in multiple places.
      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
      // MovePCtoLR8.
      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
    }
  }
  return CurDAG->getRegister(GlobalBaseReg,
                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
      .getNode();
}
483 
484 // Check if a SDValue has the toc-data attribute.
485 static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
486  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
487  if (!GA)
488  return false;
489 
490  const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
491  if (!GV)
492  return false;
493 
494  if (!GV->hasAttribute("toc-data"))
495  return false;
496 
497  // TODO: These asserts should be updated as more support for the toc data
498  // transformation is added (64 bit, struct support, etc.).
499 
500  assert(PointerSize == 4 && "Only 32 Bit Codegen is currently supported by "
501  "the toc data transformation.");
502 
503  assert(PointerSize >= GV->getAlign().valueOrOne().value() &&
504  "GlobalVariables with an alignment requirement stricter then 4-bytes "
505  "not supported by the toc data transformation.");
506 
507  Type *PtrType = GV->getType();
508  assert(PtrType->isPointerTy() &&
509  "GlobalVariables always have pointer type!.");
510 
511  Type *GVType = dyn_cast<PointerType>(PtrType)->getElementType();
512 
513  assert(GVType->isSized() && "A GlobalVariable's size must be known to be "
514  "supported by the toc data transformation.");
515 
516  if (GVType->isVectorTy())
517  report_fatal_error("A GlobalVariable of Vector type is not currently "
518  "supported by the toc data transformation.");
519 
520  if (GVType->isArrayTy())
521  report_fatal_error("A GlobalVariable of Array type is not currently "
522  "supported by the toc data transformation.");
523 
524  if (GVType->isStructTy())
525  report_fatal_error("A GlobalVariable of Struct type is not currently "
526  "supported by the toc data transformation.");
527 
528  assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&
529  "A GlobalVariable with size larger than 32 bits is not currently "
530  "supported by the toc data transformation.");
531 
532  if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())
533  report_fatal_error("A GlobalVariable with private or local linkage is not "
534  "currently supported by the toc data transformation.");
535 
536  assert(!GV->hasCommonLinkage() &&
537  "Tentative definitions cannot have the mapping class XMC_TD.");
538 
539  return true;
540 }
541 
542 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
543 /// operand. If so Imm will receive the 32-bit value.
544 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
545  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
546  Imm = cast<ConstantSDNode>(N)->getZExtValue();
547  return true;
548  }
549  return false;
550 }
551 
552 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
553 /// operand. If so Imm will receive the 64-bit value.
554 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
555  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
556  Imm = cast<ConstantSDNode>(N)->getZExtValue();
557  return true;
558  }
559  return false;
560 }
561 
562 // isInt32Immediate - This method tests to see if a constant operand.
563 // If so Imm will receive the 32 bit value.
564 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
565  return isInt32Immediate(N.getNode(), Imm);
566 }
567 
568 /// isInt64Immediate - This method tests to see if the value is a 64-bit
569 /// constant operand. If so Imm will receive the 64-bit value.
570 static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
571  return isInt64Immediate(N.getNode(), Imm);
572 }
573 
574 static unsigned getBranchHint(unsigned PCC,
575  const FunctionLoweringInfo &FuncInfo,
576  const SDValue &DestMBB) {
577  assert(isa<BasicBlockSDNode>(DestMBB));
578 
579  if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
580 
581  const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
582  const Instruction *BBTerm = BB->getTerminator();
583 
584  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
585 
586  const BasicBlock *TBB = BBTerm->getSuccessor(0);
587  const BasicBlock *FBB = BBTerm->getSuccessor(1);
588 
589  auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
590  auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
591 
592  // We only want to handle cases which are easy to predict at static time, e.g.
593  // C++ throw statement, that is very likely not taken, or calling never
594  // returned function, e.g. stdlib exit(). So we set Threshold to filter
595  // unwanted cases.
596  //
597  // Below is LLVM branch weight table, we only want to handle case 1, 2
598  //
599  // Case Taken:Nontaken Example
600  // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
601  // 2. Invoke-terminating 1:1048575
602  // 3. Coldblock 4:64 __builtin_expect
603  // 4. Loop Branch 124:4 For loop
604  // 5. PH/ZH/FPH 20:12
605  const uint32_t Threshold = 10000;
606 
607  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
608  return PPC::BR_NO_HINT;
609 
610  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
611  << "::" << BB->getName() << "'\n"
612  << " -> " << TBB->getName() << ": " << TProb << "\n"
613  << " -> " << FBB->getName() << ": " << FProb << "\n");
614 
615  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
616 
617  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
618  // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
619  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
620  std::swap(TProb, FProb);
621 
622  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
623 }
624 
625 // isOpcWithIntImmediate - This method tests to see if the node is a specific
626 // opcode and that it has a immediate integer right operand.
627 // If so Imm will receive the 32 bit value.
628 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
629  return N->getOpcode() == Opc
630  && isInt32Immediate(N->getOperand(1).getNode(), Imm);
631 }
632 
633 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
634  SDLoc dl(SN);
635  int FI = cast<FrameIndexSDNode>(N)->getIndex();
636  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
637  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
638  if (SN->hasOneUse())
639  CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
640  getSmallIPtrImm(Offset, dl));
641  else
642  ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
643  getSmallIPtrImm(Offset, dl)));
644 }
645 
646 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
647  bool isShiftMask, unsigned &SH,
648  unsigned &MB, unsigned &ME) {
649  // Don't even go down this path for i64, since different logic will be
650  // necessary for rldicl/rldicr/rldimi.
651  if (N->getValueType(0) != MVT::i32)
652  return false;
653 
654  unsigned Shift = 32;
655  unsigned Indeterminant = ~0; // bit mask marking indeterminant results
656  unsigned Opcode = N->getOpcode();
657  if (N->getNumOperands() != 2 ||
658  !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
659  return false;
660 
661  if (Opcode == ISD::SHL) {
662  // apply shift left to mask if it comes first
663  if (isShiftMask) Mask = Mask << Shift;
664  // determine which bits are made indeterminant by shift
665  Indeterminant = ~(0xFFFFFFFFu << Shift);
666  } else if (Opcode == ISD::SRL) {
667  // apply shift right to mask if it comes first
668  if (isShiftMask) Mask = Mask >> Shift;
669  // determine which bits are made indeterminant by shift
670  Indeterminant = ~(0xFFFFFFFFu >> Shift);
671  // adjust for the left rotate
672  Shift = 32 - Shift;
673  } else if (Opcode == ISD::ROTL) {
674  Indeterminant = 0;
675  } else {
676  return false;
677  }
678 
679  // if the mask doesn't intersect any Indeterminant bits
680  if (Mask && !(Mask & Indeterminant)) {
681  SH = Shift & 31;
682  // make sure the mask is still a mask (wrap arounds may not be)
683  return isRunOfOnes(Mask, MB, ME);
684  }
685  return false;
686 }
687 
688 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
689  SDValue Base = ST->getBasePtr();
690  if (Base.getOpcode() != PPCISD::ADD_TLS)
691  return false;
692  SDValue Offset = ST->getOffset();
693  if (!Offset.isUndef())
694  return false;
695  if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
696  return false;
697 
698  SDLoc dl(ST);
699  EVT MemVT = ST->getMemoryVT();
700  EVT RegVT = ST->getValue().getValueType();
701 
702  unsigned Opcode;
703  switch (MemVT.getSimpleVT().SimpleTy) {
704  default:
705  return false;
706  case MVT::i8: {
707  Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
708  break;
709  }
710  case MVT::i16: {
711  Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
712  break;
713  }
714  case MVT::i32: {
715  Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
716  break;
717  }
718  case MVT::i64: {
719  Opcode = PPC::STDXTLS;
720  break;
721  }
722  }
723  SDValue Chain = ST->getChain();
724  SDVTList VTs = ST->getVTList();
725  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
726  Chain};
727  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
728  transferMemOperands(ST, MN);
729  ReplaceNode(ST, MN);
730  return true;
731 }
732 
733 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
734  SDValue Base = LD->getBasePtr();
735  if (Base.getOpcode() != PPCISD::ADD_TLS)
736  return false;
737  SDValue Offset = LD->getOffset();
738  if (!Offset.isUndef())
739  return false;
740  if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
741  return false;
742 
743  SDLoc dl(LD);
744  EVT MemVT = LD->getMemoryVT();
745  EVT RegVT = LD->getValueType(0);
746  unsigned Opcode;
747  switch (MemVT.getSimpleVT().SimpleTy) {
748  default:
749  return false;
750  case MVT::i8: {
751  Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
752  break;
753  }
754  case MVT::i16: {
755  Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
756  break;
757  }
758  case MVT::i32: {
759  Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
760  break;
761  }
762  case MVT::i64: {
763  Opcode = PPC::LDXTLS;
764  break;
765  }
766  }
767  SDValue Chain = LD->getChain();
768  SDVTList VTs = LD->getVTList();
769  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
770  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
771  transferMemOperands(LD, MN);
772  ReplaceNode(LD, MN);
773  return true;
774 }
775 
/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDLoc dl(N);

  KnownBits LKnown = CurDAG->computeKnownBits(Op0);
  KnownBits RKnown = CurDAG->computeKnownBits(Op1);

  // Known-zero masks of each side of the OR.
  unsigned TargetMask = LKnown.Zero.getZExtValue();
  unsigned InsertMask = RKnown.Zero.getZExtValue();

  // Proceed only if together the operands' known-zero bits cover all 32
  // bits, i.e. the OR merges two disjoint bitfields.
  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
    unsigned Op0Opc = Op0.getOpcode();
    unsigned Op1Opc = Op1.getOpcode();
    unsigned Value, SH = 0;
    // Flip to the bits each side actually contributes.
    TargetMask = ~TargetMask;
    InsertMask = ~InsertMask;

    // If the LHS has a foldable shift and the RHS does not, then swap it to the
    // RHS so that we can fold the shift into the insert.
    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
          Op0.getOperand(0).getOpcode() == ISD::SRL) {
        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
            Op1.getOperand(0).getOpcode() != ISD::SRL) {
          std::swap(Op0, Op1);
          std::swap(Op0Opc, Op1Opc);
          std::swap(TargetMask, InsertMask);
        }
      }
    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
          Op1.getOperand(0).getOpcode() != ISD::SRL) {
        std::swap(Op0, Op1);
        std::swap(Op0Opc, Op1Opc);
        std::swap(TargetMask, InsertMask);
      }
    }

    unsigned MB, ME;
    // The inserted field must be a contiguous run of ones (rlwimi's MB..ME).
    if (isRunOfOnes(InsertMask, MB, ME)) {
      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
          isInt32Immediate(Op1.getOperand(1), Value)) {
        Op1 = Op1.getOperand(0);
        SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
      }
      if (Op1Opc == ISD::AND) {
        // The AND mask might not be a constant, and we need to make sure that
        // if we're going to fold the masking with the insert, all bits not
        // know to be zero in the mask are known to be one.
        KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

        unsigned SHOpc = Op1.getOperand(0).getOpcode();
        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
          // NOTE(review): a condition line (presumably
          // `isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {`)
          // appears to have been dropped from this extract — confirm
          // against upstream before relying on this code.
          // Note that Value must be in range here (less than 32) because
          // otherwise there would not be any bits set in InsertMask.
          Op1 = Op1.getOperand(0).getOperand(0);
          SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
        }
      }

      SH &= 31;
      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
                        getI32Imm(ME, dl) };
      ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
      return true;
    }
  }
  return false;
}
850 
851 static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
852  unsigned MaxTruncation = 0;
853  // Cannot use range-based for loop here as we need the actual use (i.e. we
854  // need the operand number corresponding to the use). A range-based for
855  // will unbox the use and provide an SDNode*.
856  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
857  Use != UseEnd; ++Use) {
858  unsigned Opc =
859  Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
860  switch (Opc) {
861  default: return 0;
862  case ISD::TRUNCATE:
863  if (Use->isMachineOpcode())
864  return 0;
865  MaxTruncation =
866  std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
867  continue;
868  case ISD::STORE: {
869  if (Use->isMachineOpcode())
870  return 0;
871  StoreSDNode *STN = cast<StoreSDNode>(*Use);
872  unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
873  if (MemVTSize == 64 || Use.getOperandNo() != 0)
874  return 0;
875  MaxTruncation = std::max(MaxTruncation, MemVTSize);
876  continue;
877  }
878  case PPC::STW8:
879  case PPC::STWX8:
880  case PPC::STWU8:
881  case PPC::STWUX8:
882  if (Use.getOperandNo() != 0)
883  return 0;
884  MaxTruncation = std::max(MaxTruncation, 32u);
885  continue;
886  case PPC::STH8:
887  case PPC::STHX8:
888  case PPC::STHU8:
889  case PPC::STHUX8:
890  if (Use.getOperandNo() != 0)
891  return 0;
892  MaxTruncation = std::max(MaxTruncation, 16u);
893  continue;
894  case PPC::STB8:
895  case PPC::STBX8:
896  case PPC::STBU8:
897  case PPC::STBUX8:
898  if (Use.getOperandNo() != 0)
899  return 0;
900  MaxTruncation = std::max(MaxTruncation, 8u);
901  continue;
902  }
903  }
904  return MaxTruncation;
905 }
906 
907 // For any 32 < Num < 64, check if the Imm contains at least Num consecutive
908 // zeros and return the number of bits by the left of these consecutive zeros.
909 static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
910  unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
911  unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
912  if ((HiTZ + LoLZ) >= Num)
913  return (32 + HiTZ);
914  return 0;
915 }
916 
// Direct materialization of 64-bit constants by enumerated patterns.
// On success, returns a machine node whose result is Imm and sets InstCnt to
// the number of instructions emitted (at most 3). Returns nullptr (with
// InstCnt = 0) if no enumerated pattern matches.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
                                  uint64_t Imm, unsigned &InstCnt) {
  // Classify the immediate by its runs of leading/trailing zeros and ones;
  // every pattern below is stated in terms of these counts.
  unsigned TZ = countTrailingZeros<uint64_t>(Imm);
  unsigned LZ = countLeadingZeros<uint64_t>(Imm);
  unsigned TO = countTrailingOnes<uint64_t>(Imm);
  unsigned LO = countLeadingOnes<uint64_t>(Imm);
  unsigned Hi32 = Hi_32(Imm);
  unsigned Lo32 = Lo_32(Imm);
  SDNode *Result = nullptr;
  unsigned Shift = 0;

  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  // Following patterns use 1 instruction to materialize the Imm.
  InstCnt = 1;
  // 1-1) Patterns : {zeros}{15-bit value}
  //                 {ones}{15-bit value}
  if (isInt<16>(Imm)) {
    SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  }
  // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
  //                 {ones}{15-bit value}{16 zeros}
  if (TZ > 15 && (LZ > 32 || LO > 32))
    return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                  getI32Imm((Imm >> 16) & 0xffff));

  // Following patterns use 2 instructions to materialize the Imm.
  InstCnt = 2;
  assert(LZ < 64 && "Unexpected leading zeros here.");
  // Count of ones following the leading zeros.
  unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
  // 2-1) Patterns : {zeros}{31-bit value}
  //                 {ones}{31-bit value}
  if (isInt<32>(Imm)) {
    uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Imm & 0xffff));
  }
  // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
  //                 {zeros}{15-bit value}{zeros}
  //                 {zeros}{ones}{15-bit value}
  //                 {ones}{15-bit value}{zeros}
  // We can take advantage of LI's sign-extension semantics to generate leading
  // ones, and then use RLDIC to mask off the ones in both sides after rotation.
  if ((LZ + FO + TZ) > 48) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> TZ) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // 2-3) Pattern : {zeros}{15-bit value}{ones}
  // Shift right the Imm by (48 - LZ) bits to construct a negative 16 bit
  // value, therefore we can take advantage of LI's sign-extension semantics,
  // and then mask them off after rotation.
  //
  // +--LZ--||-15-bit-||--TO--+     +-------------|--16-bit--+
  // |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //          Imm                (Imm >> (48 - LZ) & 0xffff)
  // +----sext-----|--16-bit--+     +clear-|-----------------+
  // |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  // LI8: sext many leading zeros   RLDICL: rotate left (48 - LZ), clear left LZ
  if ((LZ + TO) > 48) {
    // Since the immediates with (LZ > 32) have been handled by previous
    // patterns, here we have (LZ <= 32) to make sure we will not shift right
    // the Imm by a negative value.
    assert(LZ <= 32 && "Unexpected shift value.");
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> (48 - LZ) & 0xffff)));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(48 - LZ), getI32Imm(LZ));
  }
  // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
  //                 {ones}{15-bit value}{ones}
  // We can take advantage of LI's sign-extension semantics to generate leading
  // ones, and then use RLDICL to mask off the ones in left sides (if required)
  // after rotation.
  //
  // +-LZ-FO||-15-bit-||--TO--+     +-------------|--16-bit--+
  // |00011110bbbbbbbbb1111111| ->  |000000000011110bbbbbbbbb|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //            Imm                    (Imm >> TO) & 0xffff
  // +----sext-----|--16-bit--+     +LZ|---------------------+
  // |111111111111110bbbbbbbbb| ->  |00011110bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  // LI8: sext many leading zeros   RLDICL: rotate left TO, clear left LZ
  if ((LZ + FO + TO) > 48) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> TO) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
  // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
  // value, we can use LI for Lo16 without generating leading ones then add the
  // Hi16(in Lo32).
  if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm(Lo32 & 0xffff));
    return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Lo32 >> 16));
  }
  // 2-6) Patterns : {******}{49 zeros}{******}
  //                 {******}{49 ones}{******}
  // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
  // bits remain on both sides. Rotate right the Imm to construct an int<16>
  // value, use LI for int<16> value and then use RLDICL without mask to rotate
  // it back.
  //
  // 1) findContiguousZerosAtLeast(Imm, 49)
  // +------|--zeros-|------+     +---ones--||---15 bit--+
  // |bbbbbb0000000000aaaaaa| ->  |0000000000aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                    0      63                    0
  //
  // 2) findContiguousZerosAtLeast(~Imm, 49)
  // +------|--ones--|------+     +---ones--||---15 bit--+
  // |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                    0      63                    0
  if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
      (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm(RotImm & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Shift), getI32Imm(0));
  }

  // Following patterns use 3 instructions to materialize the Imm.
  InstCnt = 3;
  // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
  //                 {zeros}{31-bit value}{zeros}
  //                 {zeros}{ones}{31-bit value}
  //                 {ones}{31-bit value}{zeros}
  // We can take advantage of LIS's sign-extension semantics to generate leading
  // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
  // ones in both sides after rotation.
  if ((LZ + FO + TZ) > 32) {
    uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> TZ) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // 3-2) Pattern : {zeros}{31-bit value}{ones}
  // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bit value,
  // therefore we can take advantage of LIS's sign-extension semantics, add
  // the remaining bits with ORI, and then mask them off after rotation.
  // This is similar to Pattern 2-3, please refer to the diagram there.
  if ((LZ + TO) > 32) {
    // Since the immediates with (LZ > 32) have been handled by previous
    // patterns, here we have (LZ <= 32) to make sure we will not shift right
    // the Imm by a negative value.
    assert(LZ <= 32 && "Unexpected shift value.");
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                    getI32Imm((Imm >> (48 - LZ)) & 0xffff));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> (32 - LZ)) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(32 - LZ), getI32Imm(LZ));
  }
  // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
  //                 {ones}{31-bit value}{ones}
  // We can take advantage of LIS's sign-extension semantics to generate leading
  // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
  // ones in left sides (if required) after rotation.
  // This is similar to Pattern 2-4, please refer to the diagram there.
  if ((LZ + FO + TO) > 32) {
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                    getI32Imm((Imm >> (TO + 16)) & 0xffff));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> TO) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // 3-4) Patterns : High word == Low word
  if (Hi32 == Lo32) {
    // Handle the first 32 bits.
    uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(Lo32 & 0xffff));
    // Use rldimi to insert the Low word into High word.
    SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
                     getI32Imm(0)};
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  }
  // 3-5) Patterns : {******}{33 zeros}{******}
  //                 {******}{33 ones}{******}
  // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
  // bits remain on both sides. Rotate right the Imm to construct an int<32>
  // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
  // rotate it back.
  // This is similar to Pattern 2-6, please refer to the diagram there.
  if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
      (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
    uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(RotImm & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Shift), getI32Imm(0));
  }

  // No enumerated pattern applies; signal failure to the caller.
  InstCnt = 0;
  return nullptr;
}
1141 
1142 // Try to select instructions to generate a 64 bit immediate using prefix as
1143 // well as non prefix instructions. The function will return the SDNode
1144 // to materialize that constant or it will return nullptr if it does not
1145 // find one. The variable InstCnt is set to the number of instructions that
1146 // were selected.
1148  uint64_t Imm, unsigned &InstCnt) {
1149  unsigned TZ = countTrailingZeros<uint64_t>(Imm);
1150  unsigned LZ = countLeadingZeros<uint64_t>(Imm);
1151  unsigned TO = countTrailingOnes<uint64_t>(Imm);
1152  unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1153  unsigned Hi32 = Hi_32(Imm);
1154  unsigned Lo32 = Lo_32(Imm);
1155 
1156  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1157  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1158  };
1159 
1160  auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1161  return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1162  };
1163 
1164  // Following patterns use 1 instruction to materialize Imm.
1165  InstCnt = 1;
1166 
1167  // The pli instruction can materialize up to 34 bits directly.
1168  // If a constant fits within 34-bits, emit the pli instruction here directly.
1169  if (isInt<34>(Imm))
1170  return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1171  CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1172 
1173  // Require at least two instructions.
1174  InstCnt = 2;
1175  SDNode *Result = nullptr;
1176  // Patterns : {zeros}{ones}{33-bit value}{zeros}
1177  // {zeros}{33-bit value}{zeros}
1178  // {zeros}{ones}{33-bit value}
1179  // {ones}{33-bit value}{zeros}
1180  // We can take advantage of PLI's sign-extension semantics to generate leading
1181  // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1182  if ((LZ + FO + TZ) > 30) {
1183  APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1184  APInt Extended = SignedInt34.sext(64);
1185  Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1186  getI64Imm(*Extended.getRawData()));
1187  return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1188  getI32Imm(TZ), getI32Imm(LZ));
1189  }
1190  // Pattern : {zeros}{33-bit value}{ones}
1191  // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1192  // therefore we can take advantage of PLI's sign-extension semantics, and then
1193  // mask them off after rotation.
1194  //
1195  // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1196  // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1197  // +------------------------+ +------------------------+
1198  // 63 0 63 0
1199  //
1200  // +----sext-----|--34-bit--+ +clear-|-----------------+
1201  // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1202  // +------------------------+ +------------------------+
1203  // 63 0 63 0
1204  if ((LZ + TO) > 30) {
1205  APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1206  APInt Extended = SignedInt34.sext(64);
1207  Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1208  getI64Imm(*Extended.getRawData()));
1209  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1210  getI32Imm(30 - LZ), getI32Imm(LZ));
1211  }
1212  // Patterns : {zeros}{ones}{33-bit value}{ones}
1213  // {ones}{33-bit value}{ones}
1214  // Similar to LI we can take advantage of PLI's sign-extension semantics to
1215  // generate leading ones, and then use RLDICL to mask off the ones in left
1216  // sides (if required) after rotation.
1217  if ((LZ + FO + TO) > 30) {
1218  APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1219  APInt Extended = SignedInt34.sext(64);
1220  Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1221  getI64Imm(*Extended.getRawData()));
1222  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1223  getI32Imm(TO), getI32Imm(LZ));
1224  }
1225  // Patterns : {******}{31 zeros}{******}
1226  // : {******}{31 ones}{******}
1227  // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1228  // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
1229  // for the int<33> value and then use RLDICL without a mask to rotate it back.
1230  //
1231  // +------|--ones--|------+ +---ones--||---33 bit--+
1232  // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1233  // +----------------------+ +----------------------+
1234  // 63 0 63 0
1235  for (unsigned Shift = 0; Shift < 63; ++Shift) {
1236  uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1237  if (isInt<34>(RotImm)) {
1238  Result =
1239  CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1240  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1241  SDValue(Result, 0), getI32Imm(Shift),
1242  getI32Imm(0));
1243  }
1244  }
1245 
1246  // Patterns : High word == Low word
1247  // This is basically a splat of a 32 bit immediate.
1248  if (Hi32 == Lo32) {
1249  Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1250  SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1251  getI32Imm(0)};
1252  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1253  }
1254 
1255  InstCnt = 3;
1256  // Catch-all
1257  // This pattern can form any 64 bit immediate in 3 instructions.
1258  SDNode *ResultHi =
1259  CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1260  SDNode *ResultLo =
1261  CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1262  SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1263  getI32Imm(0)};
1264  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1265 }
1266 
1267 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1268  unsigned *InstCnt = nullptr) {
1269  unsigned InstCntDirect = 0;
1270  // No more than 3 instructions is used if we can select the i64 immediate
1271  // directly.
1272  SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1273 
1274  const PPCSubtarget &Subtarget =
1276 
1277  // If we have prefixed instructions and there is a chance we can
1278  // materialize the constant with fewer prefixed instructions than
1279  // non-prefixed, try that.
1280  if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1281  unsigned InstCntDirectP = 0;
1282  SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1283  // Use the prefix case in either of two cases:
1284  // 1) We have no result from the non-prefix case to use.
1285  // 2) The non-prefix case uses more instructions than the prefix case.
1286  // If the prefix and non-prefix cases use the same number of instructions
1287  // we will prefer the non-prefix case.
1288  if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1289  if (InstCnt)
1290  *InstCnt = InstCntDirectP;
1291  return ResultP;
1292  }
1293  }
1294 
1295  if (Result) {
1296  if (InstCnt)
1297  *InstCnt = InstCntDirect;
1298  return Result;
1299  }
1300  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1301  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1302  };
1303  // Handle the upper 32 bit value.
1304  Result =
1305  selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1306  // Add in the last bits as required.
1307  if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
1308  Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1309  SDValue(Result, 0), getI32Imm(Hi16));
1310  ++InstCntDirect;
1311  }
1312  if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
1313  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1314  getI32Imm(Lo16));
1315  ++InstCntDirect;
1316  }
1317  if (InstCnt)
1318  *InstCnt = InstCntDirect;
1319  return Result;
1320 }
1321 
1322 // Select a 64-bit constant.
1324  SDLoc dl(N);
1325 
1326  // Get 64 bit value.
1327  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
1328  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1329  uint64_t SextImm = SignExtend64(Imm, MinSize);
1330  SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1331  if (isInt<16>(SextImm))
1332  return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1333  }
1334  return selectI64Imm(CurDAG, dl, Imm);
1335 }
1336 
1337 namespace {
1338 
1339 class BitPermutationSelector {
  // Describes the provenance of a single bit of a value: either a constant
  // zero, or a specific bit (Idx) of a specific source SDValue (V).
  struct ValueBit {
    // The source value this bit comes from (null SDValue for constant bits).
    SDValue V;

    // The bit number in the value, using a convention where bit 0 is the
    // lowest-order bit.
    unsigned Idx;

    // ConstZero means a bit we need to mask off.
    // Variable is a bit that comes from an input variable.
    // VariableKnownToBeZero is also a bit that comes from an input variable,
    // but it is known to be already zero. So we do not need to mask it.
    enum Kind {
      ConstZero,
      Variable,
      VariableKnownToBeZero
    } K;

    ValueBit(SDValue V, unsigned I, Kind K = Variable)
      : V(V), Idx(I), K(K) {}
    // Constant-bit constructor: no source value, sentinel index.
    ValueBit(Kind K = Variable)
      : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}

    // True if this bit is known to be zero, whether masked-off constant zero
    // or a variable bit already proven zero.
    bool isZero() const {
      return K == ConstZero || K == VariableKnownToBeZero;
    }

    // True if this bit tracks a concrete (value, bit-index) pair.
    bool hasValue() const {
      return K == Variable || K == VariableKnownToBeZero;
    }

    SDValue getValue() const {
      assert(hasValue() && "Cannot get the value of a constant bit");
      return V;
    }

    unsigned getValueBitIndex() const {
      assert(hasValue() && "Cannot get the value bit index of a constant bit");
      return Idx;
    }
  };
1380 
  // A bit group has the same underlying value and the same rotate factor.
  struct BitGroup {
    // The common source value of every bit in the group.
    SDValue V;
    // The left-rotate amount shared by all bits in the group.
    unsigned RLAmt;
    // The bit-index range this group covers (logged below as [S, E]).
    unsigned StartIdx, EndIdx;

    // This rotation amount assumes that the lower 32 bits of the quantity are
    // replicated in the high 32 bits by the rotation operator (which is done
    // by rlwinm and friends in 64-bit mode).
    bool Repl32;
    // Did converting to Repl32 == true change the rotation factor? If it did,
    // it decreased it by 32.
    bool Repl32CR;
    // Was this group coalesced after setting Repl32 to true?
    bool Repl32Coalesced;

    BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
      : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
        Repl32Coalesced(false) {
      LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
                        << " [" << S << ", " << E << "]\n");
    }
  };
1404 
  // Information on each (Value, RLAmt) pair (like the number of groups
  // associated with each) used to choose the lowering method.
  struct ValueRotInfo {
    // The source value common to the groups summarized here.
    SDValue V;
    // The shared rotate amount; defaults to an impossible sentinel value.
    unsigned RLAmt = std::numeric_limits<unsigned>::max();
    // How many bit groups share this (V, RLAmt) pair.
    unsigned NumGroups = 0;
    // Start index of the earliest such group; sentinel until one is recorded.
    unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
    // Whether the groups use the 32-bit-replicated rotation form.
    bool Repl32 = false;

    ValueRotInfo() = default;

    // For sorting (in reverse order) by NumGroups, and then by
    // FirstGroupStartIdx.
    bool operator < (const ValueRotInfo &Other) const {
      // We need to sort so that the non-Repl32 come first because, when we're
      // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
      // masking operation.
      if (Repl32 < Other.Repl32)
        return true;
      else if (Repl32 > Other.Repl32)
        return false;
      else if (NumGroups > Other.NumGroups)
        return true;
      else if (NumGroups < Other.NumGroups)
        return false;
      // All else equal, prefer a rotate amount of zero (no rotation needed).
      else if (RLAmt == 0 && Other.RLAmt != 0)
        return true;
      else if (RLAmt != 0 && Other.RLAmt == 0)
        return false;
      else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
        return true;
      return false;
    }
  };
1439 
1440  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1441  using ValueBitsMemoizer =
1443  ValueBitsMemoizer Memoizer;
1444 
1445  // Return a pair of bool and a SmallVector pointer to a memoization entry.
1446  // The bool is true if something interesting was deduced, otherwise if we're
1447  // providing only a generic representation of V (or something else likewise
1448  // uninteresting for instruction selection) through the SmallVector.
1449  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1450  unsigned NumBits) {
1451  auto &ValueEntry = Memoizer[V];
1452  if (ValueEntry)
1453  return std::make_pair(ValueEntry->first, &ValueEntry->second);
1454  ValueEntry.reset(new ValueBitsMemoizedValue());
1455  bool &Interesting = ValueEntry->first;
1456  SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1457  Bits.resize(NumBits);
1458 
1459  switch (V.getOpcode()) {
1460  default: break;
1461  case ISD::ROTL:
1462  if (isa<ConstantSDNode>(V.getOperand(1))) {
1463  unsigned RotAmt = V.getConstantOperandVal(1);
1464 
1465  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1466 
1467  for (unsigned i = 0; i < NumBits; ++i)
1468  Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1469 
1470  return std::make_pair(Interesting = true, &Bits);
1471  }
1472  break;
1473  case ISD::SHL:
1474  case PPCISD::SHL:
1475  if (isa<ConstantSDNode>(V.getOperand(1))) {
1476  unsigned ShiftAmt = V.getConstantOperandVal(1);
1477 
1478  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1479 
1480  for (unsigned i = ShiftAmt; i < NumBits; ++i)
1481  Bits[i] = LHSBits[i - ShiftAmt];
1482 
1483  for (unsigned i = 0; i < ShiftAmt; ++i)
1484  Bits[i] = ValueBit(ValueBit::ConstZero);
1485 
1486  return std::make_pair(Interesting = true, &Bits);
1487  }
1488  break;
1489  case ISD::SRL:
1490  case PPCISD::SRL:
1491  if (isa<ConstantSDNode>(V.getOperand(1))) {
1492  unsigned ShiftAmt = V.getConstantOperandVal(1);
1493 
1494  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1495 
1496  for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1497  Bits[i] = LHSBits[i + ShiftAmt];
1498 
1499  for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1500  Bits[i] = ValueBit(ValueBit::ConstZero);
1501 
1502  return std::make_pair(Interesting = true, &Bits);
1503  }
1504  break;
1505  case ISD::AND:
1506  if (isa<ConstantSDNode>(V.getOperand(1))) {
1507  uint64_t Mask = V.getConstantOperandVal(1);
1508 
1509  const SmallVector<ValueBit, 64> *LHSBits;
1510  // Mark this as interesting, only if the LHS was also interesting. This
1511  // prevents the overall procedure from matching a single immediate 'and'
1512  // (which is non-optimal because such an and might be folded with other
1513  // things if we don't select it here).
1514  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1515 
1516  for (unsigned i = 0; i < NumBits; ++i)
1517  if (((Mask >> i) & 1) == 1)
1518  Bits[i] = (*LHSBits)[i];
1519  else {
1520  // AND instruction masks this bit. If the input is already zero,
1521  // we have nothing to do here. Otherwise, make the bit ConstZero.
1522  if ((*LHSBits)[i].isZero())
1523  Bits[i] = (*LHSBits)[i];
1524  else
1525  Bits[i] = ValueBit(ValueBit::ConstZero);
1526  }
1527 
1528  return std::make_pair(Interesting, &Bits);
1529  }
1530  break;
1531  case ISD::OR: {
1532  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1533  const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1534 
1535  bool AllDisjoint = true;
1536  SDValue LastVal = SDValue();
1537  unsigned LastIdx = 0;
1538  for (unsigned i = 0; i < NumBits; ++i) {
1539  if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1540  // If both inputs are known to be zero and one is ConstZero and
1541  // another is VariableKnownToBeZero, we can select whichever
1542  // we like. To minimize the number of bit groups, we select
1543  // VariableKnownToBeZero if this bit is the next bit of the same
1544  // input variable from the previous bit. Otherwise, we select
1545  // ConstZero.
1546  if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1547  LHSBits[i].getValueBitIndex() == LastIdx + 1)
1548  Bits[i] = LHSBits[i];
1549  else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1550  RHSBits[i].getValueBitIndex() == LastIdx + 1)
1551  Bits[i] = RHSBits[i];
1552  else
1553  Bits[i] = ValueBit(ValueBit::ConstZero);
1554  }
1555  else if (LHSBits[i].isZero())
1556  Bits[i] = RHSBits[i];
1557  else if (RHSBits[i].isZero())
1558  Bits[i] = LHSBits[i];
1559  else {
1560  AllDisjoint = false;
1561  break;
1562  }
1563  // We remember the value and bit index of this bit.
1564  if (Bits[i].hasValue()) {
1565  LastVal = Bits[i].getValue();
1566  LastIdx = Bits[i].getValueBitIndex();
1567  }
1568  else {
1569  if (LastVal) LastVal = SDValue();
1570  LastIdx = 0;
1571  }
1572  }
1573 
1574  if (!AllDisjoint)
1575  break;
1576 
1577  return std::make_pair(Interesting = true, &Bits);
1578  }
1579  case ISD::ZERO_EXTEND: {
1580  // We support only the case with zero extension from i32 to i64 so far.
1581  if (V.getValueType() != MVT::i64 ||
1582  V.getOperand(0).getValueType() != MVT::i32)
1583  break;
1584 
1585  const SmallVector<ValueBit, 64> *LHSBits;
1586  const unsigned NumOperandBits = 32;
1587  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1588  NumOperandBits);
1589 
1590  for (unsigned i = 0; i < NumOperandBits; ++i)
1591  Bits[i] = (*LHSBits)[i];
1592 
1593  for (unsigned i = NumOperandBits; i < NumBits; ++i)
1594  Bits[i] = ValueBit(ValueBit::ConstZero);
1595 
1596  return std::make_pair(Interesting, &Bits);
1597  }
1598  case ISD::TRUNCATE: {
1600  EVT ToType = V.getValueType();
1601  // We support only the case with truncate from i64 to i32.
1602  if (FromType != MVT::i64 || ToType != MVT::i32)
1603  break;
1604  const unsigned NumAllBits = FromType.getSizeInBits();
1605  SmallVector<ValueBit, 64> *InBits;
1606  std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1607  NumAllBits);
1608  const unsigned NumValidBits = ToType.getSizeInBits();
1609 
1610  // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1611  // So, we cannot include this truncate.
1612  bool UseUpper32bit = false;
1613  for (unsigned i = 0; i < NumValidBits; ++i)
1614  if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1615  UseUpper32bit = true;
1616  break;
1617  }
1618  if (UseUpper32bit)
1619  break;
1620 
1621  for (unsigned i = 0; i < NumValidBits; ++i)
1622  Bits[i] = (*InBits)[i];
1623 
1624  return std::make_pair(Interesting, &Bits);
1625  }
1626  case ISD::AssertZext: {
1627  // For AssertZext, we look through the operand and
1628  // mark the bits known to be zero.
1629  const SmallVector<ValueBit, 64> *LHSBits;
1630  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1631  NumBits);
1632 
1633  EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1634  const unsigned NumValidBits = FromType.getSizeInBits();
1635  for (unsigned i = 0; i < NumValidBits; ++i)
1636  Bits[i] = (*LHSBits)[i];
1637 
1638  // These bits are known to be zero but the AssertZext may be from a value
1639  // that already has some constant zero bits (i.e. from a masking and).
1640  for (unsigned i = NumValidBits; i < NumBits; ++i)
1641  Bits[i] = (*LHSBits)[i].hasValue()
1642  ? ValueBit((*LHSBits)[i].getValue(),
1643  (*LHSBits)[i].getValueBitIndex(),
1644  ValueBit::VariableKnownToBeZero)
1645  : ValueBit(ValueBit::ConstZero);
1646 
1647  return std::make_pair(Interesting, &Bits);
1648  }
1649  case ISD::LOAD:
1650  LoadSDNode *LD = cast<LoadSDNode>(V);
1651  if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1652  EVT VT = LD->getMemoryVT();
1653  const unsigned NumValidBits = VT.getSizeInBits();
1654 
1655  for (unsigned i = 0; i < NumValidBits; ++i)
1656  Bits[i] = ValueBit(V, i);
1657 
1658  // These bits are known to be zero.
1659  for (unsigned i = NumValidBits; i < NumBits; ++i)
1660  Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1661 
1662  // Zero-extending load itself cannot be optimized. So, it is not
1663  // interesting by itself though it gives useful information.
1664  return std::make_pair(Interesting = false, &Bits);
1665  }
1666  break;
1667  }
1668 
1669  for (unsigned i = 0; i < NumBits; ++i)
1670  Bits[i] = ValueBit(V, i);
1671 
1672  return std::make_pair(Interesting = false, &Bits);
1673  }
1674 
1675  // For each value (except the constant ones), compute the left-rotate amount
1676  // to get it from its original to final position.
1677  void computeRotationAmounts() {
1678  NeedMask = false;
1679  RLAmt.resize(Bits.size());
1680  for (unsigned i = 0; i < Bits.size(); ++i)
1681  if (Bits[i].hasValue()) {
1682  unsigned VBI = Bits[i].getValueBitIndex();
1683  if (i >= VBI)
1684  RLAmt[i] = i - VBI;
1685  else
1686  RLAmt[i] = Bits.size() - (VBI - i);
1687  } else if (Bits[i].isZero()) {
1688  NeedMask = true;
1689  RLAmt[i] = UINT32_MAX;
1690  } else {
1691  llvm_unreachable("Unknown value bit type");
1692  }
1693  }
1694 
  // Collect groups of consecutive bits with the same underlying value and
  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
  // they break up groups.
  //
  // The result is left in BitGroups: each BitGroup records the source value,
  // the common rotation amount, and the inclusive [StartIdx, EndIdx] range of
  // final bit positions it covers. After the first/last merge at the end, the
  // surviving last group may wrap around (EndIdx < StartIdx).
  void collectBitGroups(bool LateMask) {
    BitGroups.clear();

    // State for the group currently being scanned: its rotation amount,
    // underlying value (null SDValue for constant-zero bits), start index,
    // and whether it is a run of constant zeros.
    unsigned LastRLAmt = RLAmt[0];
    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
    unsigned LastGroupStartIdx = 0;
    bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    for (unsigned i = 1; i < Bits.size(); ++i) {
      unsigned ThisRLAmt = RLAmt[i];
      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
      if (LateMask && !ThisValue) {
        // Zeros will be masked in afterward, so pretend this bit continues
        // the previous group.
        ThisValue = LastValue;
        ThisRLAmt = LastRLAmt;
        // If we're doing late masking, then the first bit group always starts
        // at zero (even if the first bits were zero).
        if (BitGroups.empty())
          LastGroupStartIdx = 0;
      }

      // If this bit is known to be zero and the current group is a bit group
      // of zeros, we do not need to terminate the current bit group even the
      // Value or RLAmt does not match here. Instead, we terminate this group
      // when the first non-zero bit appears later.
      if (IsGroupOfZeros && Bits[i].isZero())
        continue;

      // If this bit has the same underlying value and the same rotate factor as
      // the last one, then they're part of the same group.
      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
        // We cannot continue the current group if this bit is not known to
        // be zero in a bit group of zeros.
        if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
          continue;

      // Close the current group (runs with no underlying value are dropped)
      // and start a new one at this bit.
      if (LastValue.getNode())
        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                     i-1));
      LastRLAmt = ThisRLAmt;
      LastValue = ThisValue;
      LastGroupStartIdx = i;
      IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    }
    // Close the final (still-open) group.
    if (LastValue.getNode())
      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                   Bits.size()-1));

    if (BitGroups.empty())
      return;

    // We might be able to combine the first and last groups.
    if (BitGroups.size() > 1) {
      // If the first and last groups are the same, then remove the first group
      // in favor of the last group, making the ending index of the last group
      // equal to the ending index of the to-be-removed first group.
      if (BitGroups[0].StartIdx == 0 &&
          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
        LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
        BitGroups.erase(BitGroups.begin());
      }
    }
  }
1762 
1763  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1764  // associated with each. If the number of groups are same, we prefer a group
1765  // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1766  // instruction. If there is a degeneracy, pick the one that occurs
1767  // first (in the final value).
1768  void collectValueRotInfo() {
1769  ValueRots.clear();
1770 
1771  for (auto &BG : BitGroups) {
1772  unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1773  ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1774  VRI.V = BG.V;
1775  VRI.RLAmt = BG.RLAmt;
1776  VRI.Repl32 = BG.Repl32;
1777  VRI.NumGroups += 1;
1778  VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1779  }
1780 
1781  // Now that we've collected the various ValueRotInfo instances, we need to
1782  // sort them.
1783  ValueRotsVec.clear();
1784  for (auto &I : ValueRots) {
1785  ValueRotsVec.push_back(I.second);
1786  }
1787  llvm::sort(ValueRotsVec);
1788  }
1789 
  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32-bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible, marking them with Repl32 (and Repl32CR/
  // Repl32Coalesced as appropriate).
  void assignRepl32BitGroups() {
    // If we have bits like this:
    //
    // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
    // V bits: ...  7  6  5  4  3  2 1 0 31 30 29 28 27 26 25 24
    // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
    //
    // But, making use of a 32-bit operation that replicates the low-order 32
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
    // of 8.

    // True if every value bit in the (possibly wrap-around) group originates
    // in the low 32 bits of its source value; only such groups can use the
    // replicated 32-bit rotate.
    auto IsAllLow32 = [this](BitGroup & BG) {
      if (BG.StartIdx <= BG.EndIdx) {
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      } else {
        // Wrap-around group: scan [StartIdx, size) then [0, EndIdx].
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      }

      return true;
    };

    for (auto &BG : BitGroups) {
      // If this bit group has RLAmt of 0 and will not be merged with
      // another bit group, we don't benefit from Repl32. We don't mark
      // such group to give more freedom for later instruction selection.
      if (BG.RLAmt == 0) {
        auto PotentiallyMerged = [this](BitGroup & BG) {
          for (auto &BG2 : BitGroups)
            if (&BG != &BG2 && BG.V == BG2.V &&
                (BG2.RLAmt == 0 || BG2.RLAmt == 32))
              return true;
          return false;
        };
        if (!PotentiallyMerged(BG))
          continue;
      }
      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
        if (IsAllLow32(BG)) {
          // Under replication, a rotation of RLAmt >= 32 is equivalent to
          // RLAmt - 32; Repl32CR records that this fold happened.
          if (BG.RLAmt >= 32) {
            BG.RLAmt -= 32;
            BG.Repl32CR = true;
          }

          BG.Repl32 = true;

          LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
                            << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
                            << BG.StartIdx << ", " << BG.EndIdx << "]\n");
        }
      }
    }

    // Now walk through the bit groups, consolidating where possible.
    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
      // We might want to remove this bit group by merging it with the previous
      // group (which might be the ending group).
      auto IP = (I == BitGroups.begin()) ?
                std::prev(BitGroups.end()) : std::prev(I);
      // Merge two adjacent replicated groups with the same value/rotation;
      // the % 64 adjacency test allows the pair to straddle the wrap point.
      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

        LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
                          << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
                          << I->StartIdx << ", " << I->EndIdx
                          << "] with group with range [" << IP->StartIdx << ", "
                          << IP->EndIdx << "]\n");

        IP->EndIdx = I->EndIdx;
        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
        IP->Repl32Coalesced = true;
        I = BitGroups.erase(I);
        continue;
      } else {
        // There is a special case worth handling: If there is a single group
        // covering the entire upper 32 bits, and it can be merged with both
        // the next and previous groups (which might be the same group), then
        // do so. If it is the same group (so there will be only one group in
        // total), then we need to reverse the order of the range so that it
        // covers the entire 64 bits.
        if (I->StartIdx == 32 && I->EndIdx == 63) {
          assert(std::next(I) == BitGroups.end() &&
                 "bit group ends at index 63 but there is another?");
          auto IN = BitGroups.begin();

          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
              IsAllLow32(*I)) {

            LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
                              << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
                              << ", " << I->EndIdx
                              << "] with 32-bit replicated groups with ranges ["
                              << IP->StartIdx << ", " << IP->EndIdx << "] and ["
                              << IN->StartIdx << ", " << IN->EndIdx << "]\n");

            if (IP == IN) {
              // There is only one other group; change it to cover the whole
              // range (backward, so that it can still be Repl32 but cover the
              // whole 64-bit range).
              IP->StartIdx = 31;
              IP->EndIdx = 30;
              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
            } else {
              // There are two separate groups, one before this group and one
              // after us (at the beginning). We're going to remove this group,
              // but also the group at the very beginning.
              IP->EndIdx = IN->EndIdx;
              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
              BitGroups.erase(BitGroups.begin());
            }

            // This must be the last group in the vector (and we might have
            // just invalidated the iterator above), so break here.
            break;
          }
        }
      }

      ++I;
    }
  }
1940 
1941  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
1942  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1943  }
1944 
1945  uint64_t getZerosMask() {
1946  uint64_t Mask = 0;
1947  for (unsigned i = 0; i < Bits.size(); ++i) {
1948  if (Bits[i].hasValue())
1949  continue;
1950  Mask |= (UINT64_C(1) << i);
1951  }
1952 
1953  return ~Mask;
1954  }
1955 
1956  // This method extends an input value to 64 bit if input is 32-bit integer.
1957  // While selecting instructions in BitPermutationSelector in 64-bit mode,
1958  // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1959  // In such case, we extend it to 64 bit to be consistent with other values.
1960  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1961  if (V.getValueSizeInBits() == 64)
1962  return V;
1963 
1964  assert(V.getValueSizeInBits() == 32);
1965  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1966  SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1967  MVT::i64), 0);
1968  SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1969  MVT::i64, ImDef, V,
1970  SubRegIdx), 0);
1971  return ExtVal;
1972  }
1973 
1974  SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
1975  if (V.getValueSizeInBits() == 32)
1976  return V;
1977 
1978  assert(V.getValueSizeInBits() == 64);
1979  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1980  SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
1981  MVT::i32, V, SubRegIdx), 0);
1982  return SubVal;
1983  }
1984 
1985  // Depending on the number of groups for a particular value, it might be
1986  // better to rotate, mask explicitly (using andi/andis), and then or the
1987  // result. Select this part of the result first.
1988  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1990  return;
1991 
1992  for (ValueRotInfo &VRI : ValueRotsVec) {
1993  unsigned Mask = 0;
1994  for (unsigned i = 0; i < Bits.size(); ++i) {
1995  if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1996  continue;
1997  if (RLAmt[i] != VRI.RLAmt)
1998  continue;
1999  Mask |= (1u << i);
2000  }
2001 
2002  // Compute the masks for andi/andis that would be necessary.
2003  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2004  assert((ANDIMask != 0 || ANDISMask != 0) &&
2005  "No set bits in mask for value bit groups");
2006  bool NeedsRotate = VRI.RLAmt != 0;
2007 
2008  // We're trying to minimize the number of instructions. If we have one
2009  // group, using one of andi/andis can break even. If we have three
2010  // groups, we can use both andi and andis and break even (to use both
2011  // andi and andis we also need to or the results together). We need four
2012  // groups if we also need to rotate. To use andi/andis we need to do more
2013  // than break even because rotate-and-mask instructions tend to be easier
2014  // to schedule.
2015 
2016  // FIXME: We've biased here against using andi/andis, which is right for
2017  // POWER cores, but not optimal everywhere. For example, on the A2,
2018  // andi/andis have single-cycle latency whereas the rotate-and-mask
2019  // instructions take two cycles, and it would be better to bias toward
2020  // andi/andis in break-even cases.
2021 
2022  unsigned NumAndInsts = (unsigned) NeedsRotate +
2023  (unsigned) (ANDIMask != 0) +
2024  (unsigned) (ANDISMask != 0) +
2025  (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2026  (unsigned) (bool) Res;
2027 
2028  LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2029  << " RL: " << VRI.RLAmt << ":"
2030  << "\n\t\t\tisel using masking: " << NumAndInsts
2031  << " using rotates: " << VRI.NumGroups << "\n");
2032 
2033  if (NumAndInsts >= VRI.NumGroups)
2034  continue;
2035 
2036  LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2037 
2038  if (InstCnt) *InstCnt += NumAndInsts;
2039 
2040  SDValue VRot;
2041  if (VRI.RLAmt) {
2042  SDValue Ops[] =
2043  { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2044  getI32Imm(0, dl), getI32Imm(31, dl) };
2045  VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2046  Ops), 0);
2047  } else {
2048  VRot = TruncateToInt32(VRI.V, dl);
2049  }
2050 
2051  SDValue ANDIVal, ANDISVal;
2052  if (ANDIMask != 0)
2053  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2054  VRot, getI32Imm(ANDIMask, dl)),
2055  0);
2056  if (ANDISMask != 0)
2057  ANDISVal =
2058  SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2059  getI32Imm(ANDISMask, dl)),
2060  0);
2061 
2062  SDValue TotalVal;
2063  if (!ANDIVal)
2064  TotalVal = ANDISVal;
2065  else if (!ANDISVal)
2066  TotalVal = ANDIVal;
2067  else
2068  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2069  ANDIVal, ANDISVal), 0);
2070 
2071  if (!Res)
2072  Res = TotalVal;
2073  else
2074  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2075  Res, TotalVal), 0);
2076 
2077  // Now, remove all groups with this underlying value and rotation
2078  // factor.
2079  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2080  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2081  });
2082  }
2083  }
2084 
  // Instruction selection for the 32-bit case.
  //
  // Builds the permuted value using rlwinm for the first group and rlwimi to
  // insert each subsequent group; when LateMask is true, the known-zero bits
  // are applied at the end with andi/andis (or'd together if both halves are
  // needed). N supplies the debug location. If InstCnt is non-null, *InstCnt
  // is set to the number of machine instructions generated. Returns the node
  // computing the final result.
  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts32(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!NeedMask || LateMask) && !Res) {
      ValueRotInfo &VRI = ValueRotsVec[0];
      if (VRI.RLAmt) {
        if (InstCnt) *InstCnt += 1;
        SDValue Ops[] =
          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
            getI32Imm(0, dl), getI32Imm(31, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                      0);
      } else {
        Res = TruncateToInt32(VRI.V, dl);
      }

      // Now, remove all groups with this underlying value and rotation factor.
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
      });
    }

    if (InstCnt) *InstCnt += BitGroups.size();

    // Insert the other groups (one at a time).
    for (auto &BG : BitGroups) {
      // The rlwinm/rlwimi mask operands use high-to-low bit numbering, hence
      // the (Bits.size() - idx - 1) conversions below.
      if (!Res) {
        SDValue Ops[] =
          { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
      } else {
        SDValue Ops[] =
          { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
      }
    }

    if (LateMask) {
      // Apply the constant-zero bits that were ignored while grouping.
      unsigned Mask = (unsigned) getZerosMask();

      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in zeros mask?");

      if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                               (unsigned) (ANDISMask != 0) +
                               (unsigned) (ANDIMask != 0 && ANDISMask != 0);

      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
                                                 Res, getI32Imm(ANDIMask, dl)),
                          0);
      if (ANDISMask != 0)
        ANDISVal =
            SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
                                           getI32Imm(ANDISMask, dl)),
                    0);

      if (!ANDIVal)
        Res = ANDISVal;
      else if (!ANDISVal)
        Res = ANDIVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                                             ANDIVal, ANDISVal), 0);
    }

    return Res.getNode();
  }
2169 
2170  unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
2171  unsigned MaskStart, unsigned MaskEnd,
2172  bool IsIns) {
2173  // In the notation used by the instructions, 'start' and 'end' are reversed
2174  // because bits are counted from high to low order.
2175  unsigned InstMaskStart = 64 - MaskEnd - 1,
2176  InstMaskEnd = 64 - MaskStart - 1;
2177 
2178  if (Repl32)
2179  return 1;
2180 
2181  if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
2182  InstMaskEnd == 63 - RLAmt)
2183  return 1;
2184 
2185  return 2;
2186  }
2187 
  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce one if it is available.
  //
  // Rotates V left by RLAmt and masks to keep bits [MaskStart, MaskEnd]
  // (low-order-first numbering). With Repl32, the replicated 32-bit form
  // (rlwinm8) is used. Otherwise a single rldicl/rldicr/rldic is emitted
  // when the (rotation, mask) pair fits one of those encodings; failing
  // that, two rotate-and-mask instructions are chained. If InstCnt is
  // non-null it is incremented per instruction emitted.
  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
                          bool Repl32, unsigned MaskStart, unsigned MaskEnd,
                          unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
                                            Ops), 0);
    }

    // rldicl: the mask extends to the least-significant bit.
    if (InstMaskEnd == 63) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
    }

    // rldicr: the mask starts at the most-significant bit.
    if (InstMaskStart == 0) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskEnd, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
    }

    // rldic: the mask end coincides with the rotation amount.
    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }
2251 
  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce a rotate-mask-and-insert if one is available.
  //
  // Rotates V left by RLAmt and inserts bits [MaskStart, MaskEnd]
  // (low-order-first numbering) into Base. Uses rlwimi8 for Repl32 groups
  // and rldimi when the mask end lines up with the rotation amount;
  // otherwise a preparatory rotate is emitted first. If InstCnt is non-null
  // it is incremented per instruction emitted.
  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
                             unsigned RLAmt, bool Repl32, unsigned MaskStart,
                             unsigned MaskEnd, unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
                                            Ops), 0);
    }

    // rldimi: the mask end coincides with the rotation amount.
    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }
2301 
2302  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2304  return;
2305 
2306  // The idea here is the same as in the 32-bit version, but with additional
2307  // complications from the fact that Repl32 might be true. Because we
2308  // aggressively convert bit groups to Repl32 form (which, for small
2309  // rotation factors, involves no other change), and then coalesce, it might
2310  // be the case that a single 64-bit masking operation could handle both
2311  // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2312  // form allowed coalescing, then we must use a 32-bit rotaton in order to
2313  // completely capture the new combined bit group.
2314 
2315  for (ValueRotInfo &VRI : ValueRotsVec) {
2316  uint64_t Mask = 0;
2317 
2318  // We need to add to the mask all bits from the associated bit groups.
2319  // If Repl32 is false, we need to add bits from bit groups that have
2320  // Repl32 true, but are trivially convertable to Repl32 false. Such a
2321  // group is trivially convertable if it overlaps only with the lower 32
2322  // bits, and the group has not been coalesced.
2323  auto MatchingBG = [VRI](const BitGroup &BG) {
2324  if (VRI.V != BG.V)
2325  return false;
2326 
2327  unsigned EffRLAmt = BG.RLAmt;
2328  if (!VRI.Repl32 && BG.Repl32) {
2329  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2330  !BG.Repl32Coalesced) {
2331  if (BG.Repl32CR)
2332  EffRLAmt += 32;
2333  } else {
2334  return false;
2335  }
2336  } else if (VRI.Repl32 != BG.Repl32) {
2337  return false;
2338  }
2339 
2340  return VRI.RLAmt == EffRLAmt;
2341  };
2342 
2343  for (auto &BG : BitGroups) {
2344  if (!MatchingBG(BG))
2345  continue;
2346 
2347  if (BG.StartIdx <= BG.EndIdx) {
2348  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2349  Mask |= (UINT64_C(1) << i);
2350  } else {
2351  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2352  Mask |= (UINT64_C(1) << i);
2353  for (unsigned i = 0; i <= BG.EndIdx; ++i)
2354  Mask |= (UINT64_C(1) << i);
2355  }
2356  }
2357 
2358  // We can use the 32-bit andi/andis technique if the mask does not
2359  // require any higher-order bits. This can save an instruction compared
2360  // to always using the general 64-bit technique.
2361  bool Use32BitInsts = isUInt<32>(Mask);
2362  // Compute the masks for andi/andis that would be necessary.
2363  unsigned ANDIMask = (Mask & UINT16_MAX),
2364  ANDISMask = (Mask >> 16) & UINT16_MAX;
2365 
2366  bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2367 
2368  unsigned NumAndInsts = (unsigned) NeedsRotate +
2369  (unsigned) (bool) Res;
2370  unsigned NumOfSelectInsts = 0;
2371  selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2372  assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2373  if (Use32BitInsts)
2374  NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2375  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2376  else
2377  NumAndInsts += NumOfSelectInsts + /* and */ 1;
2378 
2379  unsigned NumRLInsts = 0;
2380  bool FirstBG = true;
2381  bool MoreBG = false;
2382  for (auto &BG : BitGroups) {
2383  if (!MatchingBG(BG)) {
2384  MoreBG = true;
2385  continue;
2386  }
2387  NumRLInsts +=
2388  SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2389  !FirstBG);
2390  FirstBG = false;
2391  }
2392 
2393  LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2394  << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2395  << "\n\t\t\tisel using masking: " << NumAndInsts
2396  << " using rotates: " << NumRLInsts << "\n");
2397 
2398  // When we'd use andi/andis, we bias toward using the rotates (andi only
2399  // has a record form, and is cracked on POWER cores). However, when using
2400  // general 64-bit constant formation, bias toward the constant form,
2401  // because that exposes more opportunities for CSE.
2402  if (NumAndInsts > NumRLInsts)
2403  continue;
2404  // When merging multiple bit groups, instruction or is used.
2405  // But when rotate is used, rldimi can inert the rotated value into any
2406  // register, so instruction or can be avoided.
2407  if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2408  continue;
2409 
2410  LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2411 
2412  if (InstCnt) *InstCnt += NumAndInsts;
2413 
2414  SDValue VRot;
2415  // We actually need to generate a rotation if we have a non-zero rotation
2416  // factor or, in the Repl32 case, if we care about any of the
2417  // higher-order replicated bits. In the latter case, we generate a mask
2418  // backward so that it actually includes the entire 64 bits.
2419  if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2420  VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2421  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2422  else
2423  VRot = VRI.V;
2424 
2425  SDValue TotalVal;
2426  if (Use32BitInsts) {
2427  assert((ANDIMask != 0 || ANDISMask != 0) &&
2428  "No set bits in mask when using 32-bit ands for 64-bit value");
2429 
2430  SDValue ANDIVal, ANDISVal;
2431  if (ANDIMask != 0)
2432  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2433  ExtendToInt64(VRot, dl),
2434  getI32Imm(ANDIMask, dl)),
2435  0);
2436  if (ANDISMask != 0)
2437  ANDISVal =
2438  SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2439  ExtendToInt64(VRot, dl),
2440  getI32Imm(ANDISMask, dl)),
2441  0);
2442 
2443  if (!ANDIVal)
2444  TotalVal = ANDISVal;
2445  else if (!ANDISVal)
2446  TotalVal = ANDIVal;
2447  else
2448  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2449  ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2450  } else {
2451  TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2452  TotalVal =
2453  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2454  ExtendToInt64(VRot, dl), TotalVal),
2455  0);
2456  }
2457 
2458  if (!Res)
2459  Res = TotalVal;
2460  else
2461  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2462  ExtendToInt64(Res, dl), TotalVal),
2463  0);
2464 
2465  // Now, remove all groups with this underlying value and rotation
2466  // factor.
2467  eraseMatchingBitGroups(MatchingBG);
2468  }
2469  }
2470 
  // Instruction selection for the 64-bit case.
  // \p N        - the root node of the bit permutation.
  // \p LateMask - if true, zeros in the result are ignored while inserting
  //               bit groups and are masked in at the very end.
  // \p InstCnt  - if non-null, receives the number of machine instructions
  //               this selection would use.
  // Returns the node computing the permuted value.
  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts64(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!NeedMask || LateMask) && !Res) {
      // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
      // groups will come first, and so the VRI representing the largest number
      // of groups might not be first (it might be the first Repl32 groups).
      unsigned MaxGroupsIdx = 0;
      if (!ValueRotsVec[0].Repl32) {
        // Scan forward to the first Repl32 entry and prefer it only if it has
        // strictly more groups than the leading non-Repl32 entry.
        for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
          if (ValueRotsVec[i].Repl32) {
            if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
              MaxGroupsIdx = i;
            break;
          }
      }

      ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
      bool NeedsRotate = false;
      if (VRI.RLAmt) {
        NeedsRotate = true;
      } else if (VRI.Repl32) {
        // Even with a zero rotation amount, a Repl32 value may need a rotate
        // if any of its bit groups touch the upper 32 bits.
        for (auto &BG : BitGroups) {
          if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
              BG.Repl32 != VRI.Repl32)
            continue;

          // We don't need a rotate if the bit group is confined to the lower
          // 32 bits.
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
            continue;

          NeedsRotate = true;
          break;
        }
      }

      if (NeedsRotate)
        Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                              VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
                              InstCnt);
      else
        Res = VRI.V;

      // Now, remove all groups with this underlying value and rotation factor.
      if (Res)
        eraseMatchingBitGroups([VRI](const BitGroup &BG) {
          return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
                 BG.Repl32 == VRI.Repl32;
        });
    }

    // Because 64-bit rotates are more flexible than inserts, we might have a
    // preference regarding which one we do first (to save one instruction).
    if (!Res)
      for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
        if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                 false) <
            SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                 true)) {
          // Move this cheaper-as-rotate group to the front so it becomes the
          // starting value rather than an insert.
          if (I != BitGroups.begin()) {
            BitGroup BG = *I;
            BitGroups.erase(I);
            BitGroups.insert(BitGroups.begin(), BG);
          }

          break;
        }
      }

    // Insert the other groups (one at a time).
    for (auto &BG : BitGroups) {
      if (!Res)
        Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
                              BG.EndIdx, InstCnt);
      else
        Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
                                 BG.StartIdx, BG.EndIdx, InstCnt);
    }

    // With late masking, the zeros were ignored during insertion; AND them in
    // now that all groups have been merged.
    if (LateMask) {
      uint64_t Mask = getZerosMask();

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        // One instruction per non-zero half-mask, plus an OR to combine if
        // both halves are used.
        if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                                 (unsigned) (ANDISMask != 0) +
                                 (unsigned) (ANDIMask != 0 && ANDISMask != 0);

        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
                                                   ExtendToInt64(Res, dl),
                                                   getI32Imm(ANDIMask, dl)),
                            0);
        if (ANDISMask != 0)
          ANDISVal =
              SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
                                             ExtendToInt64(Res, dl),
                                             getI32Imm(ANDISMask, dl)),
                      0);

        if (!ANDIVal)
          Res = ANDISVal;
        else if (!ANDISVal)
          Res = ANDIVal;
        else
          Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                          ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // General case: materialize the 64-bit mask constant, then AND.
        unsigned NumOfSelectInsts = 0;
        SDValue MaskVal =
            SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
        Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                             ExtendToInt64(Res, dl), MaskVal),
                      0);
        if (InstCnt)
          *InstCnt += NumOfSelectInsts + /* and */ 1;
      }
    }

    return Res.getNode();
  }
2614 
2615  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2616  // Fill in BitGroups.
2617  collectBitGroups(LateMask);
2618  if (BitGroups.empty())
2619  return nullptr;
2620 
2621  // For 64-bit values, figure out when we can use 32-bit instructions.
2622  if (Bits.size() == 64)
2623  assignRepl32BitGroups();
2624 
2625  // Fill in ValueRotsVec.
2626  collectValueRotInfo();
2627 
2628  if (Bits.size() == 32) {
2629  return Select32(N, LateMask, InstCnt);
2630  } else {
2631  assert(Bits.size() == 64 && "Not 64 bits here?");
2632  return Select64(N, LateMask, InstCnt);
2633  }
2634 
2635  return nullptr;
2636  }
2637 
2638  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2639  erase_if(BitGroups, F);
2640  }
2641 
2643 
2644  bool NeedMask = false;
2646 
2647  SmallVector<BitGroup, 16> BitGroups;
2648 
2649  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2650  SmallVector<ValueRotInfo, 16> ValueRotsVec;
2651 
2652  SelectionDAG *CurDAG = nullptr;
2653 
2654 public:
  // Construct a selector over the given DAG. Per-node state (Bits, BitGroups,
  // ValueRotsVec, ...) is populated by each call to Select().
  BitPermutationSelector(SelectionDAG *DAG)
    : CurDAG(DAG) {}
2657 
  // Here we try to match complex bit permutations into a set of
  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
  // known to produce optimal code for common cases (like i32 byte swapping).
  // Returns the selected replacement node, or nullptr if the value's bits
  // could not be fully traced.
  SDNode *Select(SDNode *N) {
    Memoizer.clear();
    auto Result =
        getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
    if (!Result.first)
      return nullptr;
    Bits = std::move(*Result.second);

    LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
                         " selection for: ");
    LLVM_DEBUG(N->dump(CurDAG));

    // Fill in RLAmt and set NeedMask.
    computeRotationAmounts();

    // If the result contains no zeros that need masking, a single selection
    // pass suffices.
    if (!NeedMask)
      return Select(N, false);

    // We currently have two techniques for handling results with zeros: early
    // masking (the default) and late masking. Late masking is sometimes more
    // efficient, but because the structure of the bit groups is different, it
    // is hard to tell without generating both and comparing the results. With
    // late masking, we ignore zeros in the resulting value when inserting each
    // set of bit groups, and then mask in the zeros at the end. With early
    // masking, we only insert the non-zero parts of the result at every step.

    unsigned InstCnt = 0, InstCntLateMask = 0;
    LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
    SDNode *RN = Select(N, false, &InstCnt);
    LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");

    LLVM_DEBUG(dbgs() << "\tLate masking:\n");
    SDNode *RNLM = Select(N, true, &InstCntLateMask);
    LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
                      << " instructions\n");

    // Prefer early masking on ties.
    if (InstCnt <= InstCntLateMask) {
      LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
      return RN;
    }

    LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
    return RNLM;
  }
2705 };
2706 
2707 class IntegerCompareEliminator {
2708  SelectionDAG *CurDAG;
2709  PPCDAGToDAGISel *S;
2710  // Conversion type for interpreting results of a 32-bit instruction as
2711  // a 64-bit value or vice versa.
2712  enum ExtOrTruncConversion { Ext, Trunc };
2713 
2714  // Modifiers to guide how an ISD::SETCC node's result is to be computed
2715  // in a GPR.
2716  // ZExtOrig - use the original condition code, zero-extend value
2717  // ZExtInvert - invert the condition code, zero-extend value
2718  // SExtOrig - use the original condition code, sign-extend value
2719  // SExtInvert - invert the condition code, sign-extend value
2720  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2721 
2722  // Comparisons against zero to emit GPR code sequences for. Each of these
2723  // sequences may need to be emitted for two or more equivalent patterns.
2724  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2725  // matters as well as the extension type: sext (-1/0), zext (1/0).
2726  // GEZExt - (zext (LHS >= 0))
2727  // GESExt - (sext (LHS >= 0))
2728  // LEZExt - (zext (LHS <= 0))
2729  // LESExt - (sext (LHS <= 0))
2730  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2731 
2732  SDNode *tryEXTEND(SDNode *N);
2733  SDNode *tryLogicOpOfCompares(SDNode *N);
2734  SDValue computeLogicOpInGPR(SDValue LogicOp);
2735  SDValue signExtendInputIfNeeded(SDValue Input);
2736  SDValue zeroExtendInputIfNeeded(SDValue Input);
2737  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2738  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2739  ZeroCompare CmpTy);
2740  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2741  int64_t RHSValue, SDLoc dl);
2742  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2743  int64_t RHSValue, SDLoc dl);
2744  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2745  int64_t RHSValue, SDLoc dl);
2746  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2747  int64_t RHSValue, SDLoc dl);
2748  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2749 
2750 public:
2751  IntegerCompareEliminator(SelectionDAG *DAG,
2752  PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2753  assert(CurDAG->getTargetLoweringInfo()
2754  .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2755  "Only expecting to use this on 64 bit targets.");
2756  }
2757  SDNode *Select(SDNode *N) {
2758  if (CmpInGPR == ICGPR_None)
2759  return nullptr;
2760  switch (N->getOpcode()) {
2761  default: break;
2762  case ISD::ZERO_EXTEND:
2763  if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2765  return nullptr;
2767  case ISD::SIGN_EXTEND:
2768  if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2770  return nullptr;
2771  return tryEXTEND(N);
2772  case ISD::AND:
2773  case ISD::OR:
2774  case ISD::XOR:
2775  return tryLogicOpOfCompares(N);
2776  }
2777  return nullptr;
2778  }
2779 };
2780 
2781 static bool isLogicOp(unsigned Opc) {
2782  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
2783 }
2784 // The obvious case for wanting to keep the value in a GPR. Namely, the
2785 // result of the comparison is actually needed in a GPR.
2786 SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2787  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2788  N->getOpcode() == ISD::SIGN_EXTEND) &&
2789  "Expecting a zero/sign extend node!");
2790  SDValue WideRes;
2791  // If we are zero-extending the result of a logical operation on i1
2792  // values, we can keep the values in GPRs.
2793  if (isLogicOp(N->getOperand(0).getOpcode()) &&
2794  N->getOperand(0).getValueType() == MVT::i1 &&
2795  N->getOpcode() == ISD::ZERO_EXTEND)
2796  WideRes = computeLogicOpInGPR(N->getOperand(0));
2797  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2798  return nullptr;
2799  else
2800  WideRes =
2801  getSETCCInGPR(N->getOperand(0),
2802  N->getOpcode() == ISD::SIGN_EXTEND ?
2803  SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2804 
2805  if (!WideRes)
2806  return nullptr;
2807 
2808  SDLoc dl(N);
2809  bool Input32Bit = WideRes.getValueType() == MVT::i32;
2810  bool Output32Bit = N->getValueType(0) == MVT::i32;
2811 
2812  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2813  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2814 
2815  SDValue ConvOp = WideRes;
2816  if (Input32Bit != Output32Bit)
2817  ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2818  ExtOrTruncConversion::Trunc);
2819  return ConvOp.getNode();
2820 }
2821 
// Attempt to perform logical operations on the results of comparisons while
// keeping the values in GPRs. Without doing so, these would end up being
// lowered to CR-logical operations which suffer from significant latency and
// low ILP.
SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
  if (N->getValueType(0) != MVT::i1)
    return nullptr;
  assert(isLogicOp(N->getOpcode()) &&
         "Expected a logic operation on setcc results.");
  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
  if (!LoweredLogical)
    return nullptr;

  SDLoc dl(N);
  // computeLogicOpInGPR emits PPC::XORI8 only for the bitwise-negation
  // special case (xor x, -1 on i1); detect that so we can extract the EQ bit
  // (set when the record-form result is zero) rather than the GT bit.
  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  SDValue LHS = LoweredLogical.getOperand(0);
  SDValue RHS = LoweredLogical.getOperand(1);
  SDValue WideOp;
  SDValue OpToConvToRecForm;

  // Look through any 32-bit to 64-bit implicit extend nodes to find the
  // opcode that is input to the XORI.
  if (IsBitwiseNegate &&
      LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
    OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
  else if (IsBitwiseNegate)
    // If the input to the XORI isn't an extension, that's what we're after.
    OpToConvToRecForm = LoweredLogical.getOperand(0);
  else
    // If this is not an XORI, it is a reg-reg logical op and we can convert
    // it to record-form.
    OpToConvToRecForm = LoweredLogical;

  // Get the record-form version of the node we're looking to use to get the
  // CR result from.
  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);

  // Convert the right node to record-form. This is either the logical we're
  // looking at or it is the input node to the negation (if we're looking at
  // a bitwise negation).
  if (NewOpc != -1 && IsBitwiseNegate) {
    // The input to the XORI has a record-form. Use it.
    assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
           "Expected a PPC::XORI8 only for bitwise negation.");
    // Emit the record-form instruction, re-using the original's operands.
    std::vector<SDValue> Ops;
    for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
      Ops.push_back(OpToConvToRecForm.getOperand(i));

    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc, dl,
                                     OpToConvToRecForm.getValueType(),
                                     MVT::Glue, Ops), 0);
  } else {
    assert((NewOpc != -1 || !IsBitwiseNegate) &&
           "No record form available for AND8/OR8/XOR8?");
    // Re-emit the op as its record form (ANDI8_rec as the fallback when no
    // record form was found), glued so the CR0 result can be consumed below.
    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
                                     dl, MVT::i64, MVT::Glue, LHS, RHS),
              0);
  }

  // Select this node to a single bit from CR0 set by the record-form node
  // just created. For bitwise negation, use the EQ bit which is the equivalent
  // of negating the result (i.e. it is a bit set when the result of the
  // operation is zero).
  SDValue SRIdxVal =
    CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
  SDValue CRBit =
    SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                   MVT::i1, CR0Reg, SRIdxVal,
                                   WideOp.getValue(1)), 0);
  return CRBit.getNode();
}
2899 
// Lower a logical operation on i1 values into a GPR sequence if possible.
// The result can be kept in a GPR if requested.
// Three types of inputs can be handled:
// - SETCC
// - TRUNCATE
// - Logical operation (AND/OR/XOR)
// There is also a special case that is handled (namely a complement operation
// achieved with xor %a, -1).
// Returns the 64-bit GPR value, or an empty SDValue if no GPR sequence can
// be produced.
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  assert(isLogicOp(LogicOp.getOpcode()) &&
         "Can only handle logic operations here.");
  assert(LogicOp.getValueType() == MVT::i1 &&
         "Can only handle logic operations on i1 values here.");
  SDLoc dl(LogicOp);
  SDValue LHS, RHS;

  // Special case: xor %a, -1
  bool IsBitwiseNegation = isBitwiseNot(LogicOp);

  // Produces a GPR sequence for each operand of the binary logic operation.
  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  // the value in a GPR and for logic operations, it will recursively produce
  // a GPR sequence for the operation.
  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
    unsigned OperandOpcode = Operand.getOpcode();
    if (OperandOpcode == ISD::SETCC)
      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
    else if (OperandOpcode == ISD::TRUNCATE) {
      // Truncation to i1 keeps only the low bit: rldicl x, 0, 63.
      SDValue InputOp = Operand.getOperand(0);
      EVT InVT = InputOp.getValueType();
      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
                                            PPC::RLDICL, dl, InVT, InputOp,
                                            S->getI64Imm(0, dl),
                                            S->getI64Imm(63, dl)), 0);
    } else if (isLogicOp(OperandOpcode))
      return computeLogicOpInGPR(Operand);
    return SDValue();
  };
  LHS = getLogicOperand(LogicOp.getOperand(0));
  RHS = getLogicOperand(LogicOp.getOperand(1));

  // If a GPR sequence can't be produced for the LHS we can't proceed.
  // Not producing a GPR sequence for the RHS is only a problem if this isn't
  // a bitwise negation operation.
  if (!LHS || (!RHS && !IsBitwiseNegation))
    return SDValue();

  NumLogicOpsOnComparison++;

  // We will use the inputs as 64-bit values.
  if (LHS.getValueType() == MVT::i32)
    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);

  unsigned NewOpc;
  switch (LogicOp.getOpcode()) {
  default: llvm_unreachable("Unknown logic operation.");
  case ISD::AND: NewOpc = PPC::AND8; break;
  case ISD::OR: NewOpc = PPC::OR8; break;
  case ISD::XOR: NewOpc = PPC::XOR8; break;
  }

  // An i1 complement (xor %a, -1) becomes xori LHS, 1: only the low bit is
  // meaningful, so flipping it is equivalent to the negation.
  if (IsBitwiseNegation) {
    RHS = S->getI64Imm(1, dl);
    NewOpc = PPC::XORI8;
  }

  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);

}
2971 
2972 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2973 /// Otherwise just reinterpret it as a 64-bit value.
2974 /// Useful when emitting comparison code for 32-bit values without using
2975 /// the compare instruction (which only considers the lower 32-bits).
2976 SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
2977  assert(Input.getValueType() == MVT::i32 &&
2978  "Can only sign-extend 32-bit values here.");
2979  unsigned Opc = Input.getOpcode();
2980 
2981  // The value was sign extended and then truncated to 32-bits. No need to
2982  // sign extend it again.
2983  if (Opc == ISD::TRUNCATE &&
2984  (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
2985  Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
2986  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2987 
2988  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2989  // The input is a sign-extending load. All ppc sign-extending loads
2990  // sign-extend to the full 64-bits.
2991  if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
2992  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2993 
2994  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2995  // We don't sign-extend constants.
2996  if (InputConst)
2997  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2998 
2999  SDLoc dl(Input);
3000  SignExtensionsAdded++;
3001  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3002  MVT::i64, Input), 0);
3003 }
3004 
3005 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3006 /// Otherwise just reinterpret it as a 64-bit value.
3007 /// Useful when emitting comparison code for 32-bit values without using
3008 /// the compare instruction (which only considers the lower 32-bits).
3009 SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3010  assert(Input.getValueType() == MVT::i32 &&
3011  "Can only zero-extend 32-bit values here.");
3012  unsigned Opc = Input.getOpcode();
3013 
3014  // The only condition under which we can omit the actual extend instruction:
3015  // - The value is a positive constant
3016  // - The value comes from a load that isn't a sign-extending load
3017  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3018  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3019  (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3020  Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3021  if (IsTruncateOfZExt)
3022  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3023 
3024  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3025  if (InputConst && InputConst->getSExtValue() >= 0)
3026  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3027 
3028  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3029  // The input is a load that doesn't sign-extend (it will be zero-extended).
3030  if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3031  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3032 
3033  // None of the above, need to zero-extend.
3034  SDLoc dl(Input);
3035  ZeroExtensionsAdded++;
3036  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3037  S->getI64Imm(0, dl),
3038  S->getI64Imm(32, dl)), 0);
3039 }
3040 
3041 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3042 // course not actual zero/sign extensions that will generate machine code,
3043 // they're just a way to reinterpret a 32 bit value in a register as a
3044 // 64 bit value and vice-versa.
3045 SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3046  ExtOrTruncConversion Conv) {
3047  SDLoc dl(NatWidthRes);
3048 
3049  // For reinterpreting 32-bit values as 64 bit values, we generate
3050  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3051  if (Conv == ExtOrTruncConversion::Ext) {
3052  SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3053  SDValue SubRegIdx =
3054  CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3055  return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3056  ImDef, NatWidthRes, SubRegIdx), 0);
3057  }
3058 
3059  assert(Conv == ExtOrTruncConversion::Trunc &&
3060  "Unknown convertion between 32 and 64 bit values.");
3061  // For reinterpreting 64-bit values as 32-bit values, we just need to
3062  // EXTRACT_SUBREG (i.e. extract the low word).
3063  SDValue SubRegIdx =
3064  CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3065  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3066  NatWidthRes, SubRegIdx), 0);
3067 }
3068 
// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
// Handle both zero-extensions and sign-extensions.
// \p LHS   - the value being compared against zero.
// \p dl    - debug location for the emitted nodes.
// \p CmpTy - which of the four compare/extension combinations to emit.
SDValue
IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                                         ZeroCompare CmpTy) {
  EVT InVT = LHS.getValueType();
  bool Is32Bit = InVT == MVT::i32;
  SDValue ToExtend;

  // Produce the value that needs to be either zero or sign extended.
  switch (CmpTy) {
  case ZeroCompare::GEZExt:
  case ZeroCompare::GESExt:
    // GE cases start from ~LHS (nor LHS, LHS).
    ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
                                              dl, InVT, LHS, LHS), 0);
    break;
  case ZeroCompare::LEZExt:
  case ZeroCompare::LESExt: {
    if (Is32Bit) {
      // Upper 32 bits cannot be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      // Compute the sign bit of (-LHS): neg then srdi by 63.
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      ToExtend =
        SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                       Neg, S->getI64Imm(1, dl),
                                       S->getI64Imm(63, dl)), 0);
    } else {
      // 64-bit: (LHS - 1) | LHS has its sign bit set iff LHS <= 0.
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                                Addi, LHS), 0);
    }
    break;
  }
  }

  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
  // zext: shift the deciding bit down to bit 0 (srdi 63).
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          ToExtend, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  // sext: replicate the sign bit across the register (sradi 63).
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
                                          S->getI64Imm(63, dl)), 0);

  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
  // For 32-bit sequences, the extensions differ between GE/LE cases.
  switch (CmpTy) {
  case ZeroCompare::GEZExt: {
    // Move bit 31 of ~LHS down to bit 0.
    SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
                           S->getI32Imm(31, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                          ShiftOps), 0);
  }
  case ZeroCompare::GESExt:
    // Replicate bit 31 of ~LHS across the 32-bit register.
    return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
                                          S->getI32Imm(31, dl)), 0);
  case ZeroCompare::LEZExt:
    // ToExtend is the 0/1 sign bit of -LHS; flip it to get the LE result.
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(1, dl)), 0);
  case ZeroCompare::LESExt:
    // Subtract 1 from the 0/1 value to get 0/-1 (sign-extended form).
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(-1, dl)), 0);
  }

  // The above case covers all the enumerators so it can't have a default clause
  // to avoid compiler warnings.
  llvm_unreachable("Unknown zero-comparison type.");
}
3142 
3143 /// Produces a zero-extended result of comparing two 32-bit values according to
3144 /// the passed condition code.
3145 SDValue
3146 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3147  ISD::CondCode CC,
3148  int64_t RHSValue, SDLoc dl) {
3149  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3151  return SDValue();
3152  bool IsRHSZero = RHSValue == 0;
3153  bool IsRHSOne = RHSValue == 1;
3154  bool IsRHSNegOne = RHSValue == -1LL;
3155  switch (CC) {
3156  default: return SDValue();
3157  case ISD::SETEQ: {
3158  // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3159  // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3160  SDValue Xor = IsRHSZero ? LHS :
3161  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3162  SDValue Clz =
3163  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3164  SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3165  S->getI32Imm(31, dl) };
3166  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3167  ShiftOps), 0);
3168  }
3169  case ISD::SETNE: {
3170  // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3171  // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3172  SDValue Xor = IsRHSZero ? LHS :
3173  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3174  SDValue Clz =
3175  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3176  SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3177  S->getI32Imm(31, dl) };
3178  SDValue Shift =
3179  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3180  return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3181  S->getI32Imm(1, dl)), 0);
3182  }
3183  case ISD::SETGE: {
3184  // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3185  // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3186  if(IsRHSZero)
3187  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3188 
3189  // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3190  // by swapping inputs and falling through.
3191  std::swap(LHS, RHS);
3192  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3193  IsRHSZero = RHSConst && RHSConst->isNullValue();
3195  }
3196  case ISD::SETLE: {
3197  if (CmpInGPR == ICGPR_NonExtIn)
3198  return SDValue();
3199  // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3200  // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3201  if(IsRHSZero) {
3202  if (CmpInGPR == ICGPR_NonExtIn)
3203  return SDValue();
3204  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3205  }
3206 
3207  // The upper 32-bits of the register can't be undefined for this sequence.
3208  LHS = signExtendInputIfNeeded(LHS);
3209  RHS = signExtendInputIfNeeded(RHS);
3210  SDValue Sub =
3211  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3212  SDValue Shift =
3213  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3214  S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3215  0);
3216  return
3217  SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3218  MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3219  }
3220  case ISD::SETGT: {
3221  // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3222  // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3223  // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3224  // Handle SETLT -1 (which is equivalent to SETGE 0).
3225  if (IsRHSNegOne)
3226  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3227 
3228  if (IsRHSZero) {
3229  if (CmpInGPR == ICGPR_NonExtIn)
3230  return SDValue();
3231  // The upper 32-bits of the register can't be undefined for this sequence.
3232  LHS = signExtendInputIfNeeded(LHS);
3233  RHS = signExtendInputIfNeeded(RHS);
3234  SDValue Neg =
3235  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3236  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3237  Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3238  }
3239  // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3240  // (%b < %a) by swapping inputs and falling through.
3241  std::swap(LHS, RHS);
3242  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3243  IsRHSZero = RHSConst && RHSConst->isNullValue();
3244  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3246  }
3247  case ISD::SETLT: {
3248  // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3249  // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3250  // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3251  // Handle SETLT 1 (which is equivalent to SETLE 0).
3252  if (IsRHSOne) {
3253  if (CmpInGPR == ICGPR_NonExtIn)
3254  return SDValue();
3255  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3256  }
3257 
3258  if (IsRHSZero) {
3259  SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3260  S->getI32Imm(31, dl) };
3261  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3262  ShiftOps), 0);
3263  }
3264 
3265  if (CmpInGPR == ICGPR_NonExtIn)
3266  return SDValue();
3267  // The upper 32-bits of the register can't be undefined for this sequence.
3268  LHS = signExtendInputIfNeeded(LHS);
3269  RHS = signExtendInputIfNeeded(RHS);
3270  SDValue SUBFNode =
3271  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3272  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3273  SUBFNode, S->getI64Imm(1, dl),
3274  S->getI64Imm(63, dl)), 0);
3275  }
3276  case ISD::SETUGE:
3277  // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3278  // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3279  std::swap(LHS, RHS);
3281  case ISD::SETULE: {
3282  if (CmpInGPR == ICGPR_NonExtIn)
3283  return SDValue();
3284  // The upper 32-bits of the register can't be undefined for this sequence.
3285  LHS = zeroExtendInputIfNeeded(LHS);
3286  RHS = zeroExtendInputIfNeeded(RHS);
3287  SDValue Subtract =
3288  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3289  SDValue SrdiNode =
3290  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3291  Subtract, S->getI64Imm(1, dl),
3292  S->getI64Imm(63, dl)), 0);
3293  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3294  S->getI32Imm(1, dl)), 0);
3295  }
3296  case ISD::SETUGT:
3297  // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3298  // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3299  std::swap(LHS, RHS);
3301  case ISD::SETULT: {
3302  if (CmpInGPR == ICGPR_NonExtIn)
3303  return SDValue();
3304  // The upper 32-bits of the register can't be undefined for this sequence.
3305  LHS = zeroExtendInputIfNeeded(LHS);
3306  RHS = zeroExtendInputIfNeeded(RHS);
3307  SDValue Subtract =
3308  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3309  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3310  Subtract, S->getI64Imm(1, dl),
3311  S->getI64Imm(63, dl)), 0);
3312  }
3313  }
3314 }
3315 
3316 /// Produces a sign-extended result of comparing two 32-bit values according to
3317 /// the passed condition code.
3318 SDValue
3319 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3320  ISD::CondCode CC,
3321  int64_t RHSValue, SDLoc dl) {
3322  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3324  return SDValue();
3325  bool IsRHSZero = RHSValue == 0;
3326  bool IsRHSOne = RHSValue == 1;
3327  bool IsRHSNegOne = RHSValue == -1LL;
3328 
3329  switch (CC) {
3330  default: return SDValue();
3331  case ISD::SETEQ: {
3332  // (sext (setcc %a, %b, seteq)) ->
3333  // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3334  // (sext (setcc %a, 0, seteq)) ->
3335  // (ashr (shl (ctlz %a), 58), 63)
3336  SDValue CountInput = IsRHSZero ? LHS :
3337  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3338  SDValue Cntlzw =
3339  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3340  SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3341  S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3342  SDValue Slwi =
3343  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3344  return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3345  }
3346  case ISD::SETNE: {
3347  // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3348  // flip the bit, finally take 2's complement.
3349  // (sext (setcc %a, %b, setne)) ->
3350  // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3351  // Same as above, but the first xor is not needed.
3352  // (sext (setcc %a, 0, setne)) ->
3353  // (neg (xor (lshr (ctlz %a), 5), 1))
3354  SDValue Xor = IsRHSZero ? LHS :
3355  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3356  SDValue Clz =
3357  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3358  SDValue ShiftOps[] =
3359  { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3360  SDValue Shift =
3361  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3362  SDValue Xori =
3363  SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3364  S->getI32Imm(1, dl)), 0);
3365  return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3366  }
3367  case ISD::SETGE: {
3368  // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3369  // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3370  if (IsRHSZero)
3371  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3372 
3373  // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3374  // by swapping inputs and falling through.
3375  std::swap(LHS, RHS);
3376  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3377  IsRHSZero = RHSConst && RHSConst->isNullValue();
3379  }
3380  case ISD::SETLE: {
3381  if (CmpInGPR == ICGPR_NonExtIn)
3382  return SDValue();
3383  // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
3384  // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3385  if (IsRHSZero)
3386  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3387 
3388  // The upper 32-bits of the register can't be undefined for this sequence.
3389  LHS = signExtendInputIfNeeded(LHS);
3390  RHS = signExtendInputIfNeeded(RHS);
3391  SDValue SUBFNode =
3392  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3393  LHS, RHS), 0);
3394  SDValue Srdi =
3395  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3396  SUBFNode, S->getI64Imm(1, dl),
3397  S->getI64Imm(63, dl)), 0);
3398  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3399  S->getI32Imm(-1, dl)), 0);
3400  }
3401  case ISD::SETGT: {
3402  // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3403  // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3404  // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3405  if (IsRHSNegOne)
3406  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3407  if (IsRHSZero) {
3408  if (CmpInGPR == ICGPR_NonExtIn)
3409  return SDValue();
3410  // The upper 32-bits of the register can't be undefined for this sequence.
3411  LHS = signExtendInputIfNeeded(LHS);
3412  RHS = signExtendInputIfNeeded(RHS);
3413  SDValue Neg =
3414  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3415  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3416  S->getI64Imm(63, dl)), 0);
3417  }
3418  // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3419  // (%b < %a) by swapping inputs and falling through.
3420  std::swap(LHS, RHS);
3421  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3422  IsRHSZero = RHSConst && RHSConst->isNullValue();
3423  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3425  }
3426  case ISD::SETLT: {
3427  // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3428  // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
3429  // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
3430  if (IsRHSOne) {
3431  if (CmpInGPR == ICGPR_NonExtIn)
3432  return SDValue();
3433  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3434  }
3435  if (IsRHSZero)
3436  return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3437  S->getI32Imm(31, dl)), 0);
3438 
3439  if (CmpInGPR == ICGPR_NonExtIn)
3440  return SDValue();
3441  // The upper 32-bits of the register can't be undefined for this sequence.
3442  LHS = signExtendInputIfNeeded(LHS);
3443  RHS = signExtendInputIfNeeded(RHS);
3444  SDValue SUBFNode =
3445  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3446  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3447  SUBFNode, S->getI64Imm(63, dl)), 0);
3448  }
3449  case ISD::SETUGE:
3450  // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3451  // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3452  std::swap(LHS, RHS);
3454  case ISD::SETULE: {
3455  if (CmpInGPR == ICGPR_NonExtIn)
3456  return SDValue();
3457  // The upper 32-bits of the register can't be undefined for this sequence.
3458  LHS = zeroExtendInputIfNeeded(LHS);
3459  RHS = zeroExtendInputIfNeeded(RHS);
3460  SDValue Subtract =
3461  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3462  SDValue Shift =
3463  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3464  S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3465  0);
3466  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3467  S->getI32Imm(-1, dl)), 0);
3468  }
3469  case ISD::SETUGT:
3470  // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3471  // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3472  std::swap(LHS, RHS);
3474  case ISD::SETULT: {
3475  if (CmpInGPR == ICGPR_NonExtIn)
3476  return SDValue();
3477  // The upper 32-bits of the register can't be undefined for this sequence.
3478  LHS = zeroExtendInputIfNeeded(LHS);
3479  RHS = zeroExtendInputIfNeeded(RHS);
3480  SDValue Subtract =
3481  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3482  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3483  Subtract, S->getI64Imm(63, dl)), 0);
3484  }
3485  }
3486 }
3487 
3488 /// Produces a zero-extended result of comparing two 64-bit values according to
3489 /// the passed condition code.
3490 SDValue
3491 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3492  ISD::CondCode CC,
3493  int64_t RHSValue, SDLoc dl) {
3494  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3496  return SDValue();
3497  bool IsRHSZero = RHSValue == 0;
3498  bool IsRHSOne = RHSValue == 1;
3499  bool IsRHSNegOne = RHSValue == -1LL;
3500  switch (CC) {
3501  default: return SDValue();
3502  case ISD::SETEQ: {
3503  // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3504  // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3505  SDValue Xor = IsRHSZero ? LHS :
3506  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3507  SDValue Clz =
3508  SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3509  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3510  S->getI64Imm(58, dl),
3511  S->getI64Imm(63, dl)), 0);
3512  }
3513  case ISD::SETNE: {
3514  // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3515  // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3516  // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3517  // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3518  SDValue Xor = IsRHSZero ? LHS :
3519  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3520  SDValue AC =
3521  SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3522  Xor, S->getI32Imm(~0U, dl)), 0);
3523  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3524  Xor, AC.getValue(1)), 0);
3525  }
3526  case ISD::SETGE: {
3527  // {subc.reg, subc.CA} = (subcarry %a, %b)
3528  // (zext (setcc %a, %b, setge)) ->
3529  // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3530  // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3531  if (IsRHSZero)
3532  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3533  std::swap(LHS, RHS);
3534  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3535  IsRHSZero = RHSConst && RHSConst->isNullValue();
3537  }
3538  case ISD::SETLE: {
3539  // {subc.reg, subc.CA} = (subcarry %b, %a)
3540  // (zext (setcc %a, %b, setge)) ->
3541  // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3542  // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3543  if (IsRHSZero)
3544  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3545  SDValue ShiftL =
3546  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3547  S->getI64Imm(1, dl),
3548  S->getI64Imm(63, dl)), 0);
3549  SDValue ShiftR =
3550  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3551  S->getI64Imm(63, dl)), 0);
3552  SDValue SubtractCarry =
3553  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3554  LHS, RHS), 1);
3555  return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3556  ShiftR, ShiftL, SubtractCarry), 0);
3557  }
3558  case ISD::SETGT: {
3559  // {subc.reg, subc.CA} = (subcarry %b, %a)
3560  // (zext (setcc %a, %b, setgt)) ->
3561  // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3562  // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3563  if (IsRHSNegOne)
3564  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3565  if (IsRHSZero) {
3566  SDValue Addi =
3567  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3568  S->getI64Imm(~0ULL, dl)), 0);
3569  SDValue Nor =
3570  SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3571  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3572  S->getI64Imm(1, dl),
3573  S->getI64Imm(63, dl)), 0);
3574  }
3575  std::swap(LHS, RHS);
3576  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3577  IsRHSZero = RHSConst && RHSConst->isNullValue();
3578  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3580  }
3581  case ISD::SETLT: {
3582  // {subc.reg, subc.CA} = (subcarry %a, %b)
3583  // (zext (setcc %a, %b, setlt)) ->
3584  // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3585  // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3586  if (IsRHSOne)
3587  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3588  if (IsRHSZero)
3589  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3590  S->getI64Imm(1, dl),
3591  S->getI64Imm(63, dl)), 0);
3592  SDValue SRADINode =
3593  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3594  LHS, S->getI64Imm(63, dl)), 0);
3595  SDValue SRDINode =
3596  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3597  RHS, S->getI64Imm(1, dl),
3598  S->getI64Imm(63, dl)), 0);
3599  SDValue SUBFC8Carry =
3600  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3601  RHS, LHS), 1);
3602  SDValue ADDE8Node =
3603  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3604  SRDINode, SRADINode, SUBFC8Carry), 0);
3605  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3606  ADDE8Node, S->getI64Imm(1, dl)), 0);
3607  }
3608  case ISD::SETUGE:
3609  // {subc.reg, subc.CA} = (subcarry %a, %b)
3610  // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3611  std::swap(LHS, RHS);
3613  case ISD::SETULE: {
3614  // {subc.reg, subc.CA} = (subcarry %b, %a)
3615  // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3616  SDValue SUBFC8Carry =
3617  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3618  LHS, RHS), 1);
3619  SDValue SUBFE8Node =
3620  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3621  LHS, LHS, SUBFC8Carry), 0);
3622  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3623  SUBFE8Node, S->getI64Imm(1, dl)), 0);
3624  }
3625  case ISD::SETUGT:
3626  // {subc.reg, subc.CA} = (subcarry %b, %a)
3627  // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3628  std::swap(LHS, RHS);
3630  case ISD::SETULT: {
3631  // {subc.reg, subc.CA} = (subcarry %a, %b)
3632  // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3633  SDValue SubtractCarry =
3634  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3635  RHS, LHS), 1);
3636  SDValue ExtSub =
3637  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3638  LHS, LHS, SubtractCarry), 0);
3639  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3640  ExtSub), 0);
3641  }
3642  }
3643 }
3644 
3645 /// Produces a sign-extended result of comparing two 64-bit values according to
3646 /// the passed condition code.
3647 SDValue
3648 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3649  ISD::CondCode CC,
3650  int64_t RHSValue, SDLoc dl) {
3651  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3653  return SDValue();
3654  bool IsRHSZero = RHSValue == 0;
3655  bool IsRHSOne = RHSValue == 1;
3656  bool IsRHSNegOne = RHSValue == -1LL;
3657  switch (CC) {
3658  default: return SDValue();
3659  case ISD::SETEQ: {
3660  // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3661  // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3662  // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3663  // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3664  SDValue AddInput = IsRHSZero ? LHS :
3665  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3666  SDValue Addic =
3667  SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3668  AddInput, S->getI32Imm(~0U, dl)), 0);
3669  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3670  Addic, Addic.getValue(1)), 0);
3671  }
3672  case ISD::SETNE: {
3673  // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3674  // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3675  // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3676  // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3677  SDValue Xor = IsRHSZero ? LHS :
3678  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3679  SDValue SC =
3680  SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3681  Xor, S->getI32Imm(0, dl)), 0);
3682  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3683  SC, SC.getValue(1)), 0);
3684  }
3685  case ISD::SETGE: {
3686  // {subc.reg, subc.CA} = (subcarry %a, %b)
3687  // (zext (setcc %a, %b, setge)) ->
3688  // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3689  // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3690  if (IsRHSZero)
3691  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3692  std::swap(LHS, RHS);
3693  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3694  IsRHSZero = RHSConst && RHSConst->isNullValue();
3696  }
3697  case ISD::SETLE: {
3698  // {subc.reg, subc.CA} = (subcarry %b, %a)
3699  // (zext (setcc %a, %b, setge)) ->
3700  // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3701  // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3702  if (IsRHSZero)
3703  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3704  SDValue ShiftR =
3705  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3706  S->getI64Imm(63, dl)), 0);
3707  SDValue ShiftL =
3708  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3709  S->getI64Imm(1, dl),
3710  S->getI64Imm(63, dl)), 0);
3711  SDValue SubtractCarry =
3712  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3713  LHS, RHS), 1);
3714  SDValue Adde =
3715  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3716  ShiftR, ShiftL, SubtractCarry), 0);
3717  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3718  }
3719  case ISD::SETGT: {
3720  // {subc.reg, subc.CA} = (subcarry %b, %a)
3721  // (zext (setcc %a, %b, setgt)) ->
3722  // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3723  // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3724  if (IsRHSNegOne)
3725  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3726  if (IsRHSZero) {
3727  SDValue Add =
3728  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3729  S->getI64Imm(-1, dl)), 0);
3730  SDValue Nor =
3731  SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3732  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3733  S->getI64Imm(63, dl)), 0);
3734  }
3735  std::swap(LHS, RHS);
3736  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3737  IsRHSZero = RHSConst && RHSConst->isNullValue();
3738  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3740  }
3741  case ISD::SETLT: {
3742  // {subc.reg, subc.CA} = (subcarry %a, %b)
3743  // (zext (setcc %a, %b, setlt)) ->
3744  // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3745  // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3746  if (IsRHSOne)
3747  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3748  if (IsRHSZero) {
3749  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3750  S->getI64Imm(63, dl)), 0);
3751  }
3752  SDValue SRADINode =
3753  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3754  LHS, S->getI64Imm(63, dl)), 0);
3755  SDValue SRDINode =
3756  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3757  RHS, S->getI64Imm(1, dl),
3758  S->getI64Imm(63, dl)), 0);
3759  SDValue SUBFC8Carry =
3760  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3761  RHS, LHS), 1);
3762  SDValue ADDE8Node =
3763  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3764  SRDINode, SRADINode, SUBFC8Carry), 0);
3765  SDValue XORI8Node =
3766  SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3767  ADDE8Node, S->getI64Imm(1, dl)), 0);
3768  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3769  XORI8Node), 0);
3770  }
3771  case ISD::SETUGE:
3772  // {subc.reg, subc.CA} = (subcarry %a, %b)
3773  // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3774  std::swap(LHS, RHS);
3776  case ISD::SETULE: {
3777  // {subc.reg, subc.CA} = (subcarry %b, %a)
3778  // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3779  SDValue SubtractCarry =
3780  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3781  LHS, RHS), 1);
3782  SDValue ExtSub =
3783  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3784  LHS, SubtractCarry), 0);
3785  return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3786  ExtSub, ExtSub), 0);
3787  }
3788  case ISD::SETUGT:
3789  // {subc.reg, subc.CA} = (subcarry %b, %a)
3790  // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3791  std::swap(LHS, RHS);
3793  case ISD::SETULT: {
3794  // {subc.reg, subc.CA} = (subcarry %a, %b)
3795  // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3796  SDValue SubCarry =
3797  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3798  RHS, LHS), 1);
3799  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3800  LHS, LHS, SubCarry), 0);
3801  }
3802  }
3803 }
3804 
3805 /// Do all uses of this SDValue need the result in a GPR?
3806 /// This is meant to be used on values that have type i1 since
3807 /// it is somewhat meaningless to ask if values of other types
3808 /// should be kept in GPR's.
3809 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3810  assert(Compare.getOpcode() == ISD::SETCC &&
3811  "An ISD::SETCC node required here.");
3812 
3813  // For values that have a single use, the caller should obviously already have
3814  // checked if that use is an extending use. We check the other uses here.
3815  if (Compare.hasOneUse())
3816  return true;
3817  // We want the value in a GPR if it is being extended, used for a select, or
3818  // used in logical operations.
3819  for (auto CompareUse : Compare.getNode()->uses())
3820  if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3821  CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3822  CompareUse->getOpcode() != ISD::SELECT &&
3823  !isLogicOp(CompareUse->getOpcode())) {
3824  OmittedForNonExtendUses++;
3825  return false;
3826  }
3827  return true;
3828 }
3829 
3830 /// Returns an equivalent of a SETCC node but with the result the same width as
3831 /// the inputs. This can also be used for SELECT_CC if either the true or false
3832 /// values is a power of two while the other is zero.
3833 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3834  SetccInGPROpts ConvOpts) {
3835  assert((Compare.getOpcode() == ISD::SETCC ||
3836  Compare.getOpcode() == ISD::SELECT_CC) &&
3837  "An ISD::SETCC node required here.");
3838 
3839  // Don't convert this comparison to a GPR sequence because there are uses
3840  // of the i1 result (i.e. uses that require the result in the CR).
3841  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
3842  return SDValue();
3843 
3844  SDValue LHS = Compare.getOperand(0);
3845  SDValue RHS = Compare.getOperand(1);
3846 
3847  // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3848  int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
3849  ISD::CondCode CC =
3850  cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
3851  EVT InputVT = LHS.getValueType();
3852  if (InputVT != MVT::i32 && InputVT != MVT::i64)
3853  return SDValue();
3854 
3855  if (ConvOpts == SetccInGPROpts::ZExtInvert ||
3856  ConvOpts == SetccInGPROpts::SExtInvert)
3857  CC = ISD::getSetCCInverse(CC, InputVT);
3858 
3859  bool Inputs32Bit = InputVT == MVT::i32;
3860 
3861  SDLoc dl(Compare);
3862  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3863  int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
3864  bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3865  ConvOpts == SetccInGPROpts::SExtInvert;
3866 
3867  if (IsSext && Inputs32Bit)
3868  return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3869  else if (Inputs32Bit)
3870  return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3871  else if (IsSext)
3872  return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3873  return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3874 }
3875 
3876 } // end anonymous namespace
3877 
3878 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3879  if (N->getValueType(0) != MVT::i32 &&
3880  N->getValueType(0) != MVT::i64)
3881  return false;
3882 
3883  // This optimization will emit code that assumes 64-bit registers
3884  // so we don't want to run it in 32-bit mode. Also don't run it
3885  // on functions that are not to be optimized.
3886  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
3887  return false;
3888 
3889  // For POWER10, it is more profitable to use the set boolean extension
3890  // instructions rather than the integer compare elimination codegen.
3891  // Users can override this via the command line option, `--ppc-gpr-icmps`.
3892  if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
3893  return false;
3894 
3895  switch (N->getOpcode()) {
3896  default: break;
3897  case ISD::ZERO_EXTEND:
3898  case ISD::SIGN_EXTEND:
3899  case ISD::AND:
3900  case ISD::OR:
3901  case ISD::XOR: {
3902  IntegerCompareEliminator ICmpElim(CurDAG, this);
3903  if (SDNode *New = ICmpElim.Select(N)) {
3904  ReplaceNode(N, New);
3905  return true;
3906  }
3907  }
3908  }
3909  return false;
3910 }
3911 
3912 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3913  if (N->getValueType(0) != MVT::i32 &&
3914  N->getValueType(0) != MVT::i64)
3915  return false;
3916 
3917  if (!UseBitPermRewriter)
3918  return false;
3919 
3920  switch (N->getOpcode()) {
3921  default: break;
3922  case ISD::ROTL:
3923  case ISD::SHL:
3924  case ISD::SRL:
3925  case ISD::AND:
3926  case ISD::OR: {
3927  BitPermutationSelector BPS(CurDAG);
3928  if (SDNode *New = BPS.Select(N)) {
3929  ReplaceNode(N, New);
3930  return true;
3931  }
3932  return false;
3933  }
3934  }
3935 
3936  return false;
3937 }
3938 
3939 /// SelectCC - Select a comparison of the specified values with the specified
3940 /// condition code, returning the CR# of the expression.
3941 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3942  const SDLoc &dl, SDValue Chain) {
3943  // Always select the LHS.
3944  unsigned Opc;
3945 
3946  if (LHS.getValueType() == MVT::i32) {
3947  unsigned Imm;
3948  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3949  if (isInt32Immediate(RHS, Imm)) {
3950  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3951  if (isUInt<16>(Imm))
3952  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3953  getI32Imm(Imm & 0xFFFF, dl)),
3954  0);
3955  // If this is a 16-bit signed immediate, fold it.
3956  if (isInt<16>((int)Imm))
3957  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3958  getI32Imm(Imm & 0xFFFF, dl)),
3959  0);
3960 
3961  // For non-equality comparisons, the default code would materialize the
3962  // constant, then compare against it, like this:
3963  // lis r2, 4660
3964  // ori r2, r2, 22136
3965  // cmpw cr0, r3, r2
3966  // Since we are just comparing for equality, we can emit this instead:
3967  // xoris r0,r3,0x1234
3968  // cmplwi cr0,r0,0x5678
3969  // beq cr0,L6
3970  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
3971  getI32Imm(Imm >> 16, dl)), 0);
3972  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
3973  getI32Imm(Imm & 0xFFFF, dl)), 0);
3974  }
3975  Opc = PPC::CMPLW;
3976  } else if (ISD::isUnsignedIntSetCC(CC)) {
3977  if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
3978  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3979  getI32Imm(Imm & 0xFFFF, dl)), 0);
3980  Opc = PPC::CMPLW;
3981  } else {
3982  int16_t SImm;
3983  if (isIntS16Immediate(RHS, SImm))
3984  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3985  getI32Imm((int)SImm & 0xFFFF,
3986  dl)),
3987  0);
3988  Opc = PPC::CMPW;
3989  }
3990  } else if (LHS.getValueType() == MVT::i64) {
3991  uint64_t Imm;
3992  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3993  if (isInt64Immediate(RHS.getNode(), Imm)) {
3994  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3995  if (isUInt<16>(Imm))
3996  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3997  getI32Imm(Imm & 0xFFFF, dl)),
3998  0);
3999  // If this is a 16-bit signed immediate, fold it.
4000  if (isInt<16>(Imm))
4001  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4002  getI32Imm(Imm & 0xFFFF, dl)),
4003  0);
4004 
4005  // For non-equality comparisons, the default code would materialize the
4006  // constant, then compare against it, like this:
4007  // lis r2, 4660
4008  // ori r2, r2, 22136
4009  // cmpd cr0, r3, r2
4010  // Since we are just comparing for equality, we can emit this instead:
4011  // xoris r0,r3,0x1234
4012  // cmpldi cr0,r0,0x5678
4013  // beq cr0,L6
4014  if (isUInt<32>(Imm)) {
4015  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4016  getI64Imm(Imm >> 16, dl)), 0);
4017  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4018  getI64Imm(Imm & 0xFFFF, dl)),
4019  0);
4020  }
4021  }
4022  Opc = PPC::CMPLD;
4023  } else if (ISD::isUnsignedIntSetCC(CC)) {
4024  if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4025  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4026  getI64Imm(Imm & 0xFFFF, dl)), 0);
4027  Opc = PPC::CMPLD;
4028  } else {
4029  int16_t SImm;
4030  if (isIntS16Immediate(RHS, SImm))
4031  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4032  getI64Imm(SImm & 0xFFFF, dl)),
4033  0);
4034  Opc = PPC::CMPD;
4035  }
4036  } else if (LHS.getValueType() == MVT::f32) {
4037  if (Subtarget->hasSPE()) {
4038  switch (CC) {
4039  default:
4040  case ISD::SETEQ:
4041  case ISD::SETNE:
4042  Opc = PPC::EFSCMPEQ;
4043  break;
4044  case ISD::SETLT:
4045  case ISD::SETGE:
4046  case ISD::SETOLT:
4047  case ISD::SETOGE:
4048  case ISD::SETULT:
4049  case ISD::SETUGE:
4050  Opc = PPC::EFSCMPLT;
4051  break;
4052  case ISD::SETGT:
4053  case ISD::SETLE:
4054  case ISD::SETOGT:
4055  case ISD::SETOLE:
4056  case ISD::SETUGT:
4057  case ISD::SETULE:
4058  Opc = PPC::EFSCMPGT;
4059  break;
4060  }
4061  } else
4062  Opc = PPC::FCMPUS;
4063  } else if (LHS.getValueType() == MVT::f64) {
4064  if (Subtarget->hasSPE()) {
4065  switch (CC) {
4066  default:
4067  case ISD::SETEQ:
4068  case ISD::SETNE:
4069  Opc = PPC::EFDCMPEQ;
4070  break;
4071  case ISD::SETLT:
4072  case ISD::SETGE:
4073  case ISD::SETOLT:
4074  case ISD::SETOGE:
4075  case ISD::SETULT:
4076  case ISD::SETUGE:
4077  Opc = PPC::EFDCMPLT;
4078  break;
4079  case ISD::SETGT:
4080  case ISD::SETLE:
4081  case ISD::SETOGT:
4082  case ISD::SETOLE:
4083  case ISD::SETUGT:
4084  case ISD::SETULE:
4085  Opc = PPC::EFDCMPGT;
4086  break;
4087  }
4088  } else
4089  Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4090  } else {
4091  assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4092  assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4093  Opc = PPC::XSCMPUQP;
4094  }
4095  if (Chain)
4096  return SDValue(
4097  CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4098  0);
4099  else
4100  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4101 }
4102 
4104  const PPCSubtarget *Subtarget) {
4105  // For SPE instructions, the result is in GT bit of the CR
4106  bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4107 
4108  switch (CC) {
4109  case ISD::SETUEQ:
4110  case ISD::SETONE:
4111  case ISD::SETOLE:
4112  case ISD::SETOGE:
4113  llvm_unreachable("Should be lowered by legalize!");
4114  default: llvm_unreachable("Unknown condition!");
4115  case ISD::SETOEQ:
4116  case ISD::SETEQ:
4117  return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4118  case ISD::SETUNE:
4119  case ISD::SETNE:
4120  return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4121  case ISD::SETOLT:
4122  case ISD::SETLT:
4123  return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4124  case ISD::SETULE:
4125  case ISD::SETLE:
4126  return PPC::PRED_LE;
4127  case ISD::SETOGT:
4128  case ISD::SETGT:
4129  return PPC::PRED_GT;
4130  case ISD::SETUGE:
4131  case ISD::SETGE:
4132  return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4133  case ISD::SETO: return PPC::PRED_NU;
4134  case ISD::SETUO: return PPC::PRED_UN;
4135  // These two are invalid for floating point. Assume we have int.
4136  case ISD::SETULT: return PPC::PRED_LT;
4137  case ISD::SETUGT: return PPC::PRED_GT;
4138  }
4139 }
4140 
4141 /// getCRIdxForSetCC - Return the index of the condition register field
4142 /// associated with the SetCC condition, and whether or not the field is
4143 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
4144 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4145  Invert = false;
4146  switch (CC) {
4147  default: llvm_unreachable("Unknown condition!");
4148  case ISD::SETOLT:
4149  case ISD::SETLT: return 0; // Bit #0 = SETOLT
4150  case ISD::SETOGT:
4151  case ISD::SETGT: return 1; // Bit #1 = SETOGT
4152  case ISD::SETOEQ:
4153  case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4154  case ISD::SETUO: return 3; // Bit #3 = SETUO
4155  case ISD::SETUGE:
4156  case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4157  case ISD::SETULE:
4158  case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4159  case ISD::SETUNE:
4160  case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4161  case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4162  case ISD::SETUEQ:
4163  case ISD::SETOGE:
4164  case ISD::SETOLE:
4165  case ISD::SETONE:
4166  llvm_unreachable("Invalid branch code: should be expanded by legalize");
4167  // These are invalid for floating point. Assume integer.
4168  case ISD::SETULT: return 0;
4169  case ISD::SETUGT: return 1;
4170  }
4171 }
4172 
4173 // getVCmpInst: return the vector compare instruction for the specified
4174 // vector type and condition code. Since this is for altivec specific code,
4175 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4176 // and v4f32).
4177 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4178  bool HasVSX, bool &Swap, bool &Negate) {
4179  Swap = false;
4180  Negate = false;
4181 
4182  if (VecVT.isFloatingPoint()) {
4183  /* Handle some cases by swapping input operands. */
4184  switch (CC) {
4185  case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4186  case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4187  case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4188  case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4189  case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4190  case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4191  default: break;
4192  }
4193  /* Handle some cases by negating the result. */
4194  switch (CC) {
4195  case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4196  case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4197  case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4198  case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4199  default: break;
4200  }
4201  /* We have instructions implementing the remaining cases. */
4202  switch (CC) {
4203  case ISD::SETEQ:
4204  case ISD::SETOEQ:
4205  if (VecVT == MVT::v4f32)
4206  return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4207  else if (VecVT == MVT::v2f64)
4208  return PPC::XVCMPEQDP;
4209  break;
4210  case ISD::SETGT:
4211  case ISD::SETOGT:
4212  if (VecVT == MVT::v4f32)
4213  return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4214  else if (VecVT == MVT::v2f64)
4215  return PPC::XVCMPGTDP;
4216  break;
4217  case ISD::SETGE:
4218  case ISD::SETOGE:
4219  if (VecVT == MVT::v4f32)
4220  return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4221  else if (VecVT == MVT::v2f64)
4222  return PPC::XVCMPGEDP;
4223  break;
4224  default:
4225  break;
4226  }
4227  llvm_unreachable("Invalid floating-point vector compare condition");
4228  } else {
4229  /* Handle some cases by swapping input operands. */
4230  switch (CC) {
4231  case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4232  case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4233  case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4234  case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4235  default: break;
4236  }
4237  /* Handle some cases by negating the result. */
4238  switch (CC) {
4239  case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4240  case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4241  case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4242  case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4243  default: break;
4244  }
4245  /* We have instructions implementing the remaining cases. */
4246  switch (CC) {
4247  case ISD::SETEQ:
4248  case ISD::SETUEQ:
4249  if (VecVT == MVT::v16i8)
4250  return PPC::VCMPEQUB;
4251  else if (VecVT == MVT::v8i16)
4252  return PPC::VCMPEQUH;
4253  else if (VecVT == MVT::v4i32)
4254  return PPC::VCMPEQUW;
4255  else if (VecVT == MVT::v2i64)
4256  return PPC::VCMPEQUD;
4257  else if (VecVT == MVT::v1i128)
4258  return PPC::VCMPEQUQ;
4259  break;
4260  case ISD::SETGT:
4261  if (VecVT == MVT::v16i8)
4262  return PPC::VCMPGTSB;
4263  else if (VecVT == MVT::v8i16)
4264  return PPC::VCMPGTSH;
4265  else if (VecVT == MVT::v4i32)
4266  return PPC::VCMPGTSW;
4267  else if (VecVT == MVT::v2i64)
4268  return PPC::VCMPGTSD;
4269  else if (VecVT == MVT::v1i128)
4270  return PPC::VCMPGTSQ;
4271  break;
4272  case ISD::SETUGT:
4273  if (VecVT == MVT::v16i8)
4274  return PPC::VCMPGTUB;
4275  else if (VecVT == MVT::v8i16)
4276  return PPC::VCMPGTUH;
4277  else if (VecVT == MVT::v4i32)
4278  return PPC::VCMPGTUW;
4279  else if (VecVT == MVT::v2i64)
4280  return PPC::VCMPGTUD;
4281  else if (VecVT == MVT::v1i128)
4282  return PPC::VCMPGTUQ;
4283  break;
4284  default:
4285  break;
4286  }
4287  llvm_unreachable("Invalid integer vector compare condition");
4288  }
4289 }
4290 
// trySETCC - Try to select a SETCC (or STRICT_FSETCC/STRICT_FSETCCS) node.
// Returns true if the node was selected. Three strategies are used:
//   1. Branchless GPR sequences for integer setcc against 0 or -1 when CR
//      bits are not in use.
//   2. AltiVec/VSX vector compares, possibly commuting operands or negating
//      the result as decided by getVCmpInst.
//   3. The general case: compute the CR field with SelectCC, copy it into
//      CR7, read it via MFOCRF, and rotate the desired bit into the LSB.
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc dl(N);
  unsigned Imm;
  bool IsStrict = N->isStrictFPOpcode();
  // Strict FP setcc nodes carry a chain in operand 0, which shifts the
  // LHS/RHS/condition-code operand positions by one.
  ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
  EVT PtrVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();

  SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
  SDValue RHS = N->getOperand(IsStrict ? 2 : 1);

  if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
    // We can codegen setcc op, imm very efficiently compared to a brcond.
    // Check for those cases here.
    // setcc op, 0
    if (Imm == 0) {
      SDValue Op = LHS;
      switch (CC) {
      default: break;
      case ISD::SETEQ: {
        // (x == 0): cntlzw yields 32 iff x is zero, so bit 26 of the count
        // (i.e. 32 >> 5) is the result; extract it with rlwinm.
        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
        SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETNE: {
        // 32-bit only: (x != 0) from the carry of x + ~0 (carry set iff
        // x != 0); SUBFE then yields exactly that carry bit.
        if (isPPC64) break;
        SDValue AD =
          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                         Op, getI32Imm(~0U, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
        return true;
      }
      case ISD::SETLT: {
        // (x < 0) is simply the sign bit: rotate left by 1 and mask it.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (x > 0): the sign bit of (-x) & ~x is set exactly when x is
        // strictly positive.
        SDValue T =
          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
        SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      }
    } else if (Imm == ~0U) {        // setcc op, -1
      SDValue Op = LHS;
      switch (CC) {
      default: break;
      case ISD::SETEQ:
        // 32-bit only: x + 1 carries iff x == -1; ADDZE of zero picks up
        // that carry as the result.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(1, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                             SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                            MVT::i32,
                                                            getI32Imm(0, dl)),
                                     0), Op.getValue(1));
        return true;
      case ISD::SETNE: {
        // 32-bit only: (x != -1) is (~x != 0); reuse the carry trick on ~x.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(~0U, dl));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
                             SDValue(AD, 1));
        return true;
      }
      case ISD::SETLT: {
        // (x < -1): the sign bit of (x + 1) & x is set exactly when
        // x is less than -1.
        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
                                                    getI32Imm(1, dl)), 0);
        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
                                                    Op), 0);
        SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (x > -1) == (x >= 0): extract the sign bit and invert it.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
        return true;
      }
      }
    }
  }

  // Altivec Vector compare instructions do not set any CR register by default and
  // vector compare operations return the same type as the operands.
  if (!IsStrict && LHS.getValueType().isVector()) {
    if (Subtarget->hasSPE())
      return false;

    EVT VecVT = LHS.getValueType();
    bool Swap, Negate;
    // getVCmpInst may ask us to commute the operands and/or negate the
    // result to reach a predicate with a direct instruction.
    unsigned int VCmpInst =
        getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
    if (Swap)
      std::swap(LHS, RHS);

    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (Negate) {
      // Negate by NOR'ing the compare result with itself.
      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
      CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
                           ResVT, VCmp, VCmp);
      return true;
    }

    CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
    return true;
  }

  // With CR bits enabled, setcc is handled elsewhere.
  if (Subtarget->useCRBits())
    return false;

  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
  if (IsStrict)
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
  SDValue IntCR;

  // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
  // The correct compare instruction is already set by SelectCC()
  if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
    Idx = 1;
  }

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);

  SDValue InFlag(nullptr, 0);  // Null incoming flag value.
  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                               InFlag).getValue(1);

  // Read the whole CR into a GPR; CR7's four bits occupy bit positions
  // 28..31 (IBM numbering) of the result.
  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                         CCReg), 0);

  // Rotate left by (32 - (3 - Idx)) & 31 so that CR7 bit Idx (bit 28 + Idx
  // of the MFOCRF result) lands in the LSB, then mask to a single bit.
  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
                    getI32Imm(31, dl), getI32Imm(31, dl) };
  if (!Inv) {
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // Get the specified bit.
  SDValue Tmp =
    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  // The extracted bit is the complement of the desired condition; flip it.
  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  return true;
}
4452 
4453 /// Does this node represent a load/store node whose address can be represented
4454 /// with a register plus an immediate that's a multiple of \p Val:
4455 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4456  LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4457  StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4458  SDValue AddrOp;
4459  if (LDN)
4460  AddrOp = LDN->getOperand(1);
4461  else if (STN)
4462  AddrOp = STN->getOperand(2);
4463 
4464  // If the address points a frame object or a frame object with an offset,
4465  // we need to check the object alignment.
4466  short Imm = 0;
4467  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4468  AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4469  AddrOp)) {
4470  // If op0 is a frame index that is under aligned, we can't do it either,
4471  // because it is translated to r31 or r1 + slot + offset. We won't know the
4472  // slot number until the stack frame is finalized.
4473  const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4474  unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4475  if ((SlotAlign % Val) != 0)
4476  return false;
4477 
4478  // If we have an offset, we need further check on the offset.
4479  if (AddrOp.getOpcode() != ISD::ADD)
4480  return true;
4481  }
4482 
4483  if (AddrOp.getOpcode() == ISD::ADD)
4484  return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4485 
4486  // If the address comes from the outside, the offset will be zero.
4487  return AddrOp.getOpcode() == ISD::CopyFromReg;
4488 }
4489 
4490 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4491  // Transfer memoperands.
4492  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4493  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4494 }
4495 
4496 static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
4497  bool &NeedSwapOps, bool &IsUnCmp) {
4498 
4499  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4500 
4501  SDValue LHS = N->getOperand(0);
4502  SDValue RHS = N->getOperand(1);
4503  SDValue TrueRes = N->getOperand(2);
4504  SDValue FalseRes = N->getOperand(3);
4505  ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4506  if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4507  N->getSimpleValueType(0) != MVT::i32))
4508  return false;
4509 
4510  // We are looking for any of:
4511  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4512  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4513  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4514  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4515  int64_t TrueResVal = TrueConst->getSExtValue();
4516  if ((TrueResVal < -1 || TrueResVal > 1) ||
4517  (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4518  (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4519  (TrueResVal == 0 &&
4520  (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4521  return false;
4522 
4523  SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4524  ? FalseRes
4525  : FalseRes.getOperand(0);
4526  bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4527  if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4528  SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4529  return false;
4530 
4531  // Without this setb optimization, the outer SELECT_CC will be manually
4532  // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4533  // transforms pseudo instruction to isel instruction. When there are more than
4534  // one use for result like zext/sext, with current optimization we only see
4535  // isel is replaced by setb but can't see any significant gain. Since
4536  // setb has longer latency than original isel, we should avoid this. Another
4537  // point is that setb requires comparison always kept, it can break the
4538  // opportunity to get the comparison away if we have in future.
4539  if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4540  return false;
4541 
4542  SDValue InnerLHS = SetOrSelCC.getOperand(0);
4543  SDValue InnerRHS = SetOrSelCC.getOperand(1);
4544  ISD::CondCode InnerCC =
4545  cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4546  // If the inner comparison is a select_cc, make sure the true/false values are
4547  // 1/-1 and canonicalize it if needed.
4548  if (InnerIsSel) {
4549  ConstantSDNode *SelCCTrueConst =
4550  dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4551  ConstantSDNode *SelCCFalseConst =
4552  dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4553  if (!SelCCTrueConst || !SelCCFalseConst)
4554  return false;
4555  int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4556  int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4557  // The values must be -1/1 (requiring a swap) or 1/-1.
4558  if (SelCCTVal == -1 && SelCCFVal == 1) {
4559  std::swap(InnerLHS, InnerRHS);
4560  } else if (SelCCTVal != 1 || SelCCFVal != -1)
4561  return false;
4562  }
4563 
4564  // Canonicalize unsigned case
4565  if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4566  IsUnCmp = true;
4567  InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4568  }
4569 
4570  bool InnerSwapped = false;
4571  if (LHS == InnerRHS && RHS == InnerLHS)
4572  InnerSwapped = true;
4573  else if (LHS != InnerLHS || RHS != InnerRHS)
4574  return false;
4575 
4576  switch (CC) {
4577  // (select_cc lhs, rhs, 0, \
4578  // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4579  case ISD::SETEQ:
4580  if (!InnerIsSel)
4581  return false;
4582  if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4583  return false;
4584  NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4585  break;
4586 
4587  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4588  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4589  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4590  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4591  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4592  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4593  case ISD::SETULT:
4594  if (!IsUnCmp && InnerCC != ISD::SETNE)
4595  return false;
4596  IsUnCmp = true;
4598  case ISD::SETLT:
4599  if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4600  (InnerCC == ISD::SETLT && InnerSwapped))
4601  NeedSwapOps = (TrueResVal == 1);
4602  else
4603  return false;
4604  break;
4605 
4606  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4607  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4608  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4609  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4610  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4611  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4612  case ISD::SETUGT:
4613  if (!IsUnCmp && InnerCC != ISD::SETNE)
4614  return false;
4615  IsUnCmp = true;
4617  case ISD::SETGT:
4618  if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4619  (InnerCC == ISD::SETGT && InnerSwapped))
4620  NeedSwapOps = (TrueResVal == -1);
4621  else
4622  return false;
4623  break;
4624 
4625  default:
4626  return false;
4627  }
4628 
4629  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4630  LLVM_DEBUG(N->dump());
4631 
4632  return true;
4633 }
4634 
4635 // Return true if it's a software square-root/divide operand.
4636 static bool isSWTestOp(SDValue N) {
4637  if (N.getOpcode() == PPCISD::FTSQRT)
4638  return true;
4639  if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
4640  return false;
4641  switch (N.getConstantOperandVal(0)) {
4642  case Intrinsic::ppc_vsx_xvtdivdp:
4643  case Intrinsic::ppc_vsx_xvtdivsp:
4644  case Intrinsic::ppc_vsx_xvtsqrtdp:
4645  case Intrinsic::ppc_vsx_xvtsqrtsp:
4646  return true;
4647  }
4648  return false;
4649 }
4650 
// Try to fold a BR_CC that tests a software sqrt/divide test bit into a
// single conditional branch (BCC) on the test instruction's CR result.
// Returns true if the node was selected.
bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
  assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
  // We are looking for following patterns, where `truncate to i1` actually has
  // the same semantic with `and 1`.
  // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
  // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
  // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
  // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
  // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
  // BR_CC operands: 0 = chain, 1 = condition code, 2 = LHS, 3 = RHS,
  // 4 = branch destination.
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  if (CC != ISD::SETEQ && CC != ISD::SETNE)
    return false;

  // The comparison must be against the constant zero.
  SDValue CmpRHS = N->getOperand(3);
  if (!isa<ConstantSDNode>(CmpRHS) ||
      cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
    return false;

  // The LHS must wrap (and/truncate) a software test op, whose value is the
  // operand we branch on below.
  SDValue CmpLHS = N->getOperand(2);
  if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
    return false;

  unsigned PCC = 0;
  bool IsCCNE = CC == ISD::SETNE;
  if (CmpLHS.getOpcode() == ISD::AND &&
      isa<ConstantSDNode>(CmpLHS.getOperand(1)))
    // Each single-bit mask selects one field of the 4-bit CR result.
    switch (CmpLHS.getConstantOperandVal(1)) {
    case 1:
      PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
      break;
    case 2:
      PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
      break;
    case 4:
      PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
      break;
    case 8:
      PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
      break;
    default:
      return false;
    }
  else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
           CmpLHS.getValueType() == MVT::i1)
    // truncate-to-i1 keeps only the lowest bit, same as `and 1`.
    PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;

  if (PCC) {
    SDLoc dl(N);
    // BCC operands: predicate, CR value (the SW test op), destination, chain.
    SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
                     N->getOperand(0)};
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return true;
  }
  return false;
}
4709 
// Try to select an i32 AND-with-immediate as a single RLWINM
// (rotate-left-word then AND with mask). Returns true if selected.
bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  unsigned Imm;
  // Only handle an AND whose RHS is a 32-bit constant mask.
  if (!isInt32Immediate(N->getOperand(1), Imm))
    return false;

  SDLoc dl(N);
  SDValue Val = N->getOperand(0);
  unsigned SH, MB, ME;
  // If this is an and of a value rotated between 0 and 31 bits and then and'd
  // with a mask, emit rlwinm
  if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
    Val = Val.getOperand(0);
    SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
                     getI32Imm(ME, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // If this is just a masked value where the input is not handled, and
  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
  // with a zero shift amount.
  if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
    SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
                     getI32Imm(ME, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // AND X, 0 -> 0, not "rlwinm 32".
  // (A zero mask falls through the run-of-ones check above, so handle it by
  // replacing the AND with its constant-zero operand.)
  if (Imm == 0) {
    ReplaceUses(SDValue(N, 0), N->getOperand(1));
    return true;
  }

  return false;
}
4746 
4747 bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
4748  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4749  uint64_t Imm64;
4750  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4751  return false;
4752 
4753  unsigned MB, ME;
4754  if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
4755  // MB ME
4756  // +----------------------+
4757  // |xxxxxxxxxxx00011111000|
4758  // +----------------------+
4759  // 0 32 64
4760  // We can only do it if the MB is larger than 32 and MB <= ME
4761  // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
4762  // we didn't rotate it.
4763  SDLoc dl(N);
4764  SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
4765  getI64Imm(ME - 32, dl)};
4766  CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
4767  return true;
4768  }
4769 
4770  return false;
4771 }
4772 
// Try to select an i64 AND-with-immediate as two RLDICL instructions when
// extending the mask's leading zeros to ones turns it into a (possibly
// wrapped) run of ones. Returns true if selected.
bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
    return false;

  // Do nothing if it is 16-bit imm as the pattern in the .td file handle
  // it well with "andi.".
  if (isUInt<16>(Imm64))
    return false;

  SDLoc Loc(N);
  SDValue Val = N->getOperand(0);

  // Optimized with two rldicl's as follows:
  // Add missing bits on left to the mask and check that the mask is a
  // wrapped run of ones, i.e.
  // Change pattern |0001111100000011111111|
  // to             |1111111100000011111111|.
  unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
  if (NumOfLeadingZeros != 0)
    Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);

  unsigned MB, ME;
  if (!isRunOfOnes64(Imm64, MB, ME))
    return false;

  //         ME     MB                      MB-ME+63
  // +----------------------+     +----------------------+
  // |1111111100000011111111| ->  |0000001111111111111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
  unsigned OnesOnLeft = ME + 1;
  unsigned ZerosInBetween = (MB - ME + 63) & 63;
  // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and
  // clear on the left the bits that are already zeros in the mask.
  Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
                                       getI64Imm(OnesOnLeft, Loc),
                                       getI64Imm(ZerosInBetween, Loc)),
                0);
  //        MB-ME+63                       ME     MB
  // +----------------------+     +----------------------+
  // |0000001111111111111111| ->  |0001111100000011111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
  // left the number of ones we previously added.
  SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
                   getI64Imm(NumOfLeadingZeros, Loc)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}
4826 
4827 bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
4828  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4829  unsigned Imm;
4830  if (!isInt32Immediate(N->getOperand(1), Imm))
4831  return false;
4832 
4833  SDValue Val = N->getOperand(0);
4834  unsigned Imm2;
4835  // ISD::OR doesn't get all the bitfield insertion fun.
4836  // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4837  // bitfield insert.
4838  if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
4839  return false;
4840 
4841  // The idea here is to check whether this is equivalent to:
4842  // (c1 & m) | (x & ~m)
4843  // where m is a run-of-ones mask. The logic here is that, for each bit in
4844  // c1 and c2:
4845  // - if both are 1, then the output will be 1.
4846  // - if both are 0, then the output will be 0.
4847  // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4848  // come from x.
4849  // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4850  // be 0.
4851  // If that last condition is never the case, then we can form m from the
4852  // bits that are the same between c1 and c2.
4853  unsigned MB, ME;
4854  if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
4855  SDLoc dl(N);
4856  SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
4857  getI32Imm(MB, dl), getI32Imm(ME, dl)};
4858  ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
4859  return true;
4860  }
4861 
4862  return false;
4863 }
4864 
// Try to select an i64 AND with a low-bit mask (zero-extension mask) as a
// single RLDICL, folding an inner logical right shift into the rotate when
// possible. Returns true if selected (always true past the mask check).
bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  // The mask must be a run of ones anchored at bit 0 (isMask_64).
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
    return false;

  // If this is a 64-bit zero-extension mask, emit rldicl.
  unsigned MB = 64 - countTrailingOnes(Imm64);
  unsigned SH = 0;
  unsigned Imm;
  SDValue Val = N->getOperand(0);
  SDLoc dl(N);

  if (Val.getOpcode() == ISD::ANY_EXTEND) {
    // (and (any_ext (srl x, n)), mask): widen x by inserting it into an
    // implicit-def register, then fold the shift below as a rotate.
    auto Op0 = Val.getOperand(0);
    if (Op0.getOpcode() == ISD::SRL &&
        isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {

      auto ResultType = Val.getNode()->getValueType(0);
      auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
      SDValue IDVal(ImDef, 0);

      // NOTE(review): the constant 1 here is the target subregister index
      // (presumably the 32-bit subreg) — confirm against PPCRegisterInfo.
      Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
                                           IDVal, Op0.getOperand(0),
                                           getI32Imm(1, dl)),
                    0);
      SH = 64 - Imm;
    }
  }

  // If the operand is a logical right shift, we can fold it into this
  // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
  // for n <= mb. The right shift is really a left rotate followed by a
  // mask, and this mask is a more-restrictive sub-mask of the mask implied
  // by the shift.
  if (Val.getOpcode() == ISD::SRL &&
      isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
    assert(Imm < 64 && "Illegal shift amount");
    Val = Val.getOperand(0);
    SH = 64 - Imm;
  }

  SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}
4911 
4912 bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
4913  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4914  uint64_t Imm64;
4915  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
4916  !isMask_64(~Imm64))
4917  return false;
4918 
4919  // If this is a negated 64-bit zero-extension mask,
4920  // i.e. the immediate is a sequence of ones from most significant side
4921  // and all zero for reminder, we should use rldicr.
4922  unsigned MB = 63 - countTrailingOnes(~Imm64);
4923  unsigned SH = 0;
4924  SDLoc dl(N);
4925  SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
4926  CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
4927  return true;
4928 }
4929 
4930 bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
4931  assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
4932  uint64_t Imm64;
4933  unsigned MB, ME;
4934  SDValue N0 = N->getOperand(0);
4935 
4936  // We won't get fewer instructions if the imm is 32-bit integer.
4937  // rldimi requires the imm to have consecutive ones with both sides zero.
4938  // Also, make sure the first Op has only one use, otherwise this may increase
4939  // register pressure since rldimi is destructive.
4940  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
4941  isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
4942  return false;
4943 
4944  unsigned SH = 63 - ME;
4945  SDLoc Dl(N);
4946  // Use select64Imm for making LI instr instead of directly putting Imm64
4947  SDValue Ops[] = {