LLVM  15.0.0git
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a pattern matching instruction selector for PowerPC,
10 // converting from a legalized dag to a PPC dag.
11 //
12 //===----------------------------------------------------------------------===//
13 
16 #include "PPC.h"
17 #include "PPCISelLowering.h"
18 #include "PPCMachineFunctionInfo.h"
19 #include "PPCSubtarget.h"
20 #include "PPCTargetMachine.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/GlobalValue.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/IntrinsicsPowerPC.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CodeGen.h"
52 #include "llvm/Support/Compiler.h"
53 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/KnownBits.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <iterator>
63 #include <limits>
64 #include <memory>
65 #include <new>
66 #include <tuple>
67 #include <utility>
68 
69 using namespace llvm;
70 
71 #define DEBUG_TYPE "ppc-codegen"
72 
73 STATISTIC(NumSextSetcc,
74  "Number of (sext(setcc)) nodes expanded into GPR sequence.");
75 STATISTIC(NumZextSetcc,
76  "Number of (zext(setcc)) nodes expanded into GPR sequence.");
77 STATISTIC(SignExtensionsAdded,
78  "Number of sign extensions for compare inputs added.");
79 STATISTIC(ZeroExtensionsAdded,
80  "Number of zero extensions for compare inputs added.");
81 STATISTIC(NumLogicOpsOnComparison,
82  "Number of logical ops on i1 values calculated in GPR.");
83 STATISTIC(OmittedForNonExtendUses,
84  "Number of compares not eliminated as they have non-extending uses.");
85 STATISTIC(NumP9Setb,
86  "Number of compares lowered to setb.");
87 
88 // FIXME: Remove this once the bug has been fixed!
89 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
90 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
91 
92 static cl::opt<bool>
93  UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
94  cl::desc("use aggressive ppc isel for bit permutations"),
95  cl::Hidden);
97  "ppc-bit-perm-rewriter-stress-rotates",
98  cl::desc("stress rotate selection in aggressive ppc isel for "
99  "bit permutations"),
100  cl::Hidden);
101 
103  "ppc-use-branch-hint", cl::init(true),
104  cl::desc("Enable static hinting of branches on ppc"),
105  cl::Hidden);
106 
108  "ppc-tls-opt", cl::init(true),
109  cl::desc("Enable tls optimization peephole"),
110  cl::Hidden);
111 
115 
117  "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
118  cl::desc("Specify the types of comparisons to emit GPR-only code for."),
119  cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
120  clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
121  clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
122  clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
123  clEnumValN(ICGPR_NonExtIn, "nonextin",
124  "Only comparisons where inputs don't need [sz]ext."),
125  clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
126  clEnumValN(ICGPR_ZextI32, "zexti32",
127  "Only i32 comparisons with zext result."),
128  clEnumValN(ICGPR_ZextI64, "zexti64",
129  "Only i64 comparisons with zext result."),
130  clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
131  clEnumValN(ICGPR_SextI32, "sexti32",
132  "Only i32 comparisons with sext result."),
133  clEnumValN(ICGPR_SextI64, "sexti64",
134  "Only i64 comparisons with sext result.")));
135 namespace {
136 
137  //===--------------------------------------------------------------------===//
138  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
139  /// instructions for SelectionDAG operations.
140  ///
141  class PPCDAGToDAGISel : public SelectionDAGISel {
142  const PPCTargetMachine &TM;
143  const PPCSubtarget *Subtarget = nullptr;
144  const PPCTargetLowering *PPCLowering = nullptr;
145  unsigned GlobalBaseReg = 0;
146 
147  public:
148  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
149  : SelectionDAGISel(tm, OptLevel), TM(tm) {}
150 
151  bool runOnMachineFunction(MachineFunction &MF) override {
152  // Make sure we re-emit a set of the global base reg if necessary
153  GlobalBaseReg = 0;
154  Subtarget = &MF.getSubtarget<PPCSubtarget>();
155  PPCLowering = Subtarget->getTargetLowering();
156  if (Subtarget->hasROPProtect()) {
157  // Create a place on the stack for the ROP Protection Hash.
158  // The ROP Protection Hash will always be 8 bytes and aligned to 8
159  // bytes.
160  MachineFrameInfo &MFI = MF.getFrameInfo();
162  const int Result = MFI.CreateStackObject(8, Align(8), false);
163  FI->setROPProtectionHashSaveIndex(Result);
164  }
166 
167  return true;
168  }
169 
170  void PreprocessISelDAG() override;
171  void PostprocessISelDAG() override;
172 
173  /// getI16Imm - Return a target constant with the specified value, of type
174  /// i16.
175  inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
176  return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
177  }
178 
179  /// getI32Imm - Return a target constant with the specified value, of type
180  /// i32.
181  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
182  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
183  }
184 
185  /// getI64Imm - Return a target constant with the specified value, of type
186  /// i64.
187  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
188  return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
189  }
190 
191  /// getSmallIPtrImm - Return a target constant of pointer type.
192  inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) {
193  return CurDAG->getTargetConstant(
194  Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
195  }
196 
197  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
198  /// rotate and mask opcode and mask operation.
199  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
200  unsigned &SH, unsigned &MB, unsigned &ME);
201 
202  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
203  /// base register. Return the virtual register that holds this value.
204  SDNode *getGlobalBaseReg();
205 
206  void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0);
207 
208  // Select - Convert the specified operand from a target-independent to a
209  // target-specific node if it hasn't already been changed.
210  void Select(SDNode *N) override;
211 
212  bool tryBitfieldInsert(SDNode *N);
213  bool tryBitPermutation(SDNode *N);
214  bool tryIntCompareInGPR(SDNode *N);
215 
216  // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
217  // an X-Form load instruction with the offset being a relocation coming from
218  // the PPCISD::ADD_TLS.
219  bool tryTLSXFormLoad(LoadSDNode *N);
220  // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
221  // an X-Form store instruction with the offset being a relocation coming from
222  // the PPCISD::ADD_TLS.
223  bool tryTLSXFormStore(StoreSDNode *N);
224  /// SelectCC - Select a comparison of the specified values with the
225  /// specified condition code, returning the CR# of the expression.
226  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
227  const SDLoc &dl, SDValue Chain = SDValue());
228 
229  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
230  /// immediate field. Note that the operand at this point is already the
231  /// result of a prior SelectAddressRegImm call.
232  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
233  if (N.getOpcode() == ISD::TargetConstant ||
234  N.getOpcode() == ISD::TargetGlobalAddress) {
235  Out = N;
236  return true;
237  }
238 
239  return false;
240  }
241 
242  /// SelectDSForm - Returns true if address N can be represented by the
243  /// addressing mode of DSForm instructions (a base register, plus a signed
244  /// 16-bit displacement that is a multiple of 4.
245  bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
246  return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
247  Align(4)) == PPC::AM_DSForm;
248  }
249 
250  /// SelectDQForm - Returns true if address N can be represented by the
251  /// addressing mode of DQForm instructions (a base register, plus a signed
252  /// 16-bit displacement that is a multiple of 16.
253  bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
254  return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
255  Align(16)) == PPC::AM_DQForm;
256  }
257 
258  /// SelectDForm - Returns true if address N can be represented by
259  /// the addressing mode of DForm instructions (a base register, plus a
260  /// signed 16-bit immediate.
261  bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
262  return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
263  None) == PPC::AM_DForm;
264  }
265 
266  /// SelectPCRelForm - Returns true if address N can be represented by
267  /// PC-Relative addressing mode.
268  bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
269  SDValue &Base) {
270  return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
271  None) == PPC::AM_PCRel;
272  }
273 
274  /// SelectPDForm - Returns true if address N can be represented by Prefixed
275  /// DForm addressing mode (a base register, plus a signed 34-bit immediate.
276  bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
277  return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
279  }
280 
281  /// SelectXForm - Returns true if address N can be represented by the
282  /// addressing mode of XForm instructions (an indexed [r+r] operation).
283  bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
284  return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
285  None) == PPC::AM_XForm;
286  }
287 
288  /// SelectForceXForm - Given the specified address, force it to be
289  /// represented as an indexed [r+r] operation (an XForm instruction).
290  bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
291  SDValue &Base) {
292  return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
294  }
295 
296  /// SelectAddrIdx - Given the specified address, check to see if it can be
297  /// represented as an indexed [r+r] operation.
298  /// This is for xform instructions whose associated displacement form is D.
299  /// The last parameter \p 0 means associated D form has no requirment for 16
300  /// bit signed displacement.
301  /// Returns false if it can be represented by [r+imm], which are preferred.
302  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
303  return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
304  }
305 
306  /// SelectAddrIdx4 - Given the specified address, check to see if it can be
307  /// represented as an indexed [r+r] operation.
308  /// This is for xform instructions whose associated displacement form is DS.
309  /// The last parameter \p 4 means associated DS form 16 bit signed
310  /// displacement must be a multiple of 4.
311  /// Returns false if it can be represented by [r+imm], which are preferred.
312  bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
313  return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
314  Align(4));
315  }
316 
317  /// SelectAddrIdx16 - Given the specified address, check to see if it can be
318  /// represented as an indexed [r+r] operation.
319  /// This is for xform instructions whose associated displacement form is DQ.
320  /// The last parameter \p 16 means associated DQ form 16 bit signed
321  /// displacement must be a multiple of 16.
322  /// Returns false if it can be represented by [r+imm], which are preferred.
323  bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
324  return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
325  Align(16));
326  }
327 
328  /// SelectAddrIdxOnly - Given the specified address, force it to be
329  /// represented as an indexed [r+r] operation.
330  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
331  return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
332  }
333 
334  /// SelectAddrImm - Returns true if the address N can be represented by
335  /// a base register plus a signed 16-bit displacement [r+imm].
336  /// The last parameter \p 0 means D form has no requirment for 16 bit signed
337  /// displacement.
338  bool SelectAddrImm(SDValue N, SDValue &Disp,
339  SDValue &Base) {
340  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
341  }
342 
343  /// SelectAddrImmX4 - Returns true if the address N can be represented by
344  /// a base register plus a signed 16-bit displacement that is a multiple of
345  /// 4 (last parameter). Suitable for use by STD and friends.
346  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
347  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
348  }
349 
350  /// SelectAddrImmX16 - Returns true if the address N can be represented by
351  /// a base register plus a signed 16-bit displacement that is a multiple of
352  /// 16(last parameter). Suitable for use by STXV and friends.
353  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
354  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
355  Align(16));
356  }
357 
358  /// SelectAddrImmX34 - Returns true if the address N can be represented by
359  /// a base register plus a signed 34-bit displacement. Suitable for use by
360  /// PSTXVP and friends.
361  bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
362  return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
363  }
364 
365  // Select an address into a single register.
366  bool SelectAddr(SDValue N, SDValue &Base) {
367  Base = N;
368  return true;
369  }
370 
371  bool SelectAddrPCRel(SDValue N, SDValue &Base) {
372  return PPCLowering->SelectAddressPCRel(N, Base);
373  }
374 
375  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
376  /// inline asm expressions. It is always correct to compute the value into
377  /// a register. The case of adding a (possibly relocatable) constant to a
378  /// register can be improved, but it is wrong to substitute Reg+Reg for
379  /// Reg in an asm, because the load or store opcode would have to change.
380  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
381  unsigned ConstraintID,
382  std::vector<SDValue> &OutOps) override {
383  switch(ConstraintID) {
384  default:
385  errs() << "ConstraintID: " << ConstraintID << "\n";
386  llvm_unreachable("Unexpected asm memory constraint");
393  // We need to make sure that this one operand does not end up in r0
394  // (because we might end up lowering this as 0(%op)).
395  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
396  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
397  SDLoc dl(Op);
398  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
399  SDValue NewOp =
400  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
401  dl, Op.getValueType(),
402  Op, RC), 0);
403 
404  OutOps.push_back(NewOp);
405  return false;
406  }
407  return true;
408  }
409 
410  StringRef getPassName() const override {
411  return "PowerPC DAG->DAG Pattern Instruction Selection";
412  }
413 
414 // Include the pieces autogenerated from the target description.
415 #include "PPCGenDAGISel.inc"
416 
417 private:
418  bool trySETCC(SDNode *N);
419  bool tryFoldSWTestBRCC(SDNode *N);
420  bool tryAsSingleRLDICL(SDNode *N);
421  bool tryAsSingleRLDICR(SDNode *N);
422  bool tryAsSingleRLWINM(SDNode *N);
423  bool tryAsSingleRLWINM8(SDNode *N);
424  bool tryAsSingleRLWIMI(SDNode *N);
425  bool tryAsPairOfRLDICL(SDNode *N);
426  bool tryAsSingleRLDIMI(SDNode *N);
427 
428  void PeepholePPC64();
429  void PeepholePPC64ZExt();
430  void PeepholeCROps();
431 
432  SDValue combineToCMPB(SDNode *N);
433  void foldBoolExts(SDValue &Res, SDNode *&N);
434 
435  bool AllUsersSelectZero(SDNode *N);
436  void SwapAllSelectUsers(SDNode *N);
437 
438  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
439  void transferMemOperands(SDNode *N, SDNode *Result);
440  };
441 
442 } // end anonymous namespace
443 
444 /// getGlobalBaseReg - Output the instructions required to put the
445 /// base address to use for accessing globals into a register.
446 ///
447 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
448  if (!GlobalBaseReg) {
449  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
450  // Insert the set of GlobalBaseReg into the first MBB of the function
451  MachineBasicBlock &FirstMBB = MF->front();
453  const Module *M = MF->getFunction().getParent();
454  DebugLoc dl;
455 
456  if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
457  if (Subtarget->isTargetELF()) {
458  GlobalBaseReg = PPC::R30;
459  if (!Subtarget->isSecurePlt() &&
460  M->getPICLevel() == PICLevel::SmallPIC) {
461  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
462  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
463  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
464  } else {
465  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
466  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
467  Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
468  BuildMI(FirstMBB, MBBI, dl,
469  TII.get(PPC::UpdateGBR), GlobalBaseReg)
471  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
472  }
473  } else {
474  GlobalBaseReg =
475  RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
476  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
477  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
478  }
479  } else {
480  // We must ensure that this sequence is dominated by the prologue.
481  // FIXME: This is a bit of a big hammer since we don't get the benefits
482  // of shrink-wrapping whenever we emit this instruction. Considering
483  // this is used in any function where we emit a jump table, this may be
484  // a significant limitation. We should consider inserting this in the
485  // block where it is used and then commoning this sequence up if it
486  // appears in multiple places.
487  // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
488  // MovePCtoLR8.
489  MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
490  GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
491  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
492  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
493  }
494  }
495  return CurDAG->getRegister(GlobalBaseReg,
496  PPCLowering->getPointerTy(CurDAG->getDataLayout()))
497  .getNode();
498 }
499 
500 // Check if a SDValue has the toc-data attribute.
501 static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
502  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
503  if (!GA)
504  return false;
505 
506  const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
507  if (!GV)
508  return false;
509 
510  if (!GV->hasAttribute("toc-data"))
511  return false;
512 
513  // TODO: These asserts should be updated as more support for the toc data
514  // transformation is added (struct support, etc.).
515 
516  assert(
517  PointerSize >= GV->getAlign().valueOrOne().value() &&
518  "GlobalVariables with an alignment requirement stricter than TOC entry "
519  "size not supported by the toc data transformation.");
520 
521  Type *GVType = GV->getValueType();
522 
523  assert(GVType->isSized() && "A GlobalVariable's size must be known to be "
524  "supported by the toc data transformation.");
525 
526  if (GVType->isVectorTy())
527  report_fatal_error("A GlobalVariable of Vector type is not currently "
528  "supported by the toc data transformation.");
529 
530  if (GVType->isArrayTy())
531  report_fatal_error("A GlobalVariable of Array type is not currently "
532  "supported by the toc data transformation.");
533 
534  if (GVType->isStructTy())
535  report_fatal_error("A GlobalVariable of Struct type is not currently "
536  "supported by the toc data transformation.");
537 
538  assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&
539  "A GlobalVariable with size larger than a TOC entry is not currently "
540  "supported by the toc data transformation.");
541 
542  if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())
543  report_fatal_error("A GlobalVariable with private or local linkage is not "
544  "currently supported by the toc data transformation.");
545 
546  assert(!GV->hasCommonLinkage() &&
547  "Tentative definitions cannot have the mapping class XMC_TD.");
548 
549  return true;
550 }
551 
552 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
553 /// operand. If so Imm will receive the 32-bit value.
554 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
555  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
556  Imm = cast<ConstantSDNode>(N)->getZExtValue();
557  return true;
558  }
559  return false;
560 }
561 
562 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
563 /// operand. If so Imm will receive the 64-bit value.
565  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
566  Imm = cast<ConstantSDNode>(N)->getZExtValue();
567  return true;
568  }
569  return false;
570 }
571 
572 // isInt32Immediate - This method tests to see if a constant operand.
573 // If so Imm will receive the 32 bit value.
574 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
575  return isInt32Immediate(N.getNode(), Imm);
576 }
577 
578 /// isInt64Immediate - This method tests to see if the value is a 64-bit
579 /// constant operand. If so Imm will receive the 64-bit value.
581  return isInt64Immediate(N.getNode(), Imm);
582 }
583 
584 static unsigned getBranchHint(unsigned PCC,
585  const FunctionLoweringInfo &FuncInfo,
586  const SDValue &DestMBB) {
587  assert(isa<BasicBlockSDNode>(DestMBB));
588 
589  if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
590 
591  const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
592  const Instruction *BBTerm = BB->getTerminator();
593 
594  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
595 
596  const BasicBlock *TBB = BBTerm->getSuccessor(0);
597  const BasicBlock *FBB = BBTerm->getSuccessor(1);
598 
599  auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
600  auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
601 
602  // We only want to handle cases which are easy to predict at static time, e.g.
603  // C++ throw statement, that is very likely not taken, or calling never
604  // returned function, e.g. stdlib exit(). So we set Threshold to filter
605  // unwanted cases.
606  //
607  // Below is LLVM branch weight table, we only want to handle case 1, 2
608  //
609  // Case Taken:Nontaken Example
610  // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
611  // 2. Invoke-terminating 1:1048575
612  // 3. Coldblock 4:64 __builtin_expect
613  // 4. Loop Branch 124:4 For loop
614  // 5. PH/ZH/FPH 20:12
615  const uint32_t Threshold = 10000;
616 
617  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
618  return PPC::BR_NO_HINT;
619 
620  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
621  << "::" << BB->getName() << "'\n"
622  << " -> " << TBB->getName() << ": " << TProb << "\n"
623  << " -> " << FBB->getName() << ": " << FProb << "\n");
624 
625  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
626 
627  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
628  // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
629  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
630  std::swap(TProb, FProb);
631 
632  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
633 }
634 
635 // isOpcWithIntImmediate - This method tests to see if the node is a specific
636 // opcode and that it has a immediate integer right operand.
637 // If so Imm will receive the 32 bit value.
638 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
639  return N->getOpcode() == Opc
640  && isInt32Immediate(N->getOperand(1).getNode(), Imm);
641 }
642 
643 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) {
644  SDLoc dl(SN);
645  int FI = cast<FrameIndexSDNode>(N)->getIndex();
646  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
647  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
648  if (SN->hasOneUse())
649  CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
650  getSmallIPtrImm(Offset, dl));
651  else
652  ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
653  getSmallIPtrImm(Offset, dl)));
654 }
655 
656 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
657  bool isShiftMask, unsigned &SH,
658  unsigned &MB, unsigned &ME) {
659  // Don't even go down this path for i64, since different logic will be
660  // necessary for rldicl/rldicr/rldimi.
661  if (N->getValueType(0) != MVT::i32)
662  return false;
663 
664  unsigned Shift = 32;
665  unsigned Indeterminant = ~0; // bit mask marking indeterminant results
666  unsigned Opcode = N->getOpcode();
667  if (N->getNumOperands() != 2 ||
668  !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
669  return false;
670 
671  if (Opcode == ISD::SHL) {
672  // apply shift left to mask if it comes first
673  if (isShiftMask) Mask = Mask << Shift;
674  // determine which bits are made indeterminant by shift
675  Indeterminant = ~(0xFFFFFFFFu << Shift);
676  } else if (Opcode == ISD::SRL) {
677  // apply shift right to mask if it comes first
678  if (isShiftMask) Mask = Mask >> Shift;
679  // determine which bits are made indeterminant by shift
680  Indeterminant = ~(0xFFFFFFFFu >> Shift);
681  // adjust for the left rotate
682  Shift = 32 - Shift;
683  } else if (Opcode == ISD::ROTL) {
684  Indeterminant = 0;
685  } else {
686  return false;
687  }
688 
689  // if the mask doesn't intersect any Indeterminant bits
690  if (Mask && !(Mask & Indeterminant)) {
691  SH = Shift & 31;
692  // make sure the mask is still a mask (wrap arounds may not be)
693  return isRunOfOnes(Mask, MB, ME);
694  }
695  return false;
696 }
697 
698 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
699  SDValue Base = ST->getBasePtr();
700  if (Base.getOpcode() != PPCISD::ADD_TLS)
701  return false;
702  SDValue Offset = ST->getOffset();
703  if (!Offset.isUndef())
704  return false;
705  if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
706  return false;
707 
708  SDLoc dl(ST);
709  EVT MemVT = ST->getMemoryVT();
710  EVT RegVT = ST->getValue().getValueType();
711 
712  unsigned Opcode;
713  switch (MemVT.getSimpleVT().SimpleTy) {
714  default:
715  return false;
716  case MVT::i8: {
717  Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
718  break;
719  }
720  case MVT::i16: {
721  Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
722  break;
723  }
724  case MVT::i32: {
725  Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
726  break;
727  }
728  case MVT::i64: {
729  Opcode = PPC::STDXTLS;
730  break;
731  }
732  }
733  SDValue Chain = ST->getChain();
734  SDVTList VTs = ST->getVTList();
735  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
736  Chain};
737  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
738  transferMemOperands(ST, MN);
739  ReplaceNode(ST, MN);
740  return true;
741 }
742 
743 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
744  SDValue Base = LD->getBasePtr();
745  if (Base.getOpcode() != PPCISD::ADD_TLS)
746  return false;
747  SDValue Offset = LD->getOffset();
748  if (!Offset.isUndef())
749  return false;
750  if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
751  return false;
752 
753  SDLoc dl(LD);
754  EVT MemVT = LD->getMemoryVT();
755  EVT RegVT = LD->getValueType(0);
756  unsigned Opcode;
757  switch (MemVT.getSimpleVT().SimpleTy) {
758  default:
759  return false;
760  case MVT::i8: {
761  Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
762  break;
763  }
764  case MVT::i16: {
765  Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
766  break;
767  }
768  case MVT::i32: {
769  Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
770  break;
771  }
772  case MVT::i64: {
773  Opcode = PPC::LDXTLS;
774  break;
775  }
776  }
777  SDValue Chain = LD->getChain();
778  SDVTList VTs = LD->getVTList();
779  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
780  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
781  transferMemOperands(LD, MN);
782  ReplaceNode(LD, MN);
783  return true;
784 }
785 
786 /// Turn an or of two masked values into the rotate left word immediate then
787 /// mask insert (rlwimi) instruction.
// Attempt to select an OR node as a single rlwimi (rotate-left-word-immediate
// then mask-insert). This only fires when the known-zero masks of the two
// operands together cover all 32 bits, i.e. the OR is provably a disjoint
// bitfield insert. Returns true (and replaces N) on success, false otherwise.
788 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
789  SDValue Op0 = N->getOperand(0);
790  SDValue Op1 = N->getOperand(1);
791  SDLoc dl(N);
792 
793  KnownBits LKnown = CurDAG->computeKnownBits(Op0);
794  KnownBits RKnown = CurDAG->computeKnownBits(Op1);
795 
// Known-zero bits of each operand; together they must cover the full word.
796  unsigned TargetMask = LKnown.Zero.getZExtValue();
797  unsigned InsertMask = RKnown.Zero.getZExtValue();
798 
799  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
800  unsigned Op0Opc = Op0.getOpcode();
801  unsigned Op1Opc = Op1.getOpcode();
802  unsigned Value, SH = 0;
// After inversion the masks describe the bits each operand may contribute.
803  TargetMask = ~TargetMask;
804  InsertMask = ~InsertMask;
805 
806  // If the LHS has a foldable shift and the RHS does not, then swap it to the
807  // RHS so that we can fold the shift into the insert.
808  if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
809  if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
810  Op0.getOperand(0).getOpcode() == ISD::SRL) {
811  if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
812  Op1.getOperand(0).getOpcode() != ISD::SRL) {
813  std::swap(Op0, Op1);
814  std::swap(Op0Opc, Op1Opc);
815  std::swap(TargetMask, InsertMask);
816  }
817  }
818  } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
819  if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
820  Op1.getOperand(0).getOpcode() != ISD::SRL) {
821  std::swap(Op0, Op1);
822  std::swap(Op0Opc, Op1Opc);
823  std::swap(TargetMask, InsertMask);
824  }
825  }
826 
// rlwimi requires the inserted field to be one contiguous run of ones
// (MB..ME); bail out of this path otherwise.
827  unsigned MB, ME;
828  if (isRunOfOnes(InsertMask, MB, ME)) {
// Fold a direct shift on the inserted value into the rotate amount SH.
829  if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
830  isInt32Immediate(Op1.getOperand(1), Value)) {
831  Op1 = Op1.getOperand(0);
832  SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
833  }
834  if (Op1Opc == ISD::AND) {
835  // The AND mask might not be a constant, and we need to make sure that
836  // if we're going to fold the masking with the insert, all bits not
837  // known to be zero in the mask are known to be one.
838  KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
839  bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
840 
841  unsigned SHOpc = Op1.getOperand(0).getOpcode();
842  if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
// NOTE(review): a line appears to be missing from this extract here (the
// original's line 843, an isInt32Immediate(...) check that also opens this
// if body) — verify against upstream LLVM before relying on this text.
844  // Note that Value must be in range here (less than 32) because
845  // otherwise there would not be any bits set in InsertMask.
846  Op1 = Op1.getOperand(0).getOperand(0);
847  SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
848  }
849  }
850 
// Keep the rotate amount in [0, 31] (a shift of 32 - 0 would otherwise
// produce 32).
851  SH &= 31;
852  SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
853  getI32Imm(ME, dl) };
854  ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
855  return true;
856  }
857  }
858  return false;
859 }
860 
// Scan all uses of N and, if every use only observes a narrowed value
// (an ISD::TRUNCATE, a narrowing ISD::STORE of the value operand, or one of
// the 64-bit narrow-store machine opcodes), return the widest number of bits
// any use actually consumes. Return 0 as soon as any use may observe the
// full 64-bit value (or uses N as something other than the stored value).
861 static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
862  unsigned MaxTruncation = 0;
863  // Cannot use range-based for loop here as we need the actual use (i.e. we
864  // need the operand number corresponding to the use). A range-based for
865  // will unbox the use and provide an SDNode*.
866  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
867  Use != UseEnd; ++Use) {
868  unsigned Opc =
869  Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
870  switch (Opc) {
// Any use we do not recognize may see all the bits.
871  default: return 0;
872  case ISD::TRUNCATE:
873  if (Use->isMachineOpcode())
874  return 0;
875  MaxTruncation =
876  std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
877  continue;
878  case ISD::STORE: {
879  if (Use->isMachineOpcode())
880  return 0;
881  StoreSDNode *STN = cast<StoreSDNode>(*Use);
882  unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
// A full-width store consumes all bits; operand 0 is the stored value —
// if N is used as the address (or chain) instead, all bits may matter.
883  if (MemVTSize == 64 || Use.getOperandNo() != 0)
884  return 0;
885  MaxTruncation = std::max(MaxTruncation, MemVTSize);
886  continue;
887  }
// Already-selected narrow stores of a 64-bit register: word...
888  case PPC::STW8:
889  case PPC::STWX8:
890  case PPC::STWU8:
891  case PPC::STWUX8:
892  if (Use.getOperandNo() != 0)
893  return 0;
894  MaxTruncation = std::max(MaxTruncation, 32u);
895  continue;
// ...halfword...
896  case PPC::STH8:
897  case PPC::STHX8:
898  case PPC::STHU8:
899  case PPC::STHUX8:
900  if (Use.getOperandNo() != 0)
901  return 0;
902  MaxTruncation = std::max(MaxTruncation, 16u);
903  continue;
// ...and byte variants.
904  case PPC::STB8:
905  case PPC::STBX8:
906  case PPC::STBU8:
907  case PPC::STBUX8:
908  if (Use.getOperandNo() != 0)
909  return 0;
910  MaxTruncation = std::max(MaxTruncation, 8u);
911  continue;
912  }
913  }
914  return MaxTruncation;
915 }
916 
917 // For any 32 < Num < 64, check if the Imm contains at least Num consecutive
918 // zeros straddling the 32-bit boundary (trailing zeros of the high word plus
// leading zeros of the low word). If so, return the rotate-right amount
// (32 + trailing zeros of the high word) that brings the bits above the zero
// run down into the low-order positions; return 0 otherwise.
919 static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
920  unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
921  unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
922  if ((HiTZ + LoLZ) >= Num)
923  return (32 + HiTZ);
924  return 0;
925 }
926 
927 // Direct materialization of 64-bit constants by enumerated patterns.
// Tries 1-instruction, then 2-instruction, then 3-instruction recipes in
// order. On success returns the final machine node and sets InstCnt to the
// number of instructions used; on failure returns nullptr and sets InstCnt
// to 0 so the caller can fall back to a generic sequence.
928 static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
929  uint64_t Imm, unsigned &InstCnt) {
930  unsigned TZ = countTrailingZeros<uint64_t>(Imm);
931  unsigned LZ = countLeadingZeros<uint64_t>(Imm);
932  unsigned TO = countTrailingOnes<uint64_t>(Imm);
933  unsigned LO = countLeadingOnes<uint64_t>(Imm);
934  unsigned Hi32 = Hi_32(Imm);
935  unsigned Lo32 = Lo_32(Imm);
936  SDNode *Result = nullptr;
937  unsigned Shift = 0;
938 
939  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
940  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
941  };
942 
943  // Following patterns use 1 instructions to materialize the Imm.
944  InstCnt = 1;
945  // 1-1) Patterns : {zeros}{15-bit value}
946  // {ones}{15-bit value}
947  if (isInt<16>(Imm)) {
948  SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
949  return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
950  }
951  // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
952  // {ones}{15-bit value}{16 zeros}
953  if (TZ > 15 && (LZ > 32 || LO > 32))
954  return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
955  getI32Imm((Imm >> 16) & 0xffff));
956 
957  // Following patterns use 2 instructions to materialize the Imm.
958  InstCnt = 2;
959  assert(LZ < 64 && "Unexpected leading zeros here.");
960  // Count of ones following the leading zeros.
961  unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
962  // 2-1) Patterns : {zeros}{31-bit value}
963  // {ones}{31-bit value}
964  if (isInt<32>(Imm)) {
965  uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
966  unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
967  Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
968  return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
969  getI32Imm(Imm & 0xffff));
970  }
971  // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
972  // {zeros}{15-bit value}{zeros}
973  // {zeros}{ones}{15-bit value}
974  // {ones}{15-bit value}{zeros}
975  // We can take advantage of LI's sign-extension semantics to generate leading
976  // ones, and then use RLDIC to mask off the ones in both sides after rotation.
977  if ((LZ + FO + TZ) > 48) {
978  Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
979  getI32Imm((Imm >> TZ) & 0xffff));
980  return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
981  getI32Imm(TZ), getI32Imm(LZ));
982  }
983  // 2-3) Pattern : {zeros}{15-bit value}{ones}
984  // Shift right the Imm by (48 - LZ) bits to construct a negative 16 bits value,
985  // therefore we can take advantage of LI's sign-extension semantics, and then
986  // mask them off after rotation.
987  //
988  // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
989  // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
990  // +------------------------+ +------------------------+
991  // 63 0 63 0
992  // Imm (Imm >> (48 - LZ) & 0xffff)
993  // +----sext-----|--16-bit--+ +clear-|-----------------+
994  // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
995  // +------------------------+ +------------------------+
996  // 63 0 63 0
997  // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
998  if ((LZ + TO) > 48) {
999  // Since the immediates with (LZ > 32) have been handled by previous
1000  // patterns, here we have (LZ <= 32) to make sure we will not shift right
1001  // the Imm by a negative value.
1002  assert(LZ <= 32 && "Unexpected shift value.");
1003  Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1004  getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1005  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1006  getI32Imm(48 - LZ), getI32Imm(LZ));
1007  }
1008  // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1009  // {ones}{15-bit value}{ones}
1010  // We can take advantage of LI's sign-extension semantics to generate leading
1011  // ones, and then use RLDICL to mask off the ones in left sides (if required)
1012  // after rotation.
1013  //
1014  // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
1015  // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
1016  // +------------------------+ +------------------------+
1017  // 63 0 63 0
1018  // Imm (Imm >> TO) & 0xffff
1019  // +----sext-----|--16-bit--+ +LZ|---------------------+
1020  // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
1021  // +------------------------+ +------------------------+
1022  // 63 0 63 0
1023  // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1024  if ((LZ + FO + TO) > 48) {
1025  Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1026  getI32Imm((Imm >> TO) & 0xffff));
1027  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1028  getI32Imm(TO), getI32Imm(LZ));
1029  }
1030  // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1031  // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1032  // value, we can use LI for Lo16 without generating leading ones then add the
1033  // Hi16(in Lo32).
1034  if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1035  Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1036  getI32Imm(Lo32 & 0xffff));
1037  return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
1038  getI32Imm(Lo32 >> 16));
1039  }
1040  // 2-6) Patterns : {******}{49 zeros}{******}
1041  // {******}{49 ones}{******}
1042  // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1043  // bits remain on both sides. Rotate right the Imm to construct an int<16>
1044  // value, use LI for int<16> value and then use RLDICL without mask to rotate
1045  // it back.
1046  //
1047  // 1) findContiguousZerosAtLeast(Imm, 49)
1048  // +------|--zeros-|------+ +---ones--||---15 bit--+
1049  // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
1050  // +----------------------+ +----------------------+
1051  // 63 0 63 0
1052  //
1053  // 2) findContiguousZerosAtLeast(~Imm, 49)
1054  // +------|--ones--|------+ +---ones--||---15 bit--+
1055  // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1056  // +----------------------+ +----------------------+
1057  // 63 0 63 0
1058  if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
1059  (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
1060  uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1061  Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1062  getI32Imm(RotImm & 0xffff));
1063  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1064  getI32Imm(Shift), getI32Imm(0));
1065  }
1066 
1067  // Following patterns use 3 instructions to materialize the Imm.
1068  InstCnt = 3;
1069  // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1070  // {zeros}{31-bit value}{zeros}
1071  // {zeros}{ones}{31-bit value}
1072  // {ones}{31-bit value}{zeros}
1073  // We can take advantage of LIS's sign-extension semantics to generate leading
1074  // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1075  // ones in both sides after rotation.
1076  if ((LZ + FO + TZ) > 32) {
1077  uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1078  unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1079  Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1080  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1081  getI32Imm((Imm >> TZ) & 0xffff));
1082  return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1083  getI32Imm(TZ), getI32Imm(LZ));
1084  }
1085  // 3-2) Pattern : {zeros}{31-bit value}{ones}
1086  // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits value,
1087  // therefore we can take advantage of LIS's sign-extension semantics, add
1088  // the remaining bits with ORI, and then mask them off after rotation.
1089  // This is similar to Pattern 2-3, please refer to the diagram there.
1090  if ((LZ + TO) > 32) {
1091  // Since the immediates with (LZ > 32) have been handled by previous
1092  // patterns, here we have (LZ <= 32) to make sure we will not shift right
1093  // the Imm by a negative value.
1094  assert(LZ <= 32 && "Unexpected shift value.");
1095  Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1096  getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1097  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1098  getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1099  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1100  getI32Imm(32 - LZ), getI32Imm(LZ));
1101  }
1102  // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1103  // {ones}{31-bit value}{ones}
1104  // We can take advantage of LIS's sign-extension semantics to generate leading
1105  // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1106  // ones in left sides (if required) after rotation.
1107  // This is similar to Pattern 2-4, please refer to the diagram there.
1108  if ((LZ + FO + TO) > 32) {
1109  Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1110  getI32Imm((Imm >> (TO + 16)) & 0xffff));
1111  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1112  getI32Imm((Imm >> TO) & 0xffff));
1113  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1114  getI32Imm(TO), getI32Imm(LZ));
1115  }
1116  // 3-4) Patterns : High word == Low word
1117  if (Hi32 == Lo32) {
1118  // Handle the first 32 bits.
1119  uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1120  unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1121  Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1122  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1123  getI32Imm(Lo32 & 0xffff));
1124  // Use rldimi to insert the Low word into High word.
1125  SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1126  getI32Imm(0)};
1127  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1128  }
1129  // 3-5) Patterns : {******}{33 zeros}{******}
1130  // {******}{33 ones}{******}
1131  // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1132  // bits remain on both sides. Rotate right the Imm to construct an int<32>
1133  // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1134  // rotate it back.
1135  // This is similar to Pattern 2-6, please refer to the diagram there.
1136  if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
1137  (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
1138  uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1139  uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1140  unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1141  Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1142  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1143  getI32Imm(RotImm & 0xffff));
1144  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1145  getI32Imm(Shift), getI32Imm(0));
1146  }
1147 
// No enumerated pattern matched; signal failure to the caller.
1148  InstCnt = 0;
1149  return nullptr;
1150 }
1151 
1152 // Try to select instructions to generate a 64 bit immediate using prefix as
1153 // well as non prefix instructions. The function will return the SDNode
1154 // to materialize that constant or it will return nullptr if it does not
1155 // find one. The variable InstCnt is set to the number of instructions that
1156 // were selected.
// NOTE(review): the opening signature line (original line 1157) is missing
// from this extract. From the call site below (selectI64Imm) this is
// selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, ...) —
// verify against upstream LLVM.
1158  uint64_t Imm, unsigned &InstCnt) {
1159  unsigned TZ = countTrailingZeros<uint64_t>(Imm);
1160  unsigned LZ = countLeadingZeros<uint64_t>(Imm);
1161  unsigned TO = countTrailingOnes<uint64_t>(Imm);
// Guard against UB: shifting a 64-bit value by 64 when Imm == 0.
1162  unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1163  unsigned Hi32 = Hi_32(Imm);
1164  unsigned Lo32 = Lo_32(Imm);
1165 
1166  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1167  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1168  };
1169 
1170  auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1171  return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1172  };
1173 
1174  // Following patterns use 1 instruction to materialize Imm.
1175  InstCnt = 1;
1176 
1177  // The pli instruction can materialize up to 34 bits directly.
1178  // If a constant fits within 34-bits, emit the pli instruction here directly.
1179  if (isInt<34>(Imm))
1180  return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1181  CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1182 
1183  // Require at least two instructions.
1184  InstCnt = 2;
1185  SDNode *Result = nullptr;
1186  // Patterns : {zeros}{ones}{33-bit value}{zeros}
1187  // {zeros}{33-bit value}{zeros}
1188  // {zeros}{ones}{33-bit value}
1189  // {ones}{33-bit value}{zeros}
1190  // We can take advantage of PLI's sign-extension semantics to generate leading
1191  // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1192  if ((LZ + FO + TZ) > 30) {
1193  APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1194  APInt Extended = SignedInt34.sext(64);
1195  Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1196  getI64Imm(*Extended.getRawData()));
1197  return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1198  getI32Imm(TZ), getI32Imm(LZ));
1199  }
1200  // Pattern : {zeros}{33-bit value}{ones}
1201  // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1202  // therefore we can take advantage of PLI's sign-extension semantics, and then
1203  // mask them off after rotation.
1204  //
1205  // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1206  // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1207  // +------------------------+ +------------------------+
1208  // 63 0 63 0
1209  //
1210  // +----sext-----|--34-bit--+ +clear-|-----------------+
1211  // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1212  // +------------------------+ +------------------------+
1213  // 63 0 63 0
1214  if ((LZ + TO) > 30) {
1215  APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1216  APInt Extended = SignedInt34.sext(64);
1217  Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1218  getI64Imm(*Extended.getRawData()));
1219  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1220  getI32Imm(30 - LZ), getI32Imm(LZ));
1221  }
1222  // Patterns : {zeros}{ones}{33-bit value}{ones}
1223  // {ones}{33-bit value}{ones}
1224  // Similar to LI we can take advantage of PLI's sign-extension semantics to
1225  // generate leading ones, and then use RLDICL to mask off the ones in left
1226  // sides (if required) after rotation.
1227  if ((LZ + FO + TO) > 30) {
1228  APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1229  APInt Extended = SignedInt34.sext(64);
1230  Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1231  getI64Imm(*Extended.getRawData()));
1232  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1233  getI32Imm(TO), getI32Imm(LZ));
1234  }
1235  // Patterns : {******}{31 zeros}{******}
1236  // : {******}{31 ones}{******}
1237  // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1238  // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
1239  // for the int<33> value and then use RLDICL without a mask to rotate it back.
1240  //
1241  // +------|--ones--|------+ +---ones--||---33 bit--+
1242  // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1243  // +----------------------+ +----------------------+
1244  // 63 0 63 0
// Exhaustively try every rotation rather than computing the run directly;
// the first rotation whose result fits in a signed 34-bit value wins.
1245  for (unsigned Shift = 0; Shift < 63; ++Shift) {
1246  uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1247  if (isInt<34>(RotImm)) {
1248  Result =
1249  CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1250  return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1251  SDValue(Result, 0), getI32Imm(Shift),
1252  getI32Imm(0));
1253  }
1254  }
1255 
1256  // Patterns : High word == Low word
1257  // This is basically a splat of a 32 bit immediate.
1258  if (Hi32 == Lo32) {
1259  Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1260  SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1261  getI32Imm(0)};
1262  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1263  }
1264 
1265  InstCnt = 3;
1266  // Catch-all
1267  // This pattern can form any 64 bit immediate in 3 instructions.
// PLI each 32-bit half, then rldimi the high half into the low half.
1268  SDNode *ResultHi =
1269  CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1270  SDNode *ResultLo =
1271  CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1272  SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1273  getI32Imm(0)};
1274  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1275 }
1276 
// Top-level 64-bit immediate selector: tries the non-prefixed direct
// patterns, optionally the prefixed (pli-based) patterns, and finally falls
// back to materializing the high word directly and OR-ing in the low 32 bits
// with ORIS8/ORI8. If InstCnt is non-null it receives the instruction count.
1277 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1278  unsigned *InstCnt = nullptr) {
1279  unsigned InstCntDirect = 0;
1280  // No more than 3 instructions is used if we can select the i64 immediate
1281  // directly.
1282  SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1283 
1284  const PPCSubtarget &Subtarget =
// NOTE(review): the initializer line (original line 1285, presumably
// fetching the PPCSubtarget from CurDAG's MachineFunction) is missing from
// this extract — verify against upstream LLVM.
1286 
1287  // If we have prefixed instructions and there is a chance we can
1288  // materialize the constant with fewer prefixed instructions than
1289  // non-prefixed, try that.
1290  if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1291  unsigned InstCntDirectP = 0;
1292  SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1293  // Use the prefix case in either of two cases:
1294  // 1) We have no result from the non-prefix case to use.
1295  // 2) The non-prefix case uses more instructions than the prefix case.
1296  // If the prefix and non-prefix cases use the same number of instructions
1297  // we will prefer the non-prefix case.
1298  if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1299  if (InstCnt)
1300  *InstCnt = InstCntDirectP;
1301  return ResultP;
1302  }
1303  }
1304 
1305  if (Result) {
1306  if (InstCnt)
1307  *InstCnt = InstCntDirect;
1308  return Result;
1309  }
1310  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1311  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1312  };
1313  // Handle the upper 32 bit value.
1314  Result =
1315  selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1316  // Add in the last bits as required.
1317  if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
1318  Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1319  SDValue(Result, 0), getI32Imm(Hi16));
1320  ++InstCntDirect;
1321  }
1322  if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
1323  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1324  getI32Imm(Lo16));
1325  ++InstCntDirect;
1326  }
1327  if (InstCnt)
1328  *InstCnt = InstCntDirect;
1329  return Result;
1330 }
1331 
1332 // Select a 64-bit constant.
// NOTE(review): the member-function signature line (original line 1333,
// presumably SDNode *PPCDAGToDAGISel::selectI64Imm(SDNode *N) {) is missing
// from this extract — verify against upstream LLVM.
1334  SDLoc dl(N);
1335 
1336  // Get 64 bit value.
1337  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
// If every use only looks at the low MinSize bits, we may sign-extend from
// that width; when the result fits in 16 bits a single LI8 suffices.
1338  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1339  uint64_t SextImm = SignExtend64(Imm, MinSize);
1340  SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1341  if (isInt<16>(SextImm))
1342  return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1343  }
1344  return selectI64Imm(CurDAG, dl, Imm);
1345 }
1346 
1347 namespace {
1348 
1349 class BitPermutationSelector {
// Describes the provenance of a single output bit: either a constant zero,
// or bit Idx of some SDValue V (possibly one already known to be zero).
1350  struct ValueBit {
1351  SDValue V;
1352 
1353  // The bit number in the value, using a convention where bit 0 is the
1354  // lowest-order bit.
1355  unsigned Idx;
1356 
1357  // ConstZero means a bit we need to mask off.
1358  // Variable is a bit comes from an input variable.
1359  // VariableKnownToBeZero is also a bit comes from an input variable,
1360  // but it is known to be already zero. So we do not need to mask them.
1361  enum Kind {
1362  ConstZero,
1363  Variable,
1364  VariableKnownToBeZero
1365  } K;
1366 
1367  ValueBit(SDValue V, unsigned I, Kind K = Variable)
1368  : V(V), Idx(I), K(K) {}
// Constant-zero (or default) bit: no backing value, sentinel index.
1369  ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1370 
// True when the bit's runtime value is zero (constant or proven).
1371  bool isZero() const {
1372  return K == ConstZero || K == VariableKnownToBeZero;
1373  }
1374 
// True when the bit is backed by an input SDValue (even if known zero).
1375  bool hasValue() const {
1376  return K == Variable || K == VariableKnownToBeZero;
1377  }
1378 
1379  SDValue getValue() const {
1380  assert(hasValue() && "Cannot get the value of a constant bit");
1381  return V;
1382  }
1383 
1384  unsigned getValueBitIndex() const {
1385  assert(hasValue() && "Cannot get the value bit index of a constant bit");
1386  return Idx;
1387  }
1388  };
1389 
1390 // A bit group has the same underlying value and the same rotate factor.
// Represents a contiguous run of output bits [StartIdx, EndIdx] all sourced
// from the same value V rotated left by RLAmt.
1391  struct BitGroup {
1392  SDValue V;
1393  unsigned RLAmt;
1394  unsigned StartIdx, EndIdx;
1395 
1396  // This rotation amount assumes that the lower 32 bits of the quantity are
1397  // replicated in the high 32 bits by the rotation operator (which is done
1398  // by rlwinm and friends in 64-bit mode).
1399  bool Repl32;
1400  // Did converting to Repl32 == true change the rotation factor? If it did,
1401  // it decreased it by 32.
1402  bool Repl32CR;
1403  // Was this group coalesced after setting Repl32 to true?
1404  bool Repl32Coalesced;
1405 
1406  BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1407  : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1408  Repl32Coalesced(false) {
1409  LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1410  << " [" << S << ", " << E << "]\n");
1411  }
1412  };
1413 
1414  // Information on each (Value, RLAmt) pair (like the number of groups
1415  // associated with each) used to choose the lowering method.
1416  struct ValueRotInfo {
1417  SDValue V;
1418  unsigned RLAmt = std::numeric_limits<unsigned>::max();
1419  unsigned NumGroups = 0;
1420  unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1421  bool Repl32 = false;
1422 
1423  ValueRotInfo() = default;
1424 
1425  // For sorting (in reverse order) by NumGroups, and then by
1426  // FirstGroupStartIdx.
// Ordering, in priority: non-Repl32 first, then more groups first, then
// RLAmt == 0 (no rotate needed) first, then earlier first-group start.
1427  bool operator < (const ValueRotInfo &Other) const {
1428  // We need to sort so that the non-Repl32 come first because, when we're
1429  // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1430  // masking operation.
1431  if (Repl32 < Other.Repl32)
1432  return true;
1433  else if (Repl32 > Other.Repl32)
1434  return false;
1435  else if (NumGroups > Other.NumGroups)
1436  return true;
1437  else if (NumGroups < Other.NumGroups)
1438  return false;
1439  else if (RLAmt == 0 && Other.RLAmt != 0)
1440  return true;
1441  else if (RLAmt != 0 && Other.RLAmt == 0)
1442  return false;
1443  else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1444  return true;
1445  return false;
1446  }
1447  };
1448 
1449  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1450  using ValueBitsMemoizer =
1452  ValueBitsMemoizer Memoizer;
1453 
1454  // Return a pair of bool and a SmallVector pointer to a memoization entry.
1455  // The bool is true if something interesting was deduced, otherwise if we're
1456  // providing only a generic representation of V (or something else likewise
1457  // uninteresting for instruction selection) through the SmallVector.
1458  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1459  unsigned NumBits) {
1460  auto &ValueEntry = Memoizer[V];
1461  if (ValueEntry)
1462  return std::make_pair(ValueEntry->first, &ValueEntry->second);
1463  ValueEntry.reset(new ValueBitsMemoizedValue());
1464  bool &Interesting = ValueEntry->first;
1465  SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1466  Bits.resize(NumBits);
1467 
1468  switch (V.getOpcode()) {
1469  default: break;
1470  case ISD::ROTL:
1471  if (isa<ConstantSDNode>(V.getOperand(1))) {
1472  unsigned RotAmt = V.getConstantOperandVal(1);
1473 
1474  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1475 
1476  for (unsigned i = 0; i < NumBits; ++i)
1477  Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1478 
1479  return std::make_pair(Interesting = true, &Bits);
1480  }
1481  break;
1482  case ISD::SHL:
1483  case PPCISD::SHL:
1484  if (isa<ConstantSDNode>(V.getOperand(1))) {
1485  unsigned ShiftAmt = V.getConstantOperandVal(1);
1486 
1487  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1488 
1489  for (unsigned i = ShiftAmt; i < NumBits; ++i)
1490  Bits[i] = LHSBits[i - ShiftAmt];
1491 
1492  for (unsigned i = 0; i < ShiftAmt; ++i)
1493  Bits[i] = ValueBit(ValueBit::ConstZero);
1494 
1495  return std::make_pair(Interesting = true, &Bits);
1496  }
1497  break;
1498  case ISD::SRL:
1499  case PPCISD::SRL:
1500  if (isa<ConstantSDNode>(V.getOperand(1))) {
1501  unsigned ShiftAmt = V.getConstantOperandVal(1);
1502 
1503  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1504 
1505  for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1506  Bits[i] = LHSBits[i + ShiftAmt];
1507 
1508  for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1509  Bits[i] = ValueBit(ValueBit::ConstZero);
1510 
1511  return std::make_pair(Interesting = true, &Bits);
1512  }
1513  break;
1514  case ISD::AND:
1515  if (isa<ConstantSDNode>(V.getOperand(1))) {
1517 
1518  const SmallVector<ValueBit, 64> *LHSBits;
1519  // Mark this as interesting, only if the LHS was also interesting. This
1520  // prevents the overall procedure from matching a single immediate 'and'
1521  // (which is non-optimal because such an and might be folded with other
1522  // things if we don't select it here).
1523  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1524 
1525  for (unsigned i = 0; i < NumBits; ++i)
1526  if (((Mask >> i) & 1) == 1)
1527  Bits[i] = (*LHSBits)[i];
1528  else {
1529  // AND instruction masks this bit. If the input is already zero,
1530  // we have nothing to do here. Otherwise, make the bit ConstZero.
1531  if ((*LHSBits)[i].isZero())
1532  Bits[i] = (*LHSBits)[i];
1533  else
1534  Bits[i] = ValueBit(ValueBit::ConstZero);
1535  }
1536 
1537  return std::make_pair(Interesting, &Bits);
1538  }
1539  break;
1540  case ISD::OR: {
1541  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1542  const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1543 
1544  bool AllDisjoint = true;
1545  SDValue LastVal = SDValue();
1546  unsigned LastIdx = 0;
1547  for (unsigned i = 0; i < NumBits; ++i) {
1548  if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1549  // If both inputs are known to be zero and one is ConstZero and
1550  // another is VariableKnownToBeZero, we can select whichever
1551  // we like. To minimize the number of bit groups, we select
1552  // VariableKnownToBeZero if this bit is the next bit of the same
1553  // input variable from the previous bit. Otherwise, we select
1554  // ConstZero.
1555  if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1556  LHSBits[i].getValueBitIndex() == LastIdx + 1)
1557  Bits[i] = LHSBits[i];
1558  else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1559  RHSBits[i].getValueBitIndex() == LastIdx + 1)
1560  Bits[i] = RHSBits[i];
1561  else
1562  Bits[i] = ValueBit(ValueBit::ConstZero);
1563  }
1564  else if (LHSBits[i].isZero())
1565  Bits[i] = RHSBits[i];
1566  else if (RHSBits[i].isZero())
1567  Bits[i] = LHSBits[i];
1568  else {
1569  AllDisjoint = false;
1570  break;
1571  }
1572  // We remember the value and bit index of this bit.
1573  if (Bits[i].hasValue()) {
1574  LastVal = Bits[i].getValue();
1575  LastIdx = Bits[i].getValueBitIndex();
1576  }
1577  else {
1578  if (LastVal) LastVal = SDValue();
1579  LastIdx = 0;
1580  }
1581  }
1582 
1583  if (!AllDisjoint)
1584  break;
1585 
1586  return std::make_pair(Interesting = true, &Bits);
1587  }
1588  case ISD::ZERO_EXTEND: {
1589  // We support only the case with zero extension from i32 to i64 so far.
1590  if (V.getValueType() != MVT::i64 ||
1591  V.getOperand(0).getValueType() != MVT::i32)
1592  break;
1593 
1594  const SmallVector<ValueBit, 64> *LHSBits;
1595  const unsigned NumOperandBits = 32;
1596  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1597  NumOperandBits);
1598 
1599  for (unsigned i = 0; i < NumOperandBits; ++i)
1600  Bits[i] = (*LHSBits)[i];
1601 
1602  for (unsigned i = NumOperandBits; i < NumBits; ++i)
1603  Bits[i] = ValueBit(ValueBit::ConstZero);
1604 
1605  return std::make_pair(Interesting, &Bits);
1606  }
1607  case ISD::TRUNCATE: {
1609  EVT ToType = V.getValueType();
1610  // We support only the case with truncate from i64 to i32.
1611  if (FromType != MVT::i64 || ToType != MVT::i32)
1612  break;
1613  const unsigned NumAllBits = FromType.getSizeInBits();
1614  SmallVector<ValueBit, 64> *InBits;
1615  std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1616  NumAllBits);
1617  const unsigned NumValidBits = ToType.getSizeInBits();
1618 
1619  // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1620  // So, we cannot include this truncate.
1621  bool UseUpper32bit = false;
1622  for (unsigned i = 0; i < NumValidBits; ++i)
1623  if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1624  UseUpper32bit = true;
1625  break;
1626  }
1627  if (UseUpper32bit)
1628  break;
1629 
1630  for (unsigned i = 0; i < NumValidBits; ++i)
1631  Bits[i] = (*InBits)[i];
1632 
1633  return std::make_pair(Interesting, &Bits);
1634  }
1635  case ISD::AssertZext: {
1636  // For AssertZext, we look through the operand and
1637  // mark the bits known to be zero.
1638  const SmallVector<ValueBit, 64> *LHSBits;
1639  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1640  NumBits);
1641 
1642  EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1643  const unsigned NumValidBits = FromType.getSizeInBits();
1644  for (unsigned i = 0; i < NumValidBits; ++i)
1645  Bits[i] = (*LHSBits)[i];
1646 
1647  // These bits are known to be zero but the AssertZext may be from a value
1648  // that already has some constant zero bits (i.e. from a masking and).
1649  for (unsigned i = NumValidBits; i < NumBits; ++i)
1650  Bits[i] = (*LHSBits)[i].hasValue()
1651  ? ValueBit((*LHSBits)[i].getValue(),
1652  (*LHSBits)[i].getValueBitIndex(),
1653  ValueBit::VariableKnownToBeZero)
1654  : ValueBit(ValueBit::ConstZero);
1655 
1656  return std::make_pair(Interesting, &Bits);
1657  }
1658  case ISD::LOAD:
1659  LoadSDNode *LD = cast<LoadSDNode>(V);
1660  if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1661  EVT VT = LD->getMemoryVT();
1662  const unsigned NumValidBits = VT.getSizeInBits();
1663 
1664  for (unsigned i = 0; i < NumValidBits; ++i)
1665  Bits[i] = ValueBit(V, i);
1666 
1667  // These bits are known to be zero.
1668  for (unsigned i = NumValidBits; i < NumBits; ++i)
1669  Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1670 
1671  // Zero-extending load itself cannot be optimized. So, it is not
1672  // interesting by itself though it gives useful information.
1673  return std::make_pair(Interesting = false, &Bits);
1674  }
1675  break;
1676  }
1677 
1678  for (unsigned i = 0; i < NumBits; ++i)
1679  Bits[i] = ValueBit(V, i);
1680 
1681  return std::make_pair(Interesting = false, &Bits);
1682  }
1683 
1684  // For each value (except the constant ones), compute the left-rotate amount
1685  // to get it from its original to final position.
1686  void computeRotationAmounts() {
1687  NeedMask = false;
1688  RLAmt.resize(Bits.size());
1689  for (unsigned i = 0; i < Bits.size(); ++i)
1690  if (Bits[i].hasValue()) {
1691  unsigned VBI = Bits[i].getValueBitIndex();
1692  if (i >= VBI)
1693  RLAmt[i] = i - VBI;
1694  else
1695  RLAmt[i] = Bits.size() - (VBI - i);
1696  } else if (Bits[i].isZero()) {
1697  NeedMask = true;
1698  RLAmt[i] = UINT32_MAX;
1699  } else {
1700  llvm_unreachable("Unknown value bit type");
1701  }
1702  }
1703 
  // Collect groups of consecutive bits with the same underlying value and
  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
  // they break up groups.
  //
  // Populates BitGroups from the precomputed Bits/RLAmt arrays. Each group is
  // a maximal run [StartIdx, EndIdx] over which both the source SDValue and
  // the rotation amount are constant. With LateMask set, constant-zero bits
  // inherit the preceding value/rotation so the zeros can be cleared by a
  // final mask instead of splitting the run.
  void collectBitGroups(bool LateMask) {
    BitGroups.clear();

    unsigned LastRLAmt = RLAmt[0];
    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
    unsigned LastGroupStartIdx = 0;
    bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    for (unsigned i = 1; i < Bits.size(); ++i) {
      unsigned ThisRLAmt = RLAmt[i];
      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
      if (LateMask && !ThisValue) {
        // Pretend the zero bit belongs to the previous run; the final mask
        // will clear it later.
        ThisValue = LastValue;
        ThisRLAmt = LastRLAmt;
        // If we're doing late masking, then the first bit group always starts
        // at zero (even if the first bits were zero).
        if (BitGroups.empty())
          LastGroupStartIdx = 0;
      }

      // If this bit is known to be zero and the current group is a bit group
      // of zeros, we do not need to terminate the current bit group even if
      // the Value or RLAmt does not match here. Instead, we terminate this
      // group when the first non-zero bit appears later.
      if (IsGroupOfZeros && Bits[i].isZero())
        continue;

      // If this bit has the same underlying value and the same rotate factor
      // as the last one, then they're part of the same group.
      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
        // We cannot continue the current group if this bit is not known to
        // be zero in a bit group of zeros.
        if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
          continue;

      // Run ended: record it (unless it was a value-less run) and start a
      // fresh one at position i.
      if (LastValue.getNode())
        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                     i-1));
      LastRLAmt = ThisRLAmt;
      LastValue = ThisValue;
      LastGroupStartIdx = i;
      IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    }
    // Flush the final run.
    if (LastValue.getNode())
      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                   Bits.size()-1));

    if (BitGroups.empty())
      return;

    // We might be able to combine the first and last groups (the combined
    // group then wraps around the most-significant bit).
    if (BitGroups.size() > 1) {
      // If the first and last groups are the same, then remove the first group
      // in favor of the last group, making the ending index of the last group
      // equal to the ending index of the to-be-removed first group.
      if (BitGroups[0].StartIdx == 0 &&
          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
        LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
        BitGroups.erase(BitGroups.begin());
      }
    }
  }
1771 
1772  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1773  // associated with each. If the number of groups are same, we prefer a group
1774  // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1775  // instruction. If there is a degeneracy, pick the one that occurs
1776  // first (in the final value).
1777  void collectValueRotInfo() {
1778  ValueRots.clear();
1779 
1780  for (auto &BG : BitGroups) {
1781  unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1782  ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1783  VRI.V = BG.V;
1784  VRI.RLAmt = BG.RLAmt;
1785  VRI.Repl32 = BG.Repl32;
1786  VRI.NumGroups += 1;
1787  VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1788  }
1789 
1790  // Now that we've collected the various ValueRotInfo instances, we need to
1791  // sort them.
1792  ValueRotsVec.clear();
1793  for (auto &I : ValueRots) {
1794  ValueRotsVec.push_back(I.second);
1795  }
1796  llvm::sort(ValueRotsVec);
1797  }
1798 
  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32-bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible. Returns true if any of the bit groups were
  // converted.
  void assignRepl32BitGroups() {
    // If we have bits like this:
    //
    // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
    // V bits: ...  7  6  5  4  3  2 1 0 31 30 29 28 27 26 25 24
    // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
    //
    // But, making use of a 32-bit operation that replicates the low-order 32
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
    // of 8.

    // True iff every value bit covered by BG originates in the low 32 bits of
    // its source value; handles wrap-around groups (StartIdx > EndIdx).
    auto IsAllLow32 = [this](BitGroup & BG) {
      if (BG.StartIdx <= BG.EndIdx) {
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      } else {
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      }

      return true;
    };

    // Pass 1: mark eligible groups (entirely within the low 32 bits) as
    // Repl32, folding rotation amounts >= 32 down by 32 (Repl32CR records
    // that fold so it can be undone when matching non-Repl32 uses later).
    for (auto &BG : BitGroups) {
      // If this bit group has RLAmt of 0 and will not be merged with
      // another bit group, we don't benefit from Repl32. We don't mark
      // such group to give more freedom for later instruction selection.
      if (BG.RLAmt == 0) {
        auto PotentiallyMerged = [this](BitGroup & BG) {
          for (auto &BG2 : BitGroups)
            if (&BG != &BG2 && BG.V == BG2.V &&
                (BG2.RLAmt == 0 || BG2.RLAmt == 32))
              return true;
          return false;
        };
        if (!PotentiallyMerged(BG))
          continue;
      }
      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
        if (IsAllLow32(BG)) {
          if (BG.RLAmt >= 32) {
            BG.RLAmt -= 32;
            BG.Repl32CR = true;
          }

          BG.Repl32 = true;

          LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
                            << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
                            << BG.StartIdx << ", " << BG.EndIdx << "]\n");
        }
      }
    }

    // Pass 2: walk through the bit groups, consolidating adjacent Repl32
    // groups with matching value/rotation where possible.
    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
      // We might want to remove this bit group by merging it with the previous
      // group (which might be the ending group).
      auto IP = (I == BitGroups.begin()) ?
                std::prev(BitGroups.end()) : std::prev(I);
      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

        LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
                          << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
                          << I->StartIdx << ", " << I->EndIdx
                          << "] with group with range [" << IP->StartIdx << ", "
                          << IP->EndIdx << "]\n");

        IP->EndIdx = I->EndIdx;
        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
        IP->Repl32Coalesced = true;
        I = BitGroups.erase(I);
        continue;
      } else {
        // There is a special case worth handling: If there is a single group
        // covering the entire upper 32 bits, and it can be merged with both
        // the next and previous groups (which might be the same group), then
        // do so. If it is the same group (so there will be only one group in
        // total), then we need to reverse the order of the range so that it
        // covers the entire 64 bits.
        if (I->StartIdx == 32 && I->EndIdx == 63) {
          assert(std::next(I) == BitGroups.end() &&
                 "bit group ends at index 63 but there is another?");
          auto IN = BitGroups.begin();

          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
              IsAllLow32(*I)) {

            LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
                              << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
                              << ", " << I->EndIdx
                              << "] with 32-bit replicated groups with ranges ["
                              << IP->StartIdx << ", " << IP->EndIdx << "] and ["
                              << IN->StartIdx << ", " << IN->EndIdx << "]\n");

            if (IP == IN) {
              // There is only one other group; change it to cover the whole
              // range (backward, so that it can still be Repl32 but cover the
              // whole 64-bit range).
              IP->StartIdx = 31;
              IP->EndIdx = 30;
              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
            } else {
              // There are two separate groups, one before this group and one
              // after us (at the beginning). We're going to remove this group,
              // but also the group at the very beginning.
              IP->EndIdx = IN->EndIdx;
              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
              BitGroups.erase(BitGroups.begin());
            }

            // This must be the last group in the vector (and we might have
            // just invalidated the iterator above), so break here.
            break;
          }
        }
      }

      ++I;
    }
  }
1949 
1950  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
1951  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1952  }
1953 
1954  uint64_t getZerosMask() {
1955  uint64_t Mask = 0;
1956  for (unsigned i = 0; i < Bits.size(); ++i) {
1957  if (Bits[i].hasValue())
1958  continue;
1959  Mask |= (UINT64_C(1) << i);
1960  }
1961 
1962  return ~Mask;
1963  }
1964 
1965  // This method extends an input value to 64 bit if input is 32-bit integer.
1966  // While selecting instructions in BitPermutationSelector in 64-bit mode,
1967  // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1968  // In such case, we extend it to 64 bit to be consistent with other values.
1969  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1970  if (V.getValueSizeInBits() == 64)
1971  return V;
1972 
1973  assert(V.getValueSizeInBits() == 32);
1974  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1975  SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1976  MVT::i64), 0);
1977  SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1978  MVT::i64, ImDef, V,
1979  SubRegIdx), 0);
1980  return ExtVal;
1981  }
1982 
1983  SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
1984  if (V.getValueSizeInBits() == 32)
1985  return V;
1986 
1987  assert(V.getValueSizeInBits() == 64);
1988  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1989  SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
1990  MVT::i32, V, SubRegIdx), 0);
1991  return SubVal;
1992  }
1993 
1994  // Depending on the number of groups for a particular value, it might be
1995  // better to rotate, mask explicitly (using andi/andis), and then or the
1996  // result. Select this part of the result first.
1997  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1999  return;
2000 
2001  for (ValueRotInfo &VRI : ValueRotsVec) {
2002  unsigned Mask = 0;
2003  for (unsigned i = 0; i < Bits.size(); ++i) {
2004  if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2005  continue;
2006  if (RLAmt[i] != VRI.RLAmt)
2007  continue;
2008  Mask |= (1u << i);
2009  }
2010 
2011  // Compute the masks for andi/andis that would be necessary.
2012  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2013  assert((ANDIMask != 0 || ANDISMask != 0) &&
2014  "No set bits in mask for value bit groups");
2015  bool NeedsRotate = VRI.RLAmt != 0;
2016 
2017  // We're trying to minimize the number of instructions. If we have one
2018  // group, using one of andi/andis can break even. If we have three
2019  // groups, we can use both andi and andis and break even (to use both
2020  // andi and andis we also need to or the results together). We need four
2021  // groups if we also need to rotate. To use andi/andis we need to do more
2022  // than break even because rotate-and-mask instructions tend to be easier
2023  // to schedule.
2024 
2025  // FIXME: We've biased here against using andi/andis, which is right for
2026  // POWER cores, but not optimal everywhere. For example, on the A2,
2027  // andi/andis have single-cycle latency whereas the rotate-and-mask
2028  // instructions take two cycles, and it would be better to bias toward
2029  // andi/andis in break-even cases.
2030 
2031  unsigned NumAndInsts = (unsigned) NeedsRotate +
2032  (unsigned) (ANDIMask != 0) +
2033  (unsigned) (ANDISMask != 0) +
2034  (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2035  (unsigned) (bool) Res;
2036 
2037  LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2038  << " RL: " << VRI.RLAmt << ":"
2039  << "\n\t\t\tisel using masking: " << NumAndInsts
2040  << " using rotates: " << VRI.NumGroups << "\n");
2041 
2042  if (NumAndInsts >= VRI.NumGroups)
2043  continue;
2044 
2045  LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2046 
2047  if (InstCnt) *InstCnt += NumAndInsts;
2048 
2049  SDValue VRot;
2050  if (VRI.RLAmt) {
2051  SDValue Ops[] =
2052  { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2053  getI32Imm(0, dl), getI32Imm(31, dl) };
2054  VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2055  Ops), 0);
2056  } else {
2057  VRot = TruncateToInt32(VRI.V, dl);
2058  }
2059 
2060  SDValue ANDIVal, ANDISVal;
2061  if (ANDIMask != 0)
2062  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2063  VRot, getI32Imm(ANDIMask, dl)),
2064  0);
2065  if (ANDISMask != 0)
2066  ANDISVal =
2067  SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2068  getI32Imm(ANDISMask, dl)),
2069  0);
2070 
2071  SDValue TotalVal;
2072  if (!ANDIVal)
2073  TotalVal = ANDISVal;
2074  else if (!ANDISVal)
2075  TotalVal = ANDIVal;
2076  else
2077  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2078  ANDIVal, ANDISVal), 0);
2079 
2080  if (!Res)
2081  Res = TotalVal;
2082  else
2083  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2084  Res, TotalVal), 0);
2085 
2086  // Now, remove all groups with this underlying value and rotation
2087  // factor.
2088  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2089  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2090  });
2091  }
2092  }
2093 
  // Instruction selection for the 32-bit case.
  //
  // Builds the permuted value out of rlwinm/rlwimi (and, for LateMask,
  // andi./andis.) machine nodes. If InstCnt is non-null it receives the
  // number of instructions emitted. Returns the node producing the result.
  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts32(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!NeedMask || LateMask) && !Res) {
      ValueRotInfo &VRI = ValueRotsVec[0];
      if (VRI.RLAmt) {
        if (InstCnt) *InstCnt += 1;
        // rlwinm with a full 0..31 mask: pure rotation, no masking.
        SDValue Ops[] =
          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
            getI32Imm(0, dl), getI32Imm(31, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                      0);
      } else {
        Res = TruncateToInt32(VRI.V, dl);
      }

      // Now, remove all groups with this underlying value and rotation factor.
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
      });
    }

    if (InstCnt) *InstCnt += BitGroups.size();

    // Insert the other groups (one at a time). The instruction mask bounds
    // count from the high-order end, hence the Bits.size()-1-based flips of
    // StartIdx/EndIdx below.
    for (auto &BG : BitGroups) {
      if (!Res) {
        // First group: rotate-and-mask into a fresh register.
        SDValue Ops[] =
          { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
      } else {
        // Subsequent groups: rotate-and-insert into the accumulated result.
        SDValue Ops[] =
          { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
      }
    }

    if (LateMask) {
      // Clear the constant-zero positions with andi./andis. at the end.
      unsigned Mask = (unsigned) getZerosMask();

      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in zeros mask?");

      if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                               (unsigned) (ANDISMask != 0) +
                               (unsigned) (ANDIMask != 0 && ANDISMask != 0);

      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
                                                 Res, getI32Imm(ANDIMask, dl)),
                          0);
      if (ANDISMask != 0)
        ANDISVal =
            SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
                                           getI32Imm(ANDISMask, dl)),
                    0);

      if (!ANDIVal)
        Res = ANDISVal;
      else if (!ANDISVal)
        Res = ANDIVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                                             ANDIVal, ANDISVal), 0);
    }

    return Res.getNode();
  }
2178 
2179  unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
2180  unsigned MaskStart, unsigned MaskEnd,
2181  bool IsIns) {
2182  // In the notation used by the instructions, 'start' and 'end' are reversed
2183  // because bits are counted from high to low order.
2184  unsigned InstMaskStart = 64 - MaskEnd - 1,
2185  InstMaskEnd = 64 - MaskStart - 1;
2186 
2187  if (Repl32)
2188  return 1;
2189 
2190  if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
2191  InstMaskEnd == 63 - RLAmt)
2192  return 1;
2193 
2194  return 2;
2195  }
2196 
  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce one if it is available.
  //
  // Emits machine nodes computing (rotl64 V, RLAmt) & mask[MaskStart,MaskEnd]
  // (mask bounds are low-order bit indices, inclusive). With Repl32, the
  // rotation is the rlwinm-style one that replicates the low word into the
  // high word. InstCnt, when non-null, is incremented by the number of
  // instructions emitted (1 or 2).
  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
                          bool Repl32, unsigned MaskStart, unsigned MaskEnd,
                          unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
                                            Ops), 0);
    }

    // rldicl: rotate left, clear high-order bits (mask extends to bit 63).
    if (InstMaskEnd == 63) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
    }

    // rldicr: rotate left, clear low-order bits (mask starts at bit 0).
    if (InstMaskStart == 0) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskEnd, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
    }

    // rldic: usable when the mask end coincides with the rotation amount.
    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }
2260 
  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce a rotate-mask-and-insert if one is available.
  //
  // Like SelectRotMask64, but the rotated-and-masked bits of V are inserted
  // into Base (rldimi/rlwimi semantics). Mask bounds are inclusive low-order
  // bit indices; InstCnt, if non-null, is incremented by the number of
  // instructions emitted (1 or 2).
  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
                             unsigned RLAmt, bool Repl32, unsigned MaskStart,
                             unsigned MaskEnd, unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
                                            Ops), 0);
    }

    // rldimi: only available when the mask end coincides with the rotation
    // amount.
    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }
2310 
2311  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2313  return;
2314 
2315  // The idea here is the same as in the 32-bit version, but with additional
2316  // complications from the fact that Repl32 might be true. Because we
2317  // aggressively convert bit groups to Repl32 form (which, for small
2318  // rotation factors, involves no other change), and then coalesce, it might
2319  // be the case that a single 64-bit masking operation could handle both
2320  // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2321  // form allowed coalescing, then we must use a 32-bit rotaton in order to
2322  // completely capture the new combined bit group.
2323 
2324  for (ValueRotInfo &VRI : ValueRotsVec) {
2325  uint64_t Mask = 0;
2326 
2327  // We need to add to the mask all bits from the associated bit groups.
2328  // If Repl32 is false, we need to add bits from bit groups that have
2329  // Repl32 true, but are trivially convertable to Repl32 false. Such a
2330  // group is trivially convertable if it overlaps only with the lower 32
2331  // bits, and the group has not been coalesced.
2332  auto MatchingBG = [VRI](const BitGroup &BG) {
2333  if (VRI.V != BG.V)
2334  return false;
2335 
2336  unsigned EffRLAmt = BG.RLAmt;
2337  if (!VRI.Repl32 && BG.Repl32) {
2338  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2339  !BG.Repl32Coalesced) {
2340  if (BG.Repl32CR)
2341  EffRLAmt += 32;
2342  } else {
2343  return false;
2344  }
2345  } else if (VRI.Repl32 != BG.Repl32) {
2346  return false;
2347  }
2348 
2349  return VRI.RLAmt == EffRLAmt;
2350  };
2351 
2352  for (auto &BG : BitGroups) {
2353  if (!MatchingBG(BG))
2354  continue;
2355 
2356  if (BG.StartIdx <= BG.EndIdx) {
2357  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2358  Mask |= (UINT64_C(1) << i);
2359  } else {
2360  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2361  Mask |= (UINT64_C(1) << i);
2362  for (unsigned i = 0; i <= BG.EndIdx; ++i)
2363  Mask |= (UINT64_C(1) << i);
2364  }
2365  }
2366 
2367  // We can use the 32-bit andi/andis technique if the mask does not
2368  // require any higher-order bits. This can save an instruction compared
2369  // to always using the general 64-bit technique.
2370  bool Use32BitInsts = isUInt<32>(Mask);
2371  // Compute the masks for andi/andis that would be necessary.
2372  unsigned ANDIMask = (Mask & UINT16_MAX),
2373  ANDISMask = (Mask >> 16) & UINT16_MAX;
2374 
2375  bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2376 
2377  unsigned NumAndInsts = (unsigned) NeedsRotate +
2378  (unsigned) (bool) Res;
2379  unsigned NumOfSelectInsts = 0;
2380  selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2381  assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2382  if (Use32BitInsts)
2383  NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2384  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2385  else
2386  NumAndInsts += NumOfSelectInsts + /* and */ 1;
2387 
2388  unsigned NumRLInsts = 0;
2389  bool FirstBG = true;
2390  bool MoreBG = false;
2391  for (auto &BG : BitGroups) {
2392  if (!MatchingBG(BG)) {
2393  MoreBG = true;
2394  continue;
2395  }
2396  NumRLInsts +=
2397  SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2398  !FirstBG);
2399  FirstBG = false;
2400  }
2401 
2402  LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2403  << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2404  << "\n\t\t\tisel using masking: " << NumAndInsts
2405  << " using rotates: " << NumRLInsts << "\n");
2406 
2407  // When we'd use andi/andis, we bias toward using the rotates (andi only
2408  // has a record form, and is cracked on POWER cores). However, when using
2409  // general 64-bit constant formation, bias toward the constant form,
2410  // because that exposes more opportunities for CSE.
2411  if (NumAndInsts > NumRLInsts)
2412  continue;
2413  // When merging multiple bit groups, instruction or is used.
2414  // But when rotate is used, rldimi can inert the rotated value into any
2415  // register, so instruction or can be avoided.
2416  if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2417  continue;
2418 
2419  LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2420 
2421  if (InstCnt) *InstCnt += NumAndInsts;
2422 
2423  SDValue VRot;
2424  // We actually need to generate a rotation if we have a non-zero rotation
2425  // factor or, in the Repl32 case, if we care about any of the
2426  // higher-order replicated bits. In the latter case, we generate a mask
2427  // backward so that it actually includes the entire 64 bits.
2428  if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2429  VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2430  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2431  else
2432  VRot = VRI.V;
2433 
2434  SDValue TotalVal;
2435  if (Use32BitInsts) {
2436  assert((ANDIMask != 0 || ANDISMask != 0) &&
2437  "No set bits in mask when using 32-bit ands for 64-bit value");
2438 
2439  SDValue ANDIVal, ANDISVal;
2440  if (ANDIMask != 0)
2441  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2442  ExtendToInt64(VRot, dl),
2443  getI32Imm(ANDIMask, dl)),
2444  0);
2445  if (ANDISMask != 0)
2446  ANDISVal =
2447  SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2448  ExtendToInt64(VRot, dl),
2449  getI32Imm(ANDISMask, dl)),
2450  0);
2451 
2452  if (!ANDIVal)
2453  TotalVal = ANDISVal;
2454  else if (!ANDISVal)
2455  TotalVal = ANDIVal;
2456  else
2457  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2458  ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2459  } else {
2460  TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2461  TotalVal =
2462  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2463  ExtendToInt64(VRot, dl), TotalVal),
2464  0);
2465  }
2466 
2467  if (!Res)
2468  Res = TotalVal;
2469  else
2470  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2471  ExtendToInt64(Res, dl), TotalVal),
2472  0);
2473 
2474  // Now, remove all groups with this underlying value and rotation
2475  // factor.
2476  eraseMatchingBitGroups(MatchingBG);
2477  }
2478  }
2479 
  // Instruction selection for the 64-bit case: assemble the value from its
  // bit groups using 64-bit rotate-and-mask (and rotate-and-insert)
  // instructions, optionally applying a final zero mask when LateMask is set.
  // Returns the node computing the permuted value (may be a pre-existing
  // node if no instructions are needed).
  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts64(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!NeedMask || LateMask) && !Res) {
      // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
      // groups will come first, and so the VRI representing the largest number
      // of groups might not be first (it might be the first Repl32 groups).
      unsigned MaxGroupsIdx = 0;
      if (!ValueRotsVec[0].Repl32) {
        // Scan forward to the first Repl32 entry and prefer it only if it
        // covers more groups than the leading non-Repl32 entry.
        for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
          if (ValueRotsVec[i].Repl32) {
            if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
              MaxGroupsIdx = i;
            break;
          }
      }

      ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
      bool NeedsRotate = false;
      if (VRI.RLAmt) {
        NeedsRotate = true;
      } else if (VRI.Repl32) {
        // A zero-rotation Repl32 value still needs a rotate unless every one
        // of its groups lies entirely in the low 32 bits.
        for (auto &BG : BitGroups) {
          if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
              BG.Repl32 != VRI.Repl32)
            continue;

          // We don't need a rotate if the bit group is confined to the lower
          // 32 bits.
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
            continue;

          NeedsRotate = true;
          break;
        }
      }

      if (NeedsRotate)
        Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                              VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
                              InstCnt);
      else
        Res = VRI.V;

      // Now, remove all groups with this underlying value and rotation factor.
      if (Res)
        eraseMatchingBitGroups([VRI](const BitGroup &BG) {
          return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
                 BG.Repl32 == VRI.Repl32;
        });
    }

    // Because 64-bit rotates are more flexible than inserts, we might have a
    // preference regarding which one we do first (to save one instruction).
    // Move the first group that is cheaper as a plain rotate than as an
    // insert to the front so it becomes the starting value.
    if (!Res)
      for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
        if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                 false) <
            SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                 true)) {
          if (I != BitGroups.begin()) {
            BitGroup BG = *I;
            BitGroups.erase(I);
            BitGroups.insert(BitGroups.begin(), BG);
          }

          break;
        }
      }

    // Insert the other groups (one at a time).
    for (auto &BG : BitGroups) {
      if (!Res)
        Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
                              BG.EndIdx, InstCnt);
      else
        Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
                                 BG.StartIdx, BG.EndIdx, InstCnt);
    }

    // Late masking: AND the assembled value with a mask clearing the bits
    // that must be zero in the result.
    if (LateMask) {
      uint64_t Mask = getZerosMask();

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        // One instruction per non-zero half-mask, plus an OR to combine them
        // if both halves are needed.
        if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                                 (unsigned) (ANDISMask != 0) +
                                 (unsigned) (ANDIMask != 0 && ANDISMask != 0);

        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
                                                   ExtendToInt64(Res, dl),
                                                   getI32Imm(ANDIMask, dl)),
                            0);
        if (ANDISMask != 0)
          ANDISVal =
              SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
                                             ExtendToInt64(Res, dl),
                                             getI32Imm(ANDISMask, dl)),
                      0);

        if (!ANDIVal)
          Res = ANDISVal;
        else if (!ANDISVal)
          Res = ANDIVal;
        else
          Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                          ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // General case: materialize the 64-bit mask constant and AND with it.
        unsigned NumOfSelectInsts = 0;
        SDValue MaskVal =
            SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
        Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                             ExtendToInt64(Res, dl), MaskVal),
                      0);
        if (InstCnt)
          *InstCnt += NumOfSelectInsts + /* and */ 1;
      }
    }

    return Res.getNode();
  }
2623 
2624  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2625  // Fill in BitGroups.
2626  collectBitGroups(LateMask);
2627  if (BitGroups.empty())
2628  return nullptr;
2629 
2630  // For 64-bit values, figure out when we can use 32-bit instructions.
2631  if (Bits.size() == 64)
2632  assignRepl32BitGroups();
2633 
2634  // Fill in ValueRotsVec.
2635  collectValueRotInfo();
2636 
2637  if (Bits.size() == 32) {
2638  return Select32(N, LateMask, InstCnt);
2639  } else {
2640  assert(Bits.size() == 64 && "Not 64 bits here?");
2641  return Select64(N, LateMask, InstCnt);
2642  }
2643 
2644  return nullptr;
2645  }
2646 
2647  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2648  erase_if(BitGroups, F);
2649  }
2650 
2652 
2653  bool NeedMask = false;
2655 
2656  SmallVector<BitGroup, 16> BitGroups;
2657 
2658  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2659  SmallVector<ValueRotInfo, 16> ValueRotsVec;
2660 
2661  SelectionDAG *CurDAG = nullptr;
2662 
2663 public:
2664  BitPermutationSelector(SelectionDAG *DAG)
2665  : CurDAG(DAG) {}
2666 
2667  // Here we try to match complex bit permutations into a set of
2668  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2669  // known to produce optimal code for common cases (like i32 byte swapping).
2670  SDNode *Select(SDNode *N) {
2671  Memoizer.clear();
2672  auto Result =
2673  getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2674  if (!Result.first)
2675  return nullptr;
2676  Bits = std::move(*Result.second);
2677 
2678  LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2679  " selection for: ");
2680  LLVM_DEBUG(N->dump(CurDAG));
2681 
2682  // Fill it RLAmt and set NeedMask.
2683  computeRotationAmounts();
2684 
2685  if (!NeedMask)
2686  return Select(N, false);
2687 
2688  // We currently have two techniques for handling results with zeros: early
2689  // masking (the default) and late masking. Late masking is sometimes more
2690  // efficient, but because the structure of the bit groups is different, it
2691  // is hard to tell without generating both and comparing the results. With
2692  // late masking, we ignore zeros in the resulting value when inserting each
2693  // set of bit groups, and then mask in the zeros at the end. With early
2694  // masking, we only insert the non-zero parts of the result at every step.
2695 
2696  unsigned InstCnt = 0, InstCntLateMask = 0;
2697  LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2698  SDNode *RN = Select(N, false, &InstCnt);
2699  LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2700 
2701  LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2702  SDNode *RNLM = Select(N, true, &InstCntLateMask);
2703  LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2704  << " instructions\n");
2705 
2706  if (InstCnt <= InstCntLateMask) {
2707  LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2708  return RN;
2709  }
2710 
2711  LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2712  return RNLM;
2713  }
2714 };
2715 
2716 class IntegerCompareEliminator {
2717  SelectionDAG *CurDAG;
2718  PPCDAGToDAGISel *S;
2719  // Conversion type for interpreting results of a 32-bit instruction as
2720  // a 64-bit value or vice versa.
2721  enum ExtOrTruncConversion { Ext, Trunc };
2722 
2723  // Modifiers to guide how an ISD::SETCC node's result is to be computed
2724  // in a GPR.
2725  // ZExtOrig - use the original condition code, zero-extend value
2726  // ZExtInvert - invert the condition code, zero-extend value
2727  // SExtOrig - use the original condition code, sign-extend value
2728  // SExtInvert - invert the condition code, sign-extend value
2729  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2730 
2731  // Comparisons against zero to emit GPR code sequences for. Each of these
2732  // sequences may need to be emitted for two or more equivalent patterns.
2733  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2734  // matters as well as the extension type: sext (-1/0), zext (1/0).
2735  // GEZExt - (zext (LHS >= 0))
2736  // GESExt - (sext (LHS >= 0))
2737  // LEZExt - (zext (LHS <= 0))
2738  // LESExt - (sext (LHS <= 0))
2739  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2740 
2741  SDNode *tryEXTEND(SDNode *N);
2742  SDNode *tryLogicOpOfCompares(SDNode *N);
2743  SDValue computeLogicOpInGPR(SDValue LogicOp);
2744  SDValue signExtendInputIfNeeded(SDValue Input);
2745  SDValue zeroExtendInputIfNeeded(SDValue Input);
2746  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2747  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2748  ZeroCompare CmpTy);
2749  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2750  int64_t RHSValue, SDLoc dl);
2751  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2752  int64_t RHSValue, SDLoc dl);
2753  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2754  int64_t RHSValue, SDLoc dl);
2755  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2756  int64_t RHSValue, SDLoc dl);
2757  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2758 
2759 public:
2760  IntegerCompareEliminator(SelectionDAG *DAG,
2761  PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2762  assert(CurDAG->getTargetLoweringInfo()
2763  .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2764  "Only expecting to use this on 64 bit targets.");
2765  }
2766  SDNode *Select(SDNode *N) {
2767  if (CmpInGPR == ICGPR_None)
2768  return nullptr;
2769  switch (N->getOpcode()) {
2770  default: break;
2771  case ISD::ZERO_EXTEND:
2772  if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2774  return nullptr;
2776  case ISD::SIGN_EXTEND:
2777  if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2779  return nullptr;
2780  return tryEXTEND(N);
2781  case ISD::AND:
2782  case ISD::OR:
2783  case ISD::XOR:
2784  return tryLogicOpOfCompares(N);
2785  }
2786  return nullptr;
2787  }
2788 };
2789 
2790 static bool isLogicOp(unsigned Opc) {
2791  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
2792 }
// The obvious case for wanting to keep the value in a GPR. Namely, the
// result of the comparison is actually needed in a GPR.
// Returns a node producing the extended (or width-reinterpreted) comparison
// result, or nullptr if no GPR sequence could be produced.
SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
          N->getOpcode() == ISD::SIGN_EXTEND) &&
         "Expecting a zero/sign extend node!");
  SDValue WideRes;
  // If we are zero-extending the result of a logical operation on i1
  // values, we can keep the values in GPRs.
  if (isLogicOp(N->getOperand(0).getOpcode()) &&
      N->getOperand(0).getValueType() == MVT::i1 &&
      N->getOpcode() == ISD::ZERO_EXTEND)
    WideRes = computeLogicOpInGPR(N->getOperand(0));
  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
    return nullptr;
  else
    // Lower the SETCC directly, with the extension kind matching the
    // requested extend (original condition code, no inversion).
    WideRes =
      getSETCCInGPR(N->getOperand(0),
                    N->getOpcode() == ISD::SIGN_EXTEND ?
                    SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);

  if (!WideRes)
    return nullptr;

  SDLoc dl(N);
  bool Input32Bit = WideRes.getValueType() == MVT::i32;
  bool Output32Bit = N->getValueType(0) == MVT::i32;

  // Exactly one of the two statistics counters is bumped per expanded node.
  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;

  // If the produced width differs from the requested output width,
  // reinterpret the value in the other width (no real instructions emitted;
  // see addExtOrTrunc).
  SDValue ConvOp = WideRes;
  if (Input32Bit != Output32Bit)
    ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
                           ExtOrTruncConversion::Trunc);
  return ConvOp.getNode();
}
2830 
// Attempt to perform logical operations on the results of comparisons while
// keeping the values in GPRs. Without doing so, these would end up being
// lowered to CR-logical operations which suffer from significant latency and
// low ILP.
// Returns a node that extracts the i1 result as a single CR0 bit set by a
// record-form instruction, or nullptr on failure.
SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
  if (N->getValueType(0) != MVT::i1)
    return nullptr;
  assert(isLogicOp(N->getOpcode()) &&
         "Expected a logic operation on setcc results.");
  // First lower the whole logic tree into a 64-bit GPR computation.
  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
  if (!LoweredLogical)
    return nullptr;

  SDLoc dl(N);
  // computeLogicOpInGPR emits bitwise negation as XORI8 with immediate 1.
  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
  // For a negation we test the EQ bit (set when the result is zero);
  // otherwise the GT bit.
  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  SDValue LHS = LoweredLogical.getOperand(0);
  SDValue RHS = LoweredLogical.getOperand(1);
  SDValue WideOp;
  SDValue OpToConvToRecForm;

  // Look through any 32-bit to 64-bit implicit extend nodes to find the
  // opcode that is input to the XORI.
  if (IsBitwiseNegate &&
      LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
    OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
  else if (IsBitwiseNegate)
    // If the input to the XORI isn't an extension, that's what we're after.
    OpToConvToRecForm = LoweredLogical.getOperand(0);
  else
    // If this is not an XORI, it is a reg-reg logical op and we can convert
    // it to record-form.
    OpToConvToRecForm = LoweredLogical;

  // Get the record-form version of the node we're looking to use to get the
  // CR result from.
  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);

  // Convert the right node to record-form. This is either the logical we're
  // looking at or it is the input node to the negation (if we're looking at
  // a bitwise negation).
  if (NewOpc != -1 && IsBitwiseNegate) {
    // The input to the XORI has a record-form. Use it.
    assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
           "Expected a PPC::XORI8 only for bitwise negation.");
    // Emit the record-form instruction.
    std::vector<SDValue> Ops;
    for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
      Ops.push_back(OpToConvToRecForm.getOperand(i));

    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc, dl,
                                     OpToConvToRecForm.getValueType(),
                                     MVT::Glue, Ops), 0);
  } else {
    assert((NewOpc != -1 || !IsBitwiseNegate) &&
           "No record form available for AND8/OR8/XOR8?");
    // Re-emit the logical operation itself in record form.
    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
                                     dl, MVT::i64, MVT::Glue, LHS, RHS),
              0);
  }

  // Select this node to a single bit from CR0 set by the record-form node
  // just created. For bitwise negation, use the EQ bit which is the equivalent
  // of negating the result (i.e. it is a bit set when the result of the
  // operation is zero).
  SDValue SRIdxVal =
    CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
  SDValue CRBit =
    SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                   MVT::i1, CR0Reg, SRIdxVal,
                                   WideOp.getValue(1)), 0);
  return CRBit.getNode();
}
2908 
// Lower a logical operation on i1 values into a GPR sequence if possible.
// The result can be kept in a GPR if requested.
// Three types of inputs can be handled:
// - SETCC
// - TRUNCATE
// - Logical operation (AND/OR/XOR)
// There is also a special case that is handled (namely a complement operation
// achieved with xor %a, -1).
// Returns the 64-bit GPR computation, or an empty SDValue on failure.
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  assert(isLogicOp(LogicOp.getOpcode()) &&
         "Can only handle logic operations here.");
  assert(LogicOp.getValueType() == MVT::i1 &&
         "Can only handle logic operations on i1 values here.");
  SDLoc dl(LogicOp);
  SDValue LHS, RHS;

 // Special case: xor %a, -1
  bool IsBitwiseNegation = isBitwiseNot(LogicOp);

  // Produces a GPR sequence for each operand of the binary logic operation.
  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  // the value in a GPR and for logic operations, it will recursively produce
  // a GPR sequence for the operation.
  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
    unsigned OperandOpcode = Operand.getOpcode();
    if (OperandOpcode == ISD::SETCC)
      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
    else if (OperandOpcode == ISD::TRUNCATE) {
      // Truncation to i1: RLDICL with SH=0/MB=63 keeps only the low bit of
      // the input (32- or 64-bit variant depending on the input width).
      SDValue InputOp = Operand.getOperand(0);
      EVT InVT = InputOp.getValueType();
      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
                                            PPC::RLDICL, dl, InVT, InputOp,
                                            S->getI64Imm(0, dl),
                                            S->getI64Imm(63, dl)), 0);
    } else if (isLogicOp(OperandOpcode))
      return computeLogicOpInGPR(Operand);
    return SDValue();
  };
  LHS = getLogicOperand(LogicOp.getOperand(0));
  RHS = getLogicOperand(LogicOp.getOperand(1));

  // If a GPR sequence can't be produced for the LHS we can't proceed.
  // Not producing a GPR sequence for the RHS is only a problem if this isn't
  // a bitwise negation operation.
  if (!LHS || (!RHS && !IsBitwiseNegation))
    return SDValue();

  NumLogicOpsOnComparison++;

  // We will use the inputs as 64-bit values.
  if (LHS.getValueType() == MVT::i32)
    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);

  unsigned NewOpc;
  switch (LogicOp.getOpcode()) {
  default: llvm_unreachable("Unknown logic operation.");
  case ISD::AND: NewOpc = PPC::AND8; break;
  case ISD::OR: NewOpc = PPC::OR8; break;
  case ISD::XOR: NewOpc = PPC::XOR8; break;
  }

  if (IsBitwiseNegation) {
    // On i1, xor %a, -1 is equivalent to xori %a, 1.
    RHS = S->getI64Imm(1, dl);
    NewOpc = PPC::XORI8;
  }

  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);

}
2980 
2981 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2982 /// Otherwise just reinterpret it as a 64-bit value.
2983 /// Useful when emitting comparison code for 32-bit values without using
2984 /// the compare instruction (which only considers the lower 32-bits).
2985 SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
2986  assert(Input.getValueType() == MVT::i32 &&
2987  "Can only sign-extend 32-bit values here.");
2988  unsigned Opc = Input.getOpcode();
2989 
2990  // The value was sign extended and then truncated to 32-bits. No need to
2991  // sign extend it again.
2992  if (Opc == ISD::TRUNCATE &&
2993  (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
2994  Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
2995  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2996 
2997  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2998  // The input is a sign-extending load. All ppc sign-extending loads
2999  // sign-extend to the full 64-bits.
3000  if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3001  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3002 
3003  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3004  // We don't sign-extend constants.
3005  if (InputConst)
3006  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3007 
3008  SDLoc dl(Input);
3009  SignExtensionsAdded++;
3010  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3011  MVT::i64, Input), 0);
3012 }
3013 
3014 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3015 /// Otherwise just reinterpret it as a 64-bit value.
3016 /// Useful when emitting comparison code for 32-bit values without using
3017 /// the compare instruction (which only considers the lower 32-bits).
3018 SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3019  assert(Input.getValueType() == MVT::i32 &&
3020  "Can only zero-extend 32-bit values here.");
3021  unsigned Opc = Input.getOpcode();
3022 
3023  // The only condition under which we can omit the actual extend instruction:
3024  // - The value is a positive constant
3025  // - The value comes from a load that isn't a sign-extending load
3026  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3027  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3028  (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3029  Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3030  if (IsTruncateOfZExt)
3031  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3032 
3033  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3034  if (InputConst && InputConst->getSExtValue() >= 0)
3035  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3036 
3037  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3038  // The input is a load that doesn't sign-extend (it will be zero-extended).
3039  if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3040  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3041 
3042  // None of the above, need to zero-extend.
3043  SDLoc dl(Input);
3044  ZeroExtensionsAdded++;
3045  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3046  S->getI64Imm(0, dl),
3047  S->getI64Imm(32, dl)), 0);
3048 }
3049 
3050 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3051 // course not actual zero/sign extensions that will generate machine code,
3052 // they're just a way to reinterpret a 32 bit value in a register as a
3053 // 64 bit value and vice-versa.
3054 SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3055  ExtOrTruncConversion Conv) {
3056  SDLoc dl(NatWidthRes);
3057 
3058  // For reinterpreting 32-bit values as 64 bit values, we generate
3059  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3060  if (Conv == ExtOrTruncConversion::Ext) {
3061  SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3062  SDValue SubRegIdx =
3063  CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3064  return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3065  ImDef, NatWidthRes, SubRegIdx), 0);
3066  }
3067 
3068  assert(Conv == ExtOrTruncConversion::Trunc &&
3069  "Unknown convertion between 32 and 64 bit values.");
3070  // For reinterpreting 64-bit values as 32-bit values, we just need to
3071  // EXTRACT_SUBREG (i.e. extract the low word).
3072  SDValue SubRegIdx =
3073  CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3074  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3075  NatWidthRes, SubRegIdx), 0);
3076 }
3077 
// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
// Handle both zero-extensions and sign-extensions.
// LHS may be i32 or i64; the returned value is the requested zext (1/0) or
// sext (-1/0) form of the comparison result.
SDValue
IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                                         ZeroCompare CmpTy) {
  EVT InVT = LHS.getValueType();
  bool Is32Bit = InVT == MVT::i32;
  SDValue ToExtend;

  // Produce the value that needs to be either zero or sign extended.
  switch (CmpTy) {
  case ZeroCompare::GEZExt:
  case ZeroCompare::GESExt:
    // For >= 0, the answer is the complement of the sign bit, so compute
    // ~LHS (nor LHS, LHS); its sign bit is extracted below.
    ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
                                              dl, InVT, LHS, LHS), 0);
    break;
  case ZeroCompare::LEZExt:
  case ZeroCompare::LESExt: {
    if (Is32Bit) {
      // Upper 32 bits cannot be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      // Move the sign bit of (0 - LHS) down to bit 0 (rotate left 1, clear
      // all but the low bit).
      ToExtend =
        SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                       Neg, S->getI64Imm(1, dl),
                                       S->getI64Imm(63, dl)), 0);
    } else {
      // (LHS - 1) | LHS has its sign bit set exactly when LHS <= 0.
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                                Addi, LHS), 0);
    }
    break;
  }
  }

  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
    // zext: shift the sign bit of ToExtend down to bit 0.
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          ToExtend, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
    // sext: arithmetic-shift the sign bit across the whole register.
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
                                          S->getI64Imm(63, dl)), 0);

  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
  // For 32-bit sequences, the extensions differ between GE/LE cases.
  switch (CmpTy) {
  case ZeroCompare::GEZExt: {
    // zext(LHS >= 0): extract the sign bit of ~LHS.
    SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
                           S->getI32Imm(31, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                          ShiftOps), 0);
  }
  case ZeroCompare::GESExt:
    // sext(LHS >= 0): replicate the sign bit of ~LHS.
    return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
                                          S->getI32Imm(31, dl)), 0);
  case ZeroCompare::LEZExt:
    // zext(LHS <= 0): ToExtend already holds zext(LHS > 0)? No — it holds
    // the bit for (LHS < 0) of -LHS; flipping bit 0 yields (LHS <= 0).
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(1, dl)), 0);
  case ZeroCompare::LESExt:
    // sext(LHS <= 0): subtract 1 so a set bit becomes 0 and a clear bit -1.
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(-1, dl)), 0);
  }

  // The above case covers all the enumerators so it can't have a default clause
  // to avoid compiler warnings.
  llvm_unreachable("Unknown zero-comparison type.");
}
3151 
3152 /// Produces a zero-extended result of comparing two 32-bit values according to
3153 /// the passed condition code.
3154 SDValue
3155 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3156  ISD::CondCode CC,
3157  int64_t RHSValue, SDLoc dl) {
3158  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3160  return SDValue();
3161  bool IsRHSZero = RHSValue == 0;
3162  bool IsRHSOne = RHSValue == 1;
3163  bool IsRHSNegOne = RHSValue == -1LL;
3164  switch (CC) {
3165  default: return SDValue();
3166  case ISD::SETEQ: {
3167  // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3168  // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3169  SDValue Xor = IsRHSZero ? LHS :
3170  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3171  SDValue Clz =
3172  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3173  SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3174  S->getI32Imm(31, dl) };
3175  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3176  ShiftOps), 0);
3177  }
3178  case ISD::SETNE: {
3179  // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3180  // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3181  SDValue Xor = IsRHSZero ? LHS :
3182  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3183  SDValue Clz =
3184  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3185  SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3186  S->getI32Imm(31, dl) };
3187  SDValue Shift =
3188  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3189  return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3190  S->getI32Imm(1, dl)), 0);
3191  }
3192  case ISD::SETGE: {
3193  // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3194  // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3195  if(IsRHSZero)
3196  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3197 
3198  // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3199  // by swapping inputs and falling through.
3200  std::swap(LHS, RHS);
3201  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3202  IsRHSZero = RHSConst && RHSConst->isZero();
3204  }
3205  case ISD::SETLE: {
3206  if (CmpInGPR == ICGPR_NonExtIn)
3207  return SDValue();
3208  // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3209  // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3210  if(IsRHSZero) {
3211  if (CmpInGPR == ICGPR_NonExtIn)
3212  return SDValue();
3213  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3214  }
3215 
3216  // The upper 32-bits of the register can't be undefined for this sequence.
3217  LHS = signExtendInputIfNeeded(LHS);
3218  RHS = signExtendInputIfNeeded(RHS);
3219  SDValue Sub =
3220  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3221  SDValue Shift =
3222  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3223  S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3224  0);
3225  return
3226  SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3227  MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3228  }
3229  case ISD::SETGT: {
3230  // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3231  // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3232  // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3233  // Handle SETLT -1 (which is equivalent to SETGE 0).
3234  if (IsRHSNegOne)
3235  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3236 
3237  if (IsRHSZero) {
3238  if (CmpInGPR == ICGPR_NonExtIn)
3239  return SDValue();
3240  // The upper 32-bits of the register can't be undefined for this sequence.
3241  LHS = signExtendInputIfNeeded(LHS);
3242  RHS = signExtendInputIfNeeded(RHS);
3243  SDValue Neg =
3244  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3245  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3246  Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3247  }
3248  // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3249  // (%b < %a) by swapping inputs and falling through.
3250  std::swap(LHS, RHS);
3251  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3252  IsRHSZero = RHSConst && RHSConst->isZero();
3253  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3255  }
3256  case ISD::SETLT: {
3257  // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3258  // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3259  // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3260  // Handle SETLT 1 (which is equivalent to SETLE 0).
3261  if (IsRHSOne) {
3262  if (CmpInGPR == ICGPR_NonExtIn)
3263  return SDValue();
3264  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3265  }
3266 
3267  if (IsRHSZero) {
3268  SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3269  S->getI32Imm(31, dl) };
3270  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3271  ShiftOps), 0);
3272  }
3273 
3274  if (CmpInGPR == ICGPR_NonExtIn)
3275  return SDValue();
3276  // The upper 32-bits of the register can't be undefined for this sequence.
3277  LHS = signExtendInputIfNeeded(LHS);
3278  RHS = signExtendInputIfNeeded(RHS);
3279  SDValue SUBFNode =
3280  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3281  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3282  SUBFNode, S->getI64Imm(1, dl),
3283  S->getI64Imm(63, dl)), 0);
3284  }
3285  case ISD::SETUGE:
3286  // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3287  // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3288  std::swap(LHS, RHS);
3290  case ISD::SETULE: {
3291  if (CmpInGPR == ICGPR_NonExtIn)
3292  return SDValue();
3293  // The upper 32-bits of the register can't be undefined for this sequence.
3294  LHS = zeroExtendInputIfNeeded(LHS);
3295  RHS = zeroExtendInputIfNeeded(RHS);
3296  SDValue Subtract =
3297  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3298  SDValue SrdiNode =
3299  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3300  Subtract, S->getI64Imm(1, dl),
3301  S->getI64Imm(63, dl)), 0);
3302  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3303  S->getI32Imm(1, dl)), 0);
3304  }
3305  case ISD::SETUGT:
3306  // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3307  // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3308  std::swap(LHS, RHS);
3310  case ISD::SETULT: {
3311  if (CmpInGPR == ICGPR_NonExtIn)
3312  return SDValue();
3313  // The upper 32-bits of the register can't be undefined for this sequence.
3314  LHS = zeroExtendInputIfNeeded(LHS);
3315  RHS = zeroExtendInputIfNeeded(RHS);
3316  SDValue Subtract =
3317  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3318  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3319  Subtract, S->getI64Imm(1, dl),
3320  S->getI64Imm(63, dl)), 0);
3321  }
3322  }
3323 }
3324 
3325 /// Produces a sign-extended result of comparing two 32-bit values according to
3326 /// the passed condition code.
3327 SDValue
3328 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3329  ISD::CondCode CC,
3330  int64_t RHSValue, SDLoc dl) {
3331  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3333  return SDValue();
3334  bool IsRHSZero = RHSValue == 0;
3335  bool IsRHSOne = RHSValue == 1;
3336  bool IsRHSNegOne = RHSValue == -1LL;
3337 
3338  switch (CC) {
3339  default: return SDValue();
3340  case ISD::SETEQ: {
3341  // (sext (setcc %a, %b, seteq)) ->
3342  // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3343  // (sext (setcc %a, 0, seteq)) ->
3344  // (ashr (shl (ctlz %a), 58), 63)
3345  SDValue CountInput = IsRHSZero ? LHS :
3346  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3347  SDValue Cntlzw =
3348  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3349  SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3350  S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3351  SDValue Slwi =
3352  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3353  return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3354  }
3355  case ISD::SETNE: {
3356  // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3357  // flip the bit, finally take 2's complement.
3358  // (sext (setcc %a, %b, setne)) ->
3359  // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3360  // Same as above, but the first xor is not needed.
3361  // (sext (setcc %a, 0, setne)) ->
3362  // (neg (xor (lshr (ctlz %a), 5), 1))
3363  SDValue Xor = IsRHSZero ? LHS :
3364  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3365  SDValue Clz =
3366  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3367  SDValue ShiftOps[] =
3368  { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3369  SDValue Shift =
3370  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3371  SDValue Xori =
3372  SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3373  S->getI32Imm(1, dl)), 0);
3374  return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3375  }
3376  case ISD::SETGE: {
3377  // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3378  // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3379  if (IsRHSZero)
3380  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3381 
3382  // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3383  // by swapping inputs and falling through.
3384  std::swap(LHS, RHS);
3385  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3386  IsRHSZero = RHSConst && RHSConst->isZero();
3388  }
3389  case ISD::SETLE: {
3390  if (CmpInGPR == ICGPR_NonExtIn)
3391  return SDValue();
3392  // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
3393  // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3394  if (IsRHSZero)
3395  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3396 
3397  // The upper 32-bits of the register can't be undefined for this sequence.
3398  LHS = signExtendInputIfNeeded(LHS);
3399  RHS = signExtendInputIfNeeded(RHS);
3400  SDValue SUBFNode =
3401  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3402  LHS, RHS), 0);
3403  SDValue Srdi =
3404  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3405  SUBFNode, S->getI64Imm(1, dl),
3406  S->getI64Imm(63, dl)), 0);
3407  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3408  S->getI32Imm(-1, dl)), 0);
3409  }
3410  case ISD::SETGT: {
3411  // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3412  // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3413  // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3414  if (IsRHSNegOne)
3415  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3416  if (IsRHSZero) {
3417  if (CmpInGPR == ICGPR_NonExtIn)
3418  return SDValue();
3419  // The upper 32-bits of the register can't be undefined for this sequence.
3420  LHS = signExtendInputIfNeeded(LHS);
3421  RHS = signExtendInputIfNeeded(RHS);
3422  SDValue Neg =
3423  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3424  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3425  S->getI64Imm(63, dl)), 0);
3426  }
3427  // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3428  // (%b < %a) by swapping inputs and falling through.
3429  std::swap(LHS, RHS);
3430  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3431  IsRHSZero = RHSConst && RHSConst->isZero();
3432  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3434  }
3435  case ISD::SETLT: {
3436  // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3437  // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
3438  // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
3439  if (IsRHSOne) {
3440  if (CmpInGPR == ICGPR_NonExtIn)
3441  return SDValue();
3442  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3443  }
3444  if (IsRHSZero)
3445  return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3446  S->getI32Imm(31, dl)), 0);
3447 
3448  if (CmpInGPR == ICGPR_NonExtIn)
3449  return SDValue();
3450  // The upper 32-bits of the register can't be undefined for this sequence.
3451  LHS = signExtendInputIfNeeded(LHS);
3452  RHS = signExtendInputIfNeeded(RHS);
3453  SDValue SUBFNode =
3454  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3455  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3456  SUBFNode, S->getI64Imm(63, dl)), 0);
3457  }
3458  case ISD::SETUGE:
3459  // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3460  // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3461  std::swap(LHS, RHS);
3463  case ISD::SETULE: {
3464  if (CmpInGPR == ICGPR_NonExtIn)
3465  return SDValue();
3466  // The upper 32-bits of the register can't be undefined for this sequence.
3467  LHS = zeroExtendInputIfNeeded(LHS);
3468  RHS = zeroExtendInputIfNeeded(RHS);
3469  SDValue Subtract =
3470  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3471  SDValue Shift =
3472  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3473  S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3474  0);
3475  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3476  S->getI32Imm(-1, dl)), 0);
3477  }
3478  case ISD::SETUGT:
3479  // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3480  // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3481  std::swap(LHS, RHS);
3483  case ISD::SETULT: {
3484  if (CmpInGPR == ICGPR_NonExtIn)
3485  return SDValue();
3486  // The upper 32-bits of the register can't be undefined for this sequence.
3487  LHS = zeroExtendInputIfNeeded(LHS);
3488  RHS = zeroExtendInputIfNeeded(RHS);
3489  SDValue Subtract =
3490  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3491  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3492  Subtract, S->getI64Imm(63, dl)), 0);
3493  }
3494  }
3495 }
3496 
3497 /// Produces a zero-extended result of comparing two 64-bit values according to
3498 /// the passed condition code.
3499 SDValue
3500 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3501  ISD::CondCode CC,
3502  int64_t RHSValue, SDLoc dl) {
3503  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3505  return SDValue();
3506  bool IsRHSZero = RHSValue == 0;
3507  bool IsRHSOne = RHSValue == 1;
3508  bool IsRHSNegOne = RHSValue == -1LL;
3509  switch (CC) {
3510  default: return SDValue();
3511  case ISD::SETEQ: {
3512  // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3513  // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3514  SDValue Xor = IsRHSZero ? LHS :
3515  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3516  SDValue Clz =
3517  SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3518  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3519  S->getI64Imm(58, dl),
3520  S->getI64Imm(63, dl)), 0);
3521  }
3522  case ISD::SETNE: {
3523  // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3524  // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3525  // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3526  // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3527  SDValue Xor = IsRHSZero ? LHS :
3528  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3529  SDValue AC =
3530  SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3531  Xor, S->getI32Imm(~0U, dl)), 0);
3532  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3533  Xor, AC.getValue(1)), 0);
3534  }
3535  case ISD::SETGE: {
3536  // {subc.reg, subc.CA} = (subcarry %a, %b)
3537  // (zext (setcc %a, %b, setge)) ->
3538  // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3539  // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3540  if (IsRHSZero)
3541  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3542  std::swap(LHS, RHS);
3543  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3544  IsRHSZero = RHSConst && RHSConst->isZero();
3546  }
3547  case ISD::SETLE: {
3548  // {subc.reg, subc.CA} = (subcarry %b, %a)
3549  // (zext (setcc %a, %b, setge)) ->
3550  // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3551  // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3552  if (IsRHSZero)
3553  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3554  SDValue ShiftL =
3555  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3556  S->getI64Imm(1, dl),
3557  S->getI64Imm(63, dl)), 0);
3558  SDValue ShiftR =
3559  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3560  S->getI64Imm(63, dl)), 0);
3561  SDValue SubtractCarry =
3562  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3563  LHS, RHS), 1);
3564  return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3565  ShiftR, ShiftL, SubtractCarry), 0);
3566  }
3567  case ISD::SETGT: {
3568  // {subc.reg, subc.CA} = (subcarry %b, %a)
3569  // (zext (setcc %a, %b, setgt)) ->
3570  // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3571  // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3572  if (IsRHSNegOne)
3573  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3574  if (IsRHSZero) {
3575  SDValue Addi =
3576  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3577  S->getI64Imm(~0ULL, dl)), 0);
3578  SDValue Nor =
3579  SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3580  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3581  S->getI64Imm(1, dl),
3582  S->getI64Imm(63, dl)), 0);
3583  }
3584  std::swap(LHS, RHS);
3585  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3586  IsRHSZero = RHSConst && RHSConst->isZero();
3587  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3589  }
3590  case ISD::SETLT: {
3591  // {subc.reg, subc.CA} = (subcarry %a, %b)
3592  // (zext (setcc %a, %b, setlt)) ->
3593  // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3594  // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3595  if (IsRHSOne)
3596  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3597  if (IsRHSZero)
3598  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3599  S->getI64Imm(1, dl),
3600  S->getI64Imm(63, dl)), 0);
3601  SDValue SRADINode =
3602  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3603  LHS, S->getI64Imm(63, dl)), 0);
3604  SDValue SRDINode =
3605  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3606  RHS, S->getI64Imm(1, dl),
3607  S->getI64Imm(63, dl)), 0);
3608  SDValue SUBFC8Carry =
3609  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3610  RHS, LHS), 1);
3611  SDValue ADDE8Node =
3612  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3613  SRDINode, SRADINode, SUBFC8Carry), 0);
3614  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3615  ADDE8Node, S->getI64Imm(1, dl)), 0);
3616  }
3617  case ISD::SETUGE:
3618  // {subc.reg, subc.CA} = (subcarry %a, %b)
3619  // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3620  std::swap(LHS, RHS);
3622  case ISD::SETULE: {
3623  // {subc.reg, subc.CA} = (subcarry %b, %a)
3624  // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3625  SDValue SUBFC8Carry =
3626  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3627  LHS, RHS), 1);
3628  SDValue SUBFE8Node =
3629  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3630  LHS, LHS, SUBFC8Carry), 0);
3631  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3632  SUBFE8Node, S->getI64Imm(1, dl)), 0);
3633  }
3634  case ISD::SETUGT:
3635  // {subc.reg, subc.CA} = (subcarry %b, %a)
3636  // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3637  std::swap(LHS, RHS);
3639  case ISD::SETULT: {
3640  // {subc.reg, subc.CA} = (subcarry %a, %b)
3641  // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3642  SDValue SubtractCarry =
3643  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3644  RHS, LHS), 1);
3645  SDValue ExtSub =
3646  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3647  LHS, LHS, SubtractCarry), 0);
3648  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3649  ExtSub), 0);
3650  }
3651  }
3652 }
3653 
3654 /// Produces a sign-extended result of comparing two 64-bit values according to
3655 /// the passed condition code.
3656 SDValue
3657 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3658  ISD::CondCode CC,
3659  int64_t RHSValue, SDLoc dl) {
3660  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3662  return SDValue();
3663  bool IsRHSZero = RHSValue == 0;
3664  bool IsRHSOne = RHSValue == 1;
3665  bool IsRHSNegOne = RHSValue == -1LL;
3666  switch (CC) {
3667  default: return SDValue();
3668  case ISD::SETEQ: {
3669  // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3670  // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3671  // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3672  // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3673  SDValue AddInput = IsRHSZero ? LHS :
3674  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3675  SDValue Addic =
3676  SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3677  AddInput, S->getI32Imm(~0U, dl)), 0);
3678  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3679  Addic, Addic.getValue(1)), 0);
3680  }
3681  case ISD::SETNE: {
3682  // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3683  // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3684  // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3685  // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3686  SDValue Xor = IsRHSZero ? LHS :
3687  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3688  SDValue SC =
3689  SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3690  Xor, S->getI32Imm(0, dl)), 0);
3691  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3692  SC, SC.getValue(1)), 0);
3693  }
3694  case ISD::SETGE: {
3695  // {subc.reg, subc.CA} = (subcarry %a, %b)
3696  // (zext (setcc %a, %b, setge)) ->
3697  // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3698  // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3699  if (IsRHSZero)
3700  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3701  std::swap(LHS, RHS);
3702  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3703  IsRHSZero = RHSConst && RHSConst->isZero();
3705  }
3706  case ISD::SETLE: {
3707  // {subc.reg, subc.CA} = (subcarry %b, %a)
3708  // (zext (setcc %a, %b, setge)) ->
3709  // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3710  // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3711  if (IsRHSZero)
3712  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3713  SDValue ShiftR =
3714  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3715  S->getI64Imm(63, dl)), 0);
3716  SDValue ShiftL =
3717  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3718  S->getI64Imm(1, dl),
3719  S->getI64Imm(63, dl)), 0);
3720  SDValue SubtractCarry =
3721  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3722  LHS, RHS), 1);
3723  SDValue Adde =
3724  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3725  ShiftR, ShiftL, SubtractCarry), 0);
3726  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3727  }
3728  case ISD::SETGT: {
3729  // {subc.reg, subc.CA} = (subcarry %b, %a)
3730  // (zext (setcc %a, %b, setgt)) ->
3731  // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3732  // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3733  if (IsRHSNegOne)
3734  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3735  if (IsRHSZero) {
3736  SDValue Add =
3737  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3738  S->getI64Imm(-1, dl)), 0);
3739  SDValue Nor =
3740  SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3741  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3742  S->getI64Imm(63, dl)), 0);
3743  }
3744  std::swap(LHS, RHS);
3745  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3746  IsRHSZero = RHSConst && RHSConst->isZero();
3747  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3749  }
3750  case ISD::SETLT: {
3751  // {subc.reg, subc.CA} = (subcarry %a, %b)
3752  // (zext (setcc %a, %b, setlt)) ->
3753  // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3754  // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3755  if (IsRHSOne)
3756  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3757  if (IsRHSZero) {
3758  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3759  S->getI64Imm(63, dl)), 0);
3760  }
3761  SDValue SRADINode =
3762  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3763  LHS, S->getI64Imm(63, dl)), 0);
3764  SDValue SRDINode =
3765  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3766  RHS, S->getI64Imm(1, dl),
3767  S->getI64Imm(63, dl)), 0);
3768  SDValue SUBFC8Carry =
3769  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3770  RHS, LHS), 1);
3771  SDValue ADDE8Node =
3772  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3773  SRDINode, SRADINode, SUBFC8Carry), 0);
3774  SDValue XORI8Node =
3775  SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3776  ADDE8Node, S->getI64Imm(1, dl)), 0);
3777  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3778  XORI8Node), 0);
3779  }
3780  case ISD::SETUGE:
3781  // {subc.reg, subc.CA} = (subcarry %a, %b)
3782  // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3783  std::swap(LHS, RHS);
3785  case ISD::SETULE: {
3786  // {subc.reg, subc.CA} = (subcarry %b, %a)
3787  // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3788  SDValue SubtractCarry =
3789  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3790  LHS, RHS), 1);
3791  SDValue ExtSub =
3792  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3793  LHS, SubtractCarry), 0);
3794  return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3795  ExtSub, ExtSub), 0);
3796  }
3797  case ISD::SETUGT:
3798  // {subc.reg, subc.CA} = (subcarry %b, %a)
3799  // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3800  std::swap(LHS, RHS);
3802  case ISD::SETULT: {
3803  // {subc.reg, subc.CA} = (subcarry %a, %b)
3804  // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3805  SDValue SubCarry =
3806  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3807  RHS, LHS), 1);
3808  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3809  LHS, LHS, SubCarry), 0);
3810  }
3811  }
3812 }
3813 
3814 /// Do all uses of this SDValue need the result in a GPR?
3815 /// This is meant to be used on values that have type i1 since
3816 /// it is somewhat meaningless to ask if values of other types
3817 /// should be kept in GPR's.
3818 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3819  assert(Compare.getOpcode() == ISD::SETCC &&
3820  "An ISD::SETCC node required here.");
3821 
3822  // For values that have a single use, the caller should obviously already have
3823  // checked if that use is an extending use. We check the other uses here.
3824  if (Compare.hasOneUse())
3825  return true;
3826  // We want the value in a GPR if it is being extended, used for a select, or
3827  // used in logical operations.
3828  for (auto CompareUse : Compare.getNode()->uses())
3829  if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3830  CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3831  CompareUse->getOpcode() != ISD::SELECT &&
3832  !isLogicOp(CompareUse->getOpcode())) {
3833  OmittedForNonExtendUses++;
3834  return false;
3835  }
3836  return true;
3837 }
3838 
3839 /// Returns an equivalent of a SETCC node but with the result the same width as
3840 /// the inputs. This can also be used for SELECT_CC if either the true or false
3841 /// values is a power of two while the other is zero.
3842 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3843  SetccInGPROpts ConvOpts) {
3844  assert((Compare.getOpcode() == ISD::SETCC ||
3845  Compare.getOpcode() == ISD::SELECT_CC) &&
3846  "An ISD::SETCC node required here.");
3847 
3848  // Don't convert this comparison to a GPR sequence because there are uses
3849  // of the i1 result (i.e. uses that require the result in the CR).
3850  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
3851  return SDValue();
3852 
3853  SDValue LHS = Compare.getOperand(0);
3854  SDValue RHS = Compare.getOperand(1);
3855 
3856  // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3857  int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
3858  ISD::CondCode CC =
3859  cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
3860  EVT InputVT = LHS.getValueType();
3861  if (InputVT != MVT::i32 && InputVT != MVT::i64)
3862  return SDValue();
3863 
3864  if (ConvOpts == SetccInGPROpts::ZExtInvert ||
3865  ConvOpts == SetccInGPROpts::SExtInvert)
3866  CC = ISD::getSetCCInverse(CC, InputVT);
3867 
3868  bool Inputs32Bit = InputVT == MVT::i32;
3869 
3870  SDLoc dl(Compare);
3871  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3872  int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
3873  bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3874  ConvOpts == SetccInGPROpts::SExtInvert;
3875 
3876  if (IsSext && Inputs32Bit)
3877  return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3878  else if (Inputs32Bit)
3879  return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3880  else if (IsSext)
3881  return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3882  return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3883 }
3884 
3885 } // end anonymous namespace
3886 
3887 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3888  if (N->getValueType(0) != MVT::i32 &&
3889  N->getValueType(0) != MVT::i64)
3890  return false;
3891 
3892  // This optimization will emit code that assumes 64-bit registers
3893  // so we don't want to run it in 32-bit mode. Also don't run it
3894  // on functions that are not to be optimized.
3895  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
3896  return false;
3897 
3898  // For POWER10, it is more profitable to use the set boolean extension
3899  // instructions rather than the integer compare elimination codegen.
3900  // Users can override this via the command line option, `--ppc-gpr-icmps`.
3901  if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
3902  return false;
3903 
3904  switch (N->getOpcode()) {
3905  default: break;
3906  case ISD::ZERO_EXTEND:
3907  case ISD::SIGN_EXTEND:
3908  case ISD::AND:
3909  case ISD::OR:
3910  case ISD::XOR: {
3911  IntegerCompareEliminator ICmpElim(CurDAG, this);
3912  if (SDNode *New = ICmpElim.Select(N)) {
3913  ReplaceNode(N, New);
3914  return true;
3915  }
3916  }
3917  }
3918  return false;
3919 }
3920 
3921 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3922  if (N->getValueType(0) != MVT::i32 &&
3923  N->getValueType(0) != MVT::i64)
3924  return false;
3925 
3926  if (!UseBitPermRewriter)
3927  return false;
3928 
3929  switch (N->getOpcode()) {
3930  default: break;
3931  case ISD::ROTL:
3932  case ISD::SHL:
3933  case ISD::SRL:
3934  case ISD::AND:
3935  case ISD::OR: {
3936  BitPermutationSelector BPS(CurDAG);
3937  if (SDNode *New = BPS.Select(N)) {
3938  ReplaceNode(N, New);
3939  return true;
3940  }
3941  return false;
3942  }
3943  }
3944 
3945  return false;
3946 }
3947 
3948 /// SelectCC - Select a comparison of the specified values with the specified
3949 /// condition code, returning the CR# of the expression.
3950 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3951  const SDLoc &dl, SDValue Chain) {
3952  // Always select the LHS.
3953  unsigned Opc;
3954 
3955  if (LHS.getValueType() == MVT::i32) {
3956  unsigned Imm;
3957  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3958  if (isInt32Immediate(RHS, Imm)) {
3959  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3960  if (isUInt<16>(Imm))
3961  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3962  getI32Imm(Imm & 0xFFFF, dl)),
3963  0);
3964  // If this is a 16-bit signed immediate, fold it.
3965  if (isInt<16>((int)Imm))
3966  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3967  getI32Imm(Imm & 0xFFFF, dl)),
3968  0);
3969 
3970  // For non-equality comparisons, the default code would materialize the
3971  // constant, then compare against it, like this:
3972  // lis r2, 4660
3973  // ori r2, r2, 22136
3974  // cmpw cr0, r3, r2
3975  // Since we are just comparing for equality, we can emit this instead:
3976  // xoris r0,r3,0x1234
3977  // cmplwi cr0,r0,0x5678
3978  // beq cr0,L6
3979  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
3980  getI32Imm(Imm >> 16, dl)), 0);
3981  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
3982  getI32Imm(Imm & 0xFFFF, dl)), 0);
3983  }
3984  Opc = PPC::CMPLW;
3985  } else if (ISD::isUnsignedIntSetCC(CC)) {
3987  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3988  getI32Imm(Imm & 0xFFFF, dl)), 0);
3989  Opc = PPC::CMPLW;
3990  } else {
3991  int16_t SImm;
3992  if (isIntS16Immediate(RHS, SImm))
3993  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3994  getI32Imm((int)SImm & 0xFFFF,
3995  dl)),
3996  0);
3997  Opc = PPC::CMPW;
3998  }
3999  } else if (LHS.getValueType() == MVT::i64) {
4000  uint64_t Imm;
4001  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4002  if (isInt64Immediate(RHS.getNode(), Imm)) {
4003  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4004  if (isUInt<16>(Imm))
4005  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4006  getI32Imm(Imm & 0xFFFF, dl)),
4007  0);
4008  // If this is a 16-bit signed immediate, fold it.
4009  if (isInt<16>(Imm))
4010  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4011  getI32Imm(Imm & 0xFFFF, dl)),
4012  0);
4013 
4014  // For non-equality comparisons, the default code would materialize the
4015  // constant, then compare against it, like this:
4016  // lis r2, 4660
4017  // ori r2, r2, 22136
4018  // cmpd cr0, r3, r2
4019  // Since we are just comparing for equality, we can emit this instead:
4020  // xoris r0,r3,0x1234
4021  // cmpldi cr0,r0,0x5678
4022  // beq cr0,L6
4023  if (isUInt<32>(Imm)) {
4024  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4025  getI64Imm(Imm >> 16, dl)), 0);
4026  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4027  getI64Imm(Imm & 0xFFFF, dl)),
4028  0);
4029  }
4030  }
4031  Opc = PPC::CMPLD;
4032  } else if (ISD::isUnsignedIntSetCC(CC)) {
4033  if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4034  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4035  getI64Imm(Imm & 0xFFFF, dl)), 0);
4036  Opc = PPC::CMPLD;
4037  } else {
4038  int16_t SImm;
4039  if (isIntS16Immediate(RHS, SImm))
4040  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4041  getI64Imm(SImm & 0xFFFF, dl)),
4042  0);
4043  Opc = PPC::CMPD;
4044  }
4045  } else if (LHS.getValueType() == MVT::f32) {
4046  if (Subtarget->hasSPE()) {
4047  switch (CC) {
4048  default:
4049  case ISD::SETEQ:
4050  case ISD::SETNE:
4051  Opc = PPC::EFSCMPEQ;
4052  break;
4053  case ISD::SETLT:
4054  case ISD::SETGE:
4055  case ISD::SETOLT:
4056  case ISD::SETOGE:
4057  case ISD::SETULT:
4058  case ISD::SETUGE:
4059  Opc = PPC::EFSCMPLT;
4060  break;
4061  case ISD::SETGT:
4062  case ISD::SETLE:
4063  case ISD::SETOGT:
4064  case ISD::SETOLE:
4065  case ISD::SETUGT:
4066  case ISD::SETULE:
4067  Opc = PPC::EFSCMPGT;
4068  break;
4069  }
4070  } else
4071  Opc = PPC::FCMPUS;
4072  } else if (LHS.getValueType() == MVT::f64) {
4073  if (Subtarget->hasSPE()) {
4074  switch (CC) {
4075  default:
4076  case ISD::SETEQ:
4077  case ISD::SETNE:
4078  Opc = PPC::EFDCMPEQ;
4079  break;
4080  case ISD::SETLT:
4081  case ISD::SETGE:
4082  case ISD::SETOLT:
4083  case ISD::SETOGE:
4084  case ISD::SETULT:
4085  case ISD::SETUGE:
4086  Opc = PPC::EFDCMPLT;
4087  break;
4088  case ISD::SETGT:
4089  case ISD::SETLE:
4090  case ISD::SETOGT:
4091  case ISD::SETOLE:
4092  case ISD::SETUGT:
4093  case ISD::SETULE:
4094  Opc = PPC::EFDCMPGT;
4095  break;
4096  }
4097  } else
4098  Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4099  } else {
4100  assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4101  assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4102  Opc = PPC::XSCMPUQP;
4103  }
4104  if (Chain)
4105  return SDValue(
4106  CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4107  0);
4108  else
4109  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4110 }
4111 
4113  const PPCSubtarget *Subtarget) {
4114  // For SPE instructions, the result is in GT bit of the CR
4115  bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4116 
4117  switch (CC) {
4118  case ISD::SETUEQ:
4119  case ISD::SETONE:
4120  case ISD::SETOLE:
4121  case ISD::SETOGE:
4122  llvm_unreachable("Should be lowered by legalize!");
4123  default: llvm_unreachable("Unknown condition!");
4124  case ISD::SETOEQ:
4125  case ISD::SETEQ:
4126  return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4127  case ISD::SETUNE:
4128  case ISD::SETNE:
4129  return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4130  case ISD::SETOLT:
4131  case ISD::SETLT:
4132  return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4133  case ISD::SETULE:
4134  case ISD::SETLE:
4135  return PPC::PRED_LE;
4136  case ISD::SETOGT:
4137  case ISD::SETGT:
4138  return PPC::PRED_GT;
4139  case ISD::SETUGE:
4140  case ISD::SETGE:
4141  return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4142  case ISD::SETO: return PPC::PRED_NU;
4143  case ISD::SETUO: return PPC::PRED_UN;
4144  // These two are invalid for floating point. Assume we have int.
4145  case ISD::SETULT: return PPC::PRED_LT;
4146  case ISD::SETUGT: return PPC::PRED_GT;
4147  }
4148 }
4149 
4150 /// getCRIdxForSetCC - Return the index of the condition register field
4151 /// associated with the SetCC condition, and whether or not the field is
4152 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
4153 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4154  Invert = false;
4155  switch (CC) {
4156  default: llvm_unreachable("Unknown condition!");
4157  case ISD::SETOLT:
4158  case ISD::SETLT: return 0; // Bit #0 = SETOLT
4159  case ISD::SETOGT:
4160  case ISD::SETGT: return 1; // Bit #1 = SETOGT
4161  case ISD::SETOEQ:
4162  case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4163  case ISD::SETUO: return 3; // Bit #3 = SETUO
4164  case ISD::SETUGE:
4165  case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4166  case ISD::SETULE:
4167  case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4168  case ISD::SETUNE:
4169  case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4170  case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4171  case ISD::SETUEQ:
4172  case ISD::SETOGE:
4173  case ISD::SETOLE:
4174  case ISD::SETONE:
4175  llvm_unreachable("Invalid branch code: should be expanded by legalize");
4176  // These are invalid for floating point. Assume integer.
4177  case ISD::SETULT: return 0;
4178  case ISD::SETUGT: return 1;
4179  }
4180 }
4181 
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
// and v4f32).
//
// Conditions without a direct instruction are canonicalized first, in two
// stages: some by swapping the compared operands (reported via \p Swap),
// then some by selecting the opposite compare and negating its result
// (reported via \p Negate). The caller is responsible for honoring both
// out-parameters when emitting the compare.
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
                                bool HasVSX, bool &Swap, bool &Negate) {
  Swap = false;
  Negate = false;

  if (VecVT.isFloatingPoint()) {
    /* Handle some cases by swapping input operands. */
    // E.g. a <= b is the same compare as b >= a with the operands swapped.
    switch (CC) {
      case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
      case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
      case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
      case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
      default: break;
    }
    /* Handle some cases by negating the result. */
    // E.g. for floats, a unordered-or-<= b is the negation of a ordered-> b.
    switch (CC) {
      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
      case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
      case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
      case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
      default: break;
    }
    /* We have instructions implementing the remaining cases. */
    // Only EQ/GT/GE remain; pick the VSX form when available for v4f32,
    // and the (VSX-only) DP form for v2f64.
    switch (CC) {
      case ISD::SETEQ:
      case ISD::SETOEQ:
        if (VecVT == MVT::v4f32)
          return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
        else if (VecVT == MVT::v2f64)
          return PPC::XVCMPEQDP;
        break;
      case ISD::SETGT:
      case ISD::SETOGT:
        if (VecVT == MVT::v4f32)
          return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
        else if (VecVT == MVT::v2f64)
          return PPC::XVCMPGTDP;
        break;
      case ISD::SETGE:
      case ISD::SETOGE:
        if (VecVT == MVT::v4f32)
          return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
        else if (VecVT == MVT::v2f64)
          return PPC::XVCMPGEDP;
        break;
      default:
        break;
    }
    llvm_unreachable("Invalid floating-point vector compare condition");
  } else {
    /* Handle some cases by swapping input operands. */
    switch (CC) {
      case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
      case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
      default: break;
    }
    /* Handle some cases by negating the result. */
    switch (CC) {
      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
      case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
      case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
      case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
      default: break;
    }
    /* We have instructions implementing the remaining cases. */
    // Only EQ, signed GT and unsigned GT remain; dispatch on element width.
    switch (CC) {
      case ISD::SETEQ:
      case ISD::SETUEQ:
        if (VecVT == MVT::v16i8)
          return PPC::VCMPEQUB;
        else if (VecVT == MVT::v8i16)
          return PPC::VCMPEQUH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPEQUW;
        else if (VecVT == MVT::v2i64)
          return PPC::VCMPEQUD;
        else if (VecVT == MVT::v1i128)
          return PPC::VCMPEQUQ;
        break;
      case ISD::SETGT:
        if (VecVT == MVT::v16i8)
          return PPC::VCMPGTSB;
        else if (VecVT == MVT::v8i16)
          return PPC::VCMPGTSH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPGTSW;
        else if (VecVT == MVT::v2i64)
          return PPC::VCMPGTSD;
        else if (VecVT == MVT::v1i128)
          return PPC::VCMPGTSQ;
        break;
      case ISD::SETUGT:
        if (VecVT == MVT::v16i8)
          return PPC::VCMPGTUB;
        else if (VecVT == MVT::v8i16)
          return PPC::VCMPGTUH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPGTUW;
        else if (VecVT == MVT::v2i64)
          return PPC::VCMPGTUD;
        else if (VecVT == MVT::v1i128)
          return PPC::VCMPGTUQ;
        break;
      default:
        break;
    }
    llvm_unreachable("Invalid integer vector compare condition");
  }
}
4299 
/// Try to select a SETCC (or strict-FP SETCC) node into target instructions.
/// Returns true if the node was replaced via SelectNodeTo, false to let the
/// generic tablegen patterns handle it instead.
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc dl(N);
  unsigned Imm;
  bool IsStrict = N->isStrictFPOpcode();
  // Strict nodes carry a chain in operand 0, shifting all other operands
  // (LHS, RHS, condition code) up by one.
  ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
  EVT PtrVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();

  SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
  SDValue RHS = N->getOperand(IsStrict ? 2 : 1);

  if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
    // We can codegen setcc op, imm very efficiently compared to a brcond.
    // Check for those cases here.
    // setcc op, 0
    if (Imm == 0) {
      SDValue Op = LHS;
      switch (CC) {
      default: break;
      case ISD::SETEQ: {
        // (x == 0) as cntlzw(x) >> 5: only x == 0 has 32 leading zeros, so
        // the shifted count is 1 exactly when x is zero. The RLWINM rotates
        // left by 27 (i.e. a logical right shift by 5) and masks the result.
        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
        SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETNE: {
        // 32-bit only carry trick for (x != 0): addic x, -1 sets carry iff
        // x != 0, and the subfe turns the carry into a 0/1 result.
        if (isPPC64) break;
        SDValue AD =
          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                         Op, getI32Imm(~0U, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
        return true;
      }
      case ISD::SETLT: {
        // (x < 0) is just the sign bit: rotate it down into bit 0.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (x > 0): (-x) & ~x has its sign bit set exactly when x > 0;
        // extract that bit with the rotate/mask.
        SDValue T =
          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
        SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      }
    } else if (Imm == ~0U) {        // setcc op, -1
      SDValue Op = LHS;
      switch (CC) {
      default: break;
      case ISD::SETEQ:
        // 32-bit only: (x == -1) via the carry of x + 1 added onto a zero.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(1, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                             SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                            MVT::i32,
                                                            getI32Imm(0, dl)),
                                     0), Op.getValue(1));
        return true;
      case ISD::SETNE: {
        // 32-bit only: (x != -1) == (~x != 0), reusing the SETNE-zero carry
        // trick on the complemented value.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(~0U, dl));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
                             SDValue(AD, 1));
        return true;
      }
      case ISD::SETLT: {
        // (x < -1): sign bit of (x + 1) & x is set exactly in that case.
        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
                                                    getI32Imm(1, dl)), 0);
        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
                                                    Op), 0);
        SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (x > -1) == !(x < 0): extract the sign bit and flip it with xori.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
        return true;
      }
      }
    }
  }

  // Altivec Vector compare instructions do not set any CR register by default and
  // vector compare operations return the same type as the operands.
  if (!IsStrict && LHS.getValueType().isVector()) {
    if (Subtarget->hasSPE())
      return false;

    EVT VecVT = LHS.getValueType();
    bool Swap, Negate;
    unsigned int VCmpInst =
        getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
    if (Swap)
      std::swap(LHS, RHS);

    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (Negate) {
      // Negation is a vector NOR of the compare with itself.
      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
      CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
                           ResVT, VCmp, VCmp);
      return true;
    }

    CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
    return true;
  }

  if (Subtarget->useCRBits())
    return false;

  // General case: emit the compare, then move the CR field into a GPR and
  // extract (and possibly invert) the single bit selected by the condition.
  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
  if (IsStrict)
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
  SDValue IntCR;

  // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
  // The correct compare instruction is already set by SelectCC()
  if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
    Idx = 1;
  }

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);

  SDValue InFlag;  // Null incoming flag value.
  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                               InFlag).getValue(1);

  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                         CCReg), 0);

  // Rotate the desired CR bit of the MFOCRF result into bit 31 and mask it.
  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
                    getI32Imm(31, dl), getI32Imm(31, dl) };
  if (!Inv) {
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // Get the specified bit.
  SDValue Tmp =
    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  return true;
}
4461 
4462 /// Does this node represent a load/store node whose address can be represented
4463 /// with a register plus an immediate that's a multiple of \p Val:
4464 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4465  LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4466  StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4467  MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
4468  SDValue AddrOp;
4469  if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4470  AddrOp = N->getOperand(1);
4471  else if (STN)
4472  AddrOp = STN->getOperand(2);
4473 
4474  // If the address points a frame object or a frame object with an offset,
4475  // we need to check the object alignment.
4476  short Imm = 0;
4477  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4478  AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4479  AddrOp)) {
4480  // If op0 is a frame index that is under aligned, we can't do it either,
4481  // because it is translated to r31 or r1 + slot + offset. We won't know the
4482  // slot number until the stack frame is finalized.
4483  const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4484  unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4485  if ((SlotAlign % Val) != 0)
4486  return false;
4487 
4488  // If we have an offset, we need further check on the offset.
4489  if (AddrOp.getOpcode() != ISD::ADD)
4490  return true;
4491  }
4492 
4493  if (AddrOp.getOpcode() == ISD::ADD)
4494  return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4495 
4496  // If the address comes from the outside, the offset will be zero.
4497  return AddrOp.getOpcode() == ISD::CopyFromReg;
4498 }
4499 
4500 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4501  // Transfer memoperands.
4502  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4503  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4504 }
4505 
// Check whether this SELECT_CC matches one of the comparison idioms that the
// Power9 SETB instruction implements (-1 / 0 / 1 depending on the compare).
// On success, \p NeedSwapOps tells the caller to swap the compared operands
// and \p IsUnCmp requests the unsigned form of the word compare.
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
                         bool &NeedSwapOps, bool &IsUnCmp) {

  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue TrueRes = N->getOperand(2);
  SDValue FalseRes = N->getOperand(3);
  ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
  if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
                     N->getSimpleValueType(0) != MVT::i32))
    return false;

  // We are looking for any of:
  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
  int64_t TrueResVal = TrueConst->getSExtValue();
  if ((TrueResVal < -1 || TrueResVal > 1) ||
      (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
      (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
      (TrueResVal == 0 &&
       (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
    return false;

  // Peel off the zext/sext to reach the inner setcc; a select_cc false arm is
  // used directly.
  SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
                           ? FalseRes
                           : FalseRes.getOperand(0);
  bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
  if (SetOrSelCC.getOpcode() != ISD::SETCC &&
      SetOrSelCC.getOpcode() != ISD::SELECT_CC)
    return false;

  // Without this setb optimization, the outer SELECT_CC will be manually
  // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
  // transforms pseudo instruction to isel instruction. When there are more than
  // one use for result like zext/sext, with current optimization we only see
  // isel is replaced by setb but can't see any significant gain. Since
  // setb has longer latency than original isel, we should avoid this. Another
  // point is that setb requires comparison always kept, it can break the
  // opportunity to get the comparison away if we have in future.
  if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
    return false;

  SDValue InnerLHS = SetOrSelCC.getOperand(0);
  SDValue InnerRHS = SetOrSelCC.getOperand(1);
  ISD::CondCode InnerCC =
      cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
  // If the inner comparison is a select_cc, make sure the true/false values are
  // 1/-1 and canonicalize it if needed.
  if (InnerIsSel) {
    ConstantSDNode *SelCCTrueConst =
        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
    ConstantSDNode *SelCCFalseConst =
        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
    if (!SelCCTrueConst || !SelCCFalseConst)
      return false;
    int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
    int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
    // The values must be -1/1 (requiring a swap) or 1/-1.
    if (SelCCTVal == -1 && SelCCFVal == 1) {
      std::swap(InnerLHS, InnerRHS);
    } else if (SelCCTVal != 1 || SelCCFVal != -1)
      return false;
  }

  // Canonicalize unsigned case
  if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
    IsUnCmp = true;
    InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
  }

  // The inner compare must be over the same two values as the outer one,
  // either in the same order or swapped.
  bool InnerSwapped = false;
  if (LHS == InnerRHS && RHS == InnerLHS)
    InnerSwapped = true;
  else if (LHS != InnerLHS || RHS != InnerRHS)
    return false;

  switch (CC) {
  // (select_cc lhs, rhs, 0, \
  //     (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
  case ISD::SETEQ:
    if (!InnerIsSel)
      return false;
    if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
      return false;
    NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
    break;

  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
  case ISD::SETULT:
    if (!IsUnCmp && InnerCC != ISD::SETNE)
      return false;
    IsUnCmp = true;
    // Falls through to the SETLT handling.
  case ISD::SETLT:
    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
        (InnerCC == ISD::SETLT && InnerSwapped))
      NeedSwapOps = (TrueResVal == 1);
    else
      return false;
    break;

  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
  case ISD::SETUGT:
    if (!IsUnCmp && InnerCC != ISD::SETNE)
      return false;
    IsUnCmp = true;
    // Falls through to the SETGT handling.
  case ISD::SETGT:
    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
        (InnerCC == ISD::SETGT && InnerSwapped))
      NeedSwapOps = (TrueResVal == -1);
    else
      return false;
    break;

  default:
    return false;
  }

  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
  LLVM_DEBUG(N->dump());

  return true;
}
4644 
4645 // Return true if it's a software square-root/divide operand.
4646 static bool isSWTestOp(SDValue N) {
4647  if (N.getOpcode() == PPCISD::FTSQRT)
4648  return true;
4649  if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
4650  N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4651  return false;
4652  switch (N.getConstantOperandVal(0)) {
4653  case Intrinsic::ppc_vsx_xvtdivdp:
4654  case Intrinsic::ppc_vsx_xvtdivsp:
4655  case Intrinsic::ppc_vsx_xvtsqrtdp:
4656  case Intrinsic::ppc_vsx_xvtsqrtsp:
4657  return true;
4658  }
4659  return false;
4660 }
4661 
/// Fold a branch on the result of a software square-root/divide test
/// (see isSWTestOp) into a single conditional branch on the CR field the test
/// instruction sets. Returns true if the BR_CC node was replaced.
bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
  assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
  // We are looking for following patterns, where `truncate to i1` actually has
  // the same semantic with `and 1`.
  // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
  // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
  // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
  // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
  // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
  // Only equality comparisons are handled (operand 1 is the condition code).
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  if (CC != ISD::SETEQ && CC != ISD::SETNE)
    return false;

  // The comparison must be against the constant zero (operand 3 is the RHS).
  SDValue CmpRHS = N->getOperand(3);
  if (!isa<ConstantSDNode>(CmpRHS) ||
      cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
    return false;

  // The LHS must wrap (truncate or mask) the result of a software test op.
  SDValue CmpLHS = N->getOperand(2);
  if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
    return false;

  // Map the tested mask bit (1/2/4/8) to the CR predicate it selects; SETNE
  // branches when the bit is set, SETEQ when it is clear, hence the paired
  // predicates below.
  unsigned PCC = 0;
  bool IsCCNE = CC == ISD::SETNE;
  if (CmpLHS.getOpcode() == ISD::AND &&
      isa<ConstantSDNode>(CmpLHS.getOperand(1)))
    switch (CmpLHS.getConstantOperandVal(1)) {
    case 1:
      PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
      break;
    case 2:
      PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
      break;
    case 4:
      PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
      break;
    case 8:
      PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
      break;
    default:
      return false;
    }
  else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
           CmpLHS.getValueType() == MVT::i1)
    // Truncation to i1 keeps only bit 0, equivalent to `and 1`.
    PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;

  if (PCC) {
    SDLoc dl(N);
    // BCC operands: predicate, CR source (the test op), branch target
    // (operand 4 of BR_CC), chain (operand 0).
    SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
                     N->getOperand(0)};
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return true;
  }
  return false;
}
4720 
4721 bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4722  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4723  unsigned Imm;
4724  if (!isInt32Immediate(N->getOperand(1), Imm))
4725  return false;
4726 
4727  SDLoc dl(N);
4728  SDValue Val = N->getOperand(0);
4729  unsigned SH, MB, ME;
4730  // If this is an and of a value rotated between 0 and 31 bits and then and'd
4731  // with a mask, emit rlwinm
4732  if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
4733  Val = Val.getOperand(0);
4734  SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4735  getI32Imm(ME, dl)};
4736  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4737  return true;
4738  }
4739 
4740  // If this is just a masked value where the input is not handled, and
4741  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4742  if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4743  SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4744  getI32Imm(ME, dl)};
4745  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4746  return true;