LLVM  3.7.0
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a pattern matching instruction selector for PowerPC,
11 // converting from a legalized dag to a PPC dag.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "PPC.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCTargetMachine.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/GlobalAlias.h"
27 #include "llvm/IR/GlobalValue.h"
28 #include "llvm/IR/GlobalVariable.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/Module.h"
32 #include "llvm/Support/Debug.h"
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "ppc-codegen"
40 
41 // FIXME: Remove this once the bug has been fixed!
42 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
43 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
44 
45 static cl::opt<bool>
46  UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
47  cl::desc("use aggressive ppc isel for bit permutations"),
48  cl::Hidden);
50  "ppc-bit-perm-rewriter-stress-rotates",
51  cl::desc("stress rotate selection in aggressive ppc isel for "
52  "bit permutations"),
53  cl::Hidden);
54 
55 namespace llvm {
57 }
58 
59 namespace {
60  //===--------------------------------------------------------------------===//
61  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
62  /// instructions for SelectionDAG operations.
63  ///
64  class PPCDAGToDAGISel : public SelectionDAGISel {
65  const PPCTargetMachine &TM;
66  const PPCSubtarget *PPCSubTarget;
67  const PPCTargetLowering *PPCLowering;
68  unsigned GlobalBaseReg;
69  public:
70  explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
71  : SelectionDAGISel(tm), TM(tm) {
73  }
74 
75  bool runOnMachineFunction(MachineFunction &MF) override {
76  // Make sure we re-emit a set of the global base reg if necessary
77  GlobalBaseReg = 0;
78  PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
79  PPCLowering = PPCSubTarget->getTargetLowering();
81 
82  if (!PPCSubTarget->isSVR4ABI())
83  InsertVRSaveCode(MF);
84 
85  return true;
86  }
87 
88  void PreprocessISelDAG() override;
89  void PostprocessISelDAG() override;
90 
91  /// getI32Imm - Return a target constant with the specified value, of type
92  /// i32.
93  inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
94  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
95  }
96 
97  /// getI64Imm - Return a target constant with the specified value, of type
98  /// i64.
99  inline SDValue getI64Imm(uint64_t Imm, SDLoc dl) {
100  return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
101  }
102 
103  /// getSmallIPtrImm - Return a target constant of pointer type.
104  inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) {
105  return CurDAG->getTargetConstant(
106  Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
107  }
108 
109  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
110  /// rotate and mask opcode and mask operation.
111  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
112  unsigned &SH, unsigned &MB, unsigned &ME);
113 
114  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
115  /// base register. Return the virtual register that holds this value.
116  SDNode *getGlobalBaseReg();
117 
118  SDNode *getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
119 
120  // Select - Convert the specified operand from a target-independent to a
121  // target-specific node if it hasn't already been changed.
122  SDNode *Select(SDNode *N) override;
123 
124  SDNode *SelectBitfieldInsert(SDNode *N);
125  SDNode *SelectBitPermutation(SDNode *N);
126 
127  /// SelectCC - Select a comparison of the specified values with the
128  /// specified condition code, returning the CR# of the expression.
129  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl);
130 
131  /// SelectAddrImm - Returns true if the address N can be represented by
132  /// a base register plus a signed 16-bit displacement [r+imm].
133  bool SelectAddrImm(SDValue N, SDValue &Disp,
134  SDValue &Base) {
135  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
136  }
137 
138  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
139  /// immediate field. Note that the operand at this point is already the
140  /// result of a prior SelectAddressRegImm call.
141  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
142  if (N.getOpcode() == ISD::TargetConstant ||
144  Out = N;
145  return true;
146  }
147 
148  return false;
149  }
150 
151 /// SelectAddrIdx - Given the specified address, check to see if it can be
152  /// represented as an indexed [r+r] operation. Returns false if it can
153  /// be represented by [r+imm], which are preferred.
154  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
155  return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
156  }
157 
158 /// SelectAddrIdxOnly - Given the specified address, force it to be
159  /// represented as an indexed [r+r] operation.
160  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
161  return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
162  }
163 
164  /// SelectAddrImmX4 - Returns true if the address N can be represented by
165  /// a base register plus a signed 16-bit displacement that is a multiple of 4.
166  /// Suitable for use by STD and friends.
167  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
168  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
169  }
170 
171  // Select an address into a single register.
172  bool SelectAddr(SDValue N, SDValue &Base) {
173  Base = N;
174  return true;
175  }
176 
177  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
178  /// inline asm expressions. It is always correct to compute the value into
179  /// a register. The case of adding a (possibly relocatable) constant to a
180  /// register can be improved, but it is wrong to substitute Reg+Reg for
181  /// Reg in an asm, because the load or store opcode would have to change.
182  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
183  unsigned ConstraintID,
184  std::vector<SDValue> &OutOps) override {
185 
186  switch(ConstraintID) {
187  default:
188  errs() << "ConstraintID: " << ConstraintID << "\n";
189  llvm_unreachable("Unexpected asm memory constraint");
197  // We need to make sure that this one operand does not end up in r0
198  // (because we might end up lowering this as 0(%op)).
199  const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
200  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
201  SDLoc dl(Op);
202  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
203  SDValue NewOp =
204  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
205  dl, Op.getValueType(),
206  Op, RC), 0);
207 
208  OutOps.push_back(NewOp);
209  return false;
210  }
211  return true;
212  }
213 
214  void InsertVRSaveCode(MachineFunction &MF);
215 
216  const char *getPassName() const override {
217  return "PowerPC DAG->DAG Pattern Instruction Selection";
218  }
219 
220 // Include the pieces autogenerated from the target description.
221 #include "PPCGenDAGISel.inc"
222 
223 private:
224  SDNode *SelectSETCC(SDNode *N);
225 
226  void PeepholePPC64();
227  void PeepholePPC64ZExt();
228  void PeepholeCROps();
229 
230  SDValue combineToCMPB(SDNode *N);
231  void foldBoolExts(SDValue &Res, SDNode *&N);
232 
233  bool AllUsersSelectZero(SDNode *N);
234  void SwapAllSelectUsers(SDNode *N);
235 
236  SDNode *transferMemOperands(SDNode *N, SDNode *Result);
237  };
238 }
239 
240 /// InsertVRSaveCode - Once the entire function has been instruction selected,
241 /// all virtual registers are created and all machine instructions are built,
242 /// check to see if we need to save/restore VRSAVE. If so, do it.
243 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
244  // Check to see if this function uses vector registers, which means we have to
245  // save and restore the VRSAVE register and update it with the regs we use.
246  //
247  // In this case, there will be virtual registers of vector type created
248  // by the scheduler. Detect them now.
249  bool HasVectorVReg = false;
250  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
252  if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
253  HasVectorVReg = true;
254  break;
255  }
256  }
257  if (!HasVectorVReg) return; // nothing to do.
258 
259  // If we have a vector register, we want to emit code into the entry and exit
260  // blocks to save and restore the VRSAVE register. We do this here (instead
261  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
262  //
263  // 1. This (trivially) reduces the load on the register allocator, by not
264  // having to represent the live range of the VRSAVE register.
265  // 2. This (more significantly) allows us to create a temporary virtual
266  // register to hold the saved VRSAVE value, allowing this temporary to be
267  // register allocated, instead of forcing it to be spilled to the stack.
268 
269  // Create two vregs - one to hold the VRSAVE register that is live-in to the
270  // function and one for the value after having bits or'd into it.
271  unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
272  unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
273 
274  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
275  MachineBasicBlock &EntryBB = *Fn.begin();
276  DebugLoc dl;
277  // Emit the following code into the entry block:
278  // InVRSAVE = MFVRSAVE
279  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
280  // MTVRSAVE UpdatedVRSAVE
281  MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
282  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
283  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
284  UpdatedVRSAVE).addReg(InVRSAVE);
285  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
286 
287  // Find all return blocks, outputting a restore in each epilog.
288  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
289  if (!BB->empty() && BB->back().isReturn()) {
290  IP = BB->end(); --IP;
291 
292  // Skip over all terminator instructions, which are part of the return
293  // sequence.
295  while (I2 != BB->begin() && (--I2)->isTerminator())
296  IP = I2;
297 
298  // Emit: MTVRSAVE InVRSave
299  BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
300  }
301  }
302 }
303 
304 
305 /// getGlobalBaseReg - Output the instructions required to put the
306 /// base address to use for accessing globals into a register.
307 ///
308 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
309  if (!GlobalBaseReg) {
310  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
311  // Insert the set of GlobalBaseReg into the first MBB of the function
312  MachineBasicBlock &FirstMBB = MF->front();
313  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
314  const Module *M = MF->getFunction()->getParent();
315  DebugLoc dl;
316 
317  if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
318  if (PPCSubTarget->isTargetELF()) {
319  GlobalBaseReg = PPC::R30;
320  if (M->getPICLevel() == PICLevel::Small) {
321  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
322  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
323  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
324  } else {
325  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
326  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
327  unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
328  BuildMI(FirstMBB, MBBI, dl,
329  TII.get(PPC::UpdateGBR), GlobalBaseReg)
330  .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
331  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
332  }
333  } else {
334  GlobalBaseReg =
335  RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
336  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
337  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
338  }
339  } else {
340  GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
341  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
342  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
343  }
344  }
345  return CurDAG->getRegister(GlobalBaseReg,
346  PPCLowering->getPointerTy(CurDAG->getDataLayout()))
347  .getNode();
348 }
349 
350 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
351 /// or 64-bit immediate, and if the value can be accurately represented as a
352 /// sign extension from a 16-bit value. If so, this returns true and the
353 /// immediate.
354 static bool isIntS16Immediate(SDNode *N, short &Imm) {
355  if (N->getOpcode() != ISD::Constant)
356  return false;
357 
358  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
359  if (N->getValueType(0) == MVT::i32)
360  return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
361  else
362  return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
363 }
364 
365 static bool isIntS16Immediate(SDValue Op, short &Imm) {
366  return isIntS16Immediate(Op.getNode(), Imm);
367 }
368 
369 
370 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
371 /// operand. If so Imm will receive the 32-bit value.
372 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
373  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
374  Imm = cast<ConstantSDNode>(N)->getZExtValue();
375  return true;
376  }
377  return false;
378 }
379 
380 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
381 /// operand. If so Imm will receive the 64-bit value.
382 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
383  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
384  Imm = cast<ConstantSDNode>(N)->getZExtValue();
385  return true;
386  }
387  return false;
388 }
389 
390 // isInt32Immediate - This method tests to see if a constant operand.
391 // If so Imm will receive the 32 bit value.
392 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
393  return isInt32Immediate(N.getNode(), Imm);
394 }
395 
396 
397 // isOpcWithIntImmediate - This method tests to see if the node is a specific
398 // opcode and that it has a immediate integer right operand.
399 // If so Imm will receive the 32 bit value.
400 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
401  return N->getOpcode() == Opc
402  && isInt32Immediate(N->getOperand(1).getNode(), Imm);
403 }
404 
405 SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
406  SDLoc dl(SN);
407  int FI = cast<FrameIndexSDNode>(N)->getIndex();
408  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
409  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
410  if (SN->hasOneUse())
411  return CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
412  getSmallIPtrImm(Offset, dl));
413  return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
414  getSmallIPtrImm(Offset, dl));
415 }
416 
417 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
418  bool isShiftMask, unsigned &SH,
419  unsigned &MB, unsigned &ME) {
420  // Don't even go down this path for i64, since different logic will be
421  // necessary for rldicl/rldicr/rldimi.
422  if (N->getValueType(0) != MVT::i32)
423  return false;
424 
425  unsigned Shift = 32;
426  unsigned Indeterminant = ~0; // bit mask marking indeterminant results
427  unsigned Opcode = N->getOpcode();
428  if (N->getNumOperands() != 2 ||
429  !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
430  return false;
431 
432  if (Opcode == ISD::SHL) {
433  // apply shift left to mask if it comes first
434  if (isShiftMask) Mask = Mask << Shift;
435  // determine which bits are made indeterminant by shift
436  Indeterminant = ~(0xFFFFFFFFu << Shift);
437  } else if (Opcode == ISD::SRL) {
438  // apply shift right to mask if it comes first
439  if (isShiftMask) Mask = Mask >> Shift;
440  // determine which bits are made indeterminant by shift
441  Indeterminant = ~(0xFFFFFFFFu >> Shift);
442  // adjust for the left rotate
443  Shift = 32 - Shift;
444  } else if (Opcode == ISD::ROTL) {
445  Indeterminant = 0;
446  } else {
447  return false;
448  }
449 
450  // if the mask doesn't intersect any Indeterminant bits
451  if (Mask && !(Mask & Indeterminant)) {
452  SH = Shift & 31;
453  // make sure the mask is still a mask (wrap arounds may not be)
454  return isRunOfOnes(Mask, MB, ME);
455  }
456  return false;
457 }
458 
459 /// SelectBitfieldInsert - turn an or of two masked values into
460 /// the rotate left word immediate then mask insert (rlwimi) instruction.
461 SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
462  SDValue Op0 = N->getOperand(0);
463  SDValue Op1 = N->getOperand(1);
464  SDLoc dl(N);
465 
466  APInt LKZ, LKO, RKZ, RKO;
467  CurDAG->computeKnownBits(Op0, LKZ, LKO);
468  CurDAG->computeKnownBits(Op1, RKZ, RKO);
469 
470  unsigned TargetMask = LKZ.getZExtValue();
471  unsigned InsertMask = RKZ.getZExtValue();
472 
473  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
474  unsigned Op0Opc = Op0.getOpcode();
475  unsigned Op1Opc = Op1.getOpcode();
476  unsigned Value, SH = 0;
477  TargetMask = ~TargetMask;
478  InsertMask = ~InsertMask;
479 
480  // If the LHS has a foldable shift and the RHS does not, then swap it to the
481  // RHS so that we can fold the shift into the insert.
482  if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
483  if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
484  Op0.getOperand(0).getOpcode() == ISD::SRL) {
485  if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
486  Op1.getOperand(0).getOpcode() != ISD::SRL) {
487  std::swap(Op0, Op1);
488  std::swap(Op0Opc, Op1Opc);
489  std::swap(TargetMask, InsertMask);
490  }
491  }
492  } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
493  if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
494  Op1.getOperand(0).getOpcode() != ISD::SRL) {
495  std::swap(Op0, Op1);
496  std::swap(Op0Opc, Op1Opc);
497  std::swap(TargetMask, InsertMask);
498  }
499  }
500 
501  unsigned MB, ME;
502  if (isRunOfOnes(InsertMask, MB, ME)) {
503  SDValue Tmp1, Tmp2;
504 
505  if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
506  isInt32Immediate(Op1.getOperand(1), Value)) {
507  Op1 = Op1.getOperand(0);
508  SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
509  }
510  if (Op1Opc == ISD::AND) {
511  // The AND mask might not be a constant, and we need to make sure that
512  // if we're going to fold the masking with the insert, all bits not
513  // know to be zero in the mask are known to be one.
514  APInt MKZ, MKO;
515  CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
516  bool CanFoldMask = InsertMask == MKO.getZExtValue();
517 
518  unsigned SHOpc = Op1.getOperand(0).getOpcode();
519  if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
520  isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
521  // Note that Value must be in range here (less than 32) because
522  // otherwise there would not be any bits set in InsertMask.
523  Op1 = Op1.getOperand(0).getOperand(0);
524  SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
525  }
526  }
527 
528  SH &= 31;
529  SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
530  getI32Imm(ME, dl) };
531  return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
532  }
533  }
534  return nullptr;
535 }
536 
537 // Predict the number of instructions that would be generated by calling
538 // SelectInt64(N).
539 static unsigned SelectInt64CountDirect(int64_t Imm) {
540  // Assume no remaining bits.
541  unsigned Remainder = 0;
542  // Assume no shift required.
543  unsigned Shift = 0;
544 
545  // If it can't be represented as a 32 bit value.
546  if (!isInt<32>(Imm)) {
547  Shift = countTrailingZeros<uint64_t>(Imm);
548  int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
549 
550  // If the shifted value fits 32 bits.
551  if (isInt<32>(ImmSh)) {
552  // Go with the shifted value.
553  Imm = ImmSh;
554  } else {
555  // Still stuck with a 64 bit value.
556  Remainder = Imm;
557  Shift = 32;
558  Imm >>= 32;
559  }
560  }
561 
562  // Intermediate operand.
563  unsigned Result = 0;
564 
565  // Handle first 32 bits.
566  unsigned Lo = Imm & 0xFFFF;
567  unsigned Hi = (Imm >> 16) & 0xFFFF;
568 
569  // Simple value.
570  if (isInt<16>(Imm)) {
571  // Just the Lo bits.
572  ++Result;
573  } else if (Lo) {
574  // Handle the Hi bits and Lo bits.
575  Result += 2;
576  } else {
577  // Just the Hi bits.
578  ++Result;
579  }
580 
581  // If no shift, we're done.
582  if (!Shift) return Result;
583 
584  // Shift for next step if the upper 32-bits were not zero.
585  if (Imm)
586  ++Result;
587 
588  // Add in the last bits as required.
589  if ((Hi = (Remainder >> 16) & 0xFFFF))
590  ++Result;
591  if ((Lo = Remainder & 0xFFFF))
592  ++Result;
593 
594  return Result;
595 }
596 
/// Rot64 - Rotate the 64-bit value Imm left by R bit positions.
///
/// The rotate amount is reduced modulo 64, so R == 0 (or any multiple of 64)
/// returns Imm unchanged. Without the reduction such an R would evaluate a
/// shift by the full width of the type, which is undefined behaviour in C++.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  // The second mask turns the "64 - 0" case into a shift by zero.
  return (Imm << R) | (Imm >> ((64 - R) & 63));
}
600 
601 static unsigned SelectInt64Count(int64_t Imm) {
602  unsigned Count = SelectInt64CountDirect(Imm);
603  if (Count == 1)
604  return Count;
605 
606  for (unsigned r = 1; r < 63; ++r) {
607  uint64_t RImm = Rot64(Imm, r);
608  unsigned RCount = SelectInt64CountDirect(RImm) + 1;
609  Count = std::min(Count, RCount);
610 
611  // See comments in SelectInt64 for an explanation of the logic below.
612  unsigned LS = findLastSet(RImm);
613  if (LS != r-1)
614  continue;
615 
616  uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
617  uint64_t RImmWithOnes = RImm | OnesMask;
618 
619  RCount = SelectInt64CountDirect(RImmWithOnes) + 1;
620  Count = std::min(Count, RCount);
621  }
622 
623  return Count;
624 }
625 
626 // Select a 64-bit constant. For cost-modeling purposes, SelectInt64Count
627 // (above) needs to be kept in sync with this function.
628 static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
629  // Assume no remaining bits.
630  unsigned Remainder = 0;
631  // Assume no shift required.
632  unsigned Shift = 0;
633 
634  // If it can't be represented as a 32 bit value.
635  if (!isInt<32>(Imm)) {
636  Shift = countTrailingZeros<uint64_t>(Imm);
637  int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
638 
639  // If the shifted value fits 32 bits.
640  if (isInt<32>(ImmSh)) {
641  // Go with the shifted value.
642  Imm = ImmSh;
643  } else {
644  // Still stuck with a 64 bit value.
645  Remainder = Imm;
646  Shift = 32;
647  Imm >>= 32;
648  }
649  }
650 
651  // Intermediate operand.
652  SDNode *Result;
653 
654  // Handle first 32 bits.
655  unsigned Lo = Imm & 0xFFFF;
656  unsigned Hi = (Imm >> 16) & 0xFFFF;
657 
658  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
659  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
660  };
661 
662  // Simple value.
663  if (isInt<16>(Imm)) {
664  // Just the Lo bits.
665  Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
666  } else if (Lo) {
667  // Handle the Hi bits.
668  unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
669  Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
670  // And Lo bits.
671  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
672  SDValue(Result, 0), getI32Imm(Lo));
673  } else {
674  // Just the Hi bits.
675  Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
676  }
677 
678  // If no shift, we're done.
679  if (!Shift) return Result;
680 
681  // Shift for next step if the upper 32-bits were not zero.
682  if (Imm) {
683  Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
684  SDValue(Result, 0),
685  getI32Imm(Shift),
686  getI32Imm(63 - Shift));
687  }
688 
689  // Add in the last bits as required.
690  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
691  Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
692  SDValue(Result, 0), getI32Imm(Hi));
693  }
694  if ((Lo = Remainder & 0xFFFF)) {
695  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
696  SDValue(Result, 0), getI32Imm(Lo));
697  }
698 
699  return Result;
700 }
701 
702 static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
703  unsigned Count = SelectInt64CountDirect(Imm);
704  if (Count == 1)
705  return SelectInt64Direct(CurDAG, dl, Imm);
706 
707  unsigned RMin = 0;
708 
709  int64_t MatImm;
710  unsigned MaskEnd;
711 
712  for (unsigned r = 1; r < 63; ++r) {
713  uint64_t RImm = Rot64(Imm, r);
714  unsigned RCount = SelectInt64CountDirect(RImm) + 1;
715  if (RCount < Count) {
716  Count = RCount;
717  RMin = r;
718  MatImm = RImm;
719  MaskEnd = 63;
720  }
721 
722  // If the immediate to generate has many trailing zeros, it might be
723  // worthwhile to generate a rotated value with too many leading ones
724  // (because that's free with li/lis's sign-extension semantics), and then
725  // mask them off after rotation.
726 
727  unsigned LS = findLastSet(RImm);
728  // We're adding (63-LS) higher-order ones, and we expect to mask them off
729  // after performing the inverse rotation by (64-r). So we need that:
730  // 63-LS == 64-r => LS == r-1
731  if (LS != r-1)
732  continue;
733 
734  uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
735  uint64_t RImmWithOnes = RImm | OnesMask;
736 
737  RCount = SelectInt64CountDirect(RImmWithOnes) + 1;
738  if (RCount < Count) {
739  Count = RCount;
740  RMin = r;
741  MatImm = RImmWithOnes;
742  MaskEnd = LS;
743  }
744  }
745 
746  if (!RMin)
747  return SelectInt64Direct(CurDAG, dl, Imm);
748 
749  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
750  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
751  };
752 
753  SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, MatImm), 0);
754  return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
755  getI32Imm(64 - RMin), getI32Imm(MaskEnd));
756 }
757 
758 // Select a 64-bit constant.
759 static SDNode *SelectInt64(SelectionDAG *CurDAG, SDNode *N) {
760  SDLoc dl(N);
761 
762  // Get 64 bit value.
763  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
764  return SelectInt64(CurDAG, dl, Imm);
765 }
766 
767 namespace {
768 class BitPermutationSelector {
769  struct ValueBit {
770  SDValue V;
771 
772  // The bit number in the value, using a convention where bit 0 is the
773  // lowest-order bit.
774  unsigned Idx;
775 
776  enum Kind {
777  ConstZero,
778  Variable
779  } K;
780 
781  ValueBit(SDValue V, unsigned I, Kind K = Variable)
782  : V(V), Idx(I), K(K) {}
783  ValueBit(Kind K = Variable)
784  : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
785 
786  bool isZero() const {
787  return K == ConstZero;
788  }
789 
790  bool hasValue() const {
791  return K == Variable;
792  }
793 
794  SDValue getValue() const {
795  assert(hasValue() && "Cannot get the value of a constant bit");
796  return V;
797  }
798 
799  unsigned getValueBitIndex() const {
800  assert(hasValue() && "Cannot get the value bit index of a constant bit");
801  return Idx;
802  }
803  };
804 
805  // A bit group has the same underlying value and the same rotate factor.
806  struct BitGroup {
807  SDValue V;
808  unsigned RLAmt;
809  unsigned StartIdx, EndIdx;
810 
811  // This rotation amount assumes that the lower 32 bits of the quantity are
812  // replicated in the high 32 bits by the rotation operator (which is done
813  // by rlwinm and friends in 64-bit mode).
814  bool Repl32;
815  // Did converting to Repl32 == true change the rotation factor? If it did,
816  // it decreased it by 32.
817  bool Repl32CR;
818  // Was this group coalesced after setting Repl32 to true?
819  bool Repl32Coalesced;
820 
821  BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
822  : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
823  Repl32Coalesced(false) {
824  DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
825  " [" << S << ", " << E << "]\n");
826  }
827  };
828 
829  // Information on each (Value, RLAmt) pair (like the number of groups
830  // associated with each) used to choose the lowering method.
831  struct ValueRotInfo {
832  SDValue V;
833  unsigned RLAmt;
834  unsigned NumGroups;
835  unsigned FirstGroupStartIdx;
836  bool Repl32;
837 
838  ValueRotInfo()
839  : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX),
840  Repl32(false) {}
841 
842  // For sorting (in reverse order) by NumGroups, and then by
843  // FirstGroupStartIdx.
844  bool operator < (const ValueRotInfo &Other) const {
845  // We need to sort so that the non-Repl32 come first because, when we're
846  // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
847  // masking operation.
848  if (Repl32 < Other.Repl32)
849  return true;
850  else if (Repl32 > Other.Repl32)
851  return false;
852  else if (NumGroups > Other.NumGroups)
853  return true;
854  else if (NumGroups < Other.NumGroups)
855  return false;
856  else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
857  return true;
858  return false;
859  }
860  };
861 
862  // Return true if something interesting was deduced, return false if we're
863  // providing only a generic representation of V (or something else likewise
864  // uninteresting for instruction selection).
865  bool getValueBits(SDValue V, SmallVector<ValueBit, 64> &Bits) {
866  switch (V.getOpcode()) {
867  default: break;
868  case ISD::ROTL:
869  if (isa<ConstantSDNode>(V.getOperand(1))) {
870  unsigned RotAmt = V.getConstantOperandVal(1);
871 
872  SmallVector<ValueBit, 64> LHSBits(Bits.size());
873  getValueBits(V.getOperand(0), LHSBits);
874 
875  for (unsigned i = 0; i < Bits.size(); ++i)
876  Bits[i] = LHSBits[i < RotAmt ? i + (Bits.size() - RotAmt) : i - RotAmt];
877 
878  return true;
879  }
880  break;
881  case ISD::SHL:
882  if (isa<ConstantSDNode>(V.getOperand(1))) {
883  unsigned ShiftAmt = V.getConstantOperandVal(1);
884 
885  SmallVector<ValueBit, 64> LHSBits(Bits.size());
886  getValueBits(V.getOperand(0), LHSBits);
887 
888  for (unsigned i = ShiftAmt; i < Bits.size(); ++i)
889  Bits[i] = LHSBits[i - ShiftAmt];
890 
891  for (unsigned i = 0; i < ShiftAmt; ++i)
892  Bits[i] = ValueBit(ValueBit::ConstZero);
893 
894  return true;
895  }
896  break;
897  case ISD::SRL:
898  if (isa<ConstantSDNode>(V.getOperand(1))) {
899  unsigned ShiftAmt = V.getConstantOperandVal(1);
900 
901  SmallVector<ValueBit, 64> LHSBits(Bits.size());
902  getValueBits(V.getOperand(0), LHSBits);
903 
904  for (unsigned i = 0; i < Bits.size() - ShiftAmt; ++i)
905  Bits[i] = LHSBits[i + ShiftAmt];
906 
907  for (unsigned i = Bits.size() - ShiftAmt; i < Bits.size(); ++i)
908  Bits[i] = ValueBit(ValueBit::ConstZero);
909 
910  return true;
911  }
912  break;
913  case ISD::AND:
914  if (isa<ConstantSDNode>(V.getOperand(1))) {
915  uint64_t Mask = V.getConstantOperandVal(1);
916 
917  SmallVector<ValueBit, 64> LHSBits(Bits.size());
918  bool LHSTrivial = getValueBits(V.getOperand(0), LHSBits);
919 
920  for (unsigned i = 0; i < Bits.size(); ++i)
921  if (((Mask >> i) & 1) == 1)
922  Bits[i] = LHSBits[i];
923  else
924  Bits[i] = ValueBit(ValueBit::ConstZero);
925 
926  // Mark this as interesting, only if the LHS was also interesting. This
927  // prevents the overall procedure from matching a single immediate 'and'
928  // (which is non-optimal because such an and might be folded with other
929  // things if we don't select it here).
930  return LHSTrivial;
931  }
932  break;
933  case ISD::OR: {
934  SmallVector<ValueBit, 64> LHSBits(Bits.size()), RHSBits(Bits.size());
935  getValueBits(V.getOperand(0), LHSBits);
936  getValueBits(V.getOperand(1), RHSBits);
937 
938  bool AllDisjoint = true;
939  for (unsigned i = 0; i < Bits.size(); ++i)
940  if (LHSBits[i].isZero())
941  Bits[i] = RHSBits[i];
942  else if (RHSBits[i].isZero())
943  Bits[i] = LHSBits[i];
944  else {
945  AllDisjoint = false;
946  break;
947  }
948 
949  if (!AllDisjoint)
950  break;
951 
952  return true;
953  }
954  }
955 
956  for (unsigned i = 0; i < Bits.size(); ++i)
957  Bits[i] = ValueBit(V, i);
958 
959  return false;
960  }
961 
962  // For each value (except the constant ones), compute the left-rotate amount
963  // to get it from its original to final position.
964  void computeRotationAmounts() {
965  HasZeros = false;
966  RLAmt.resize(Bits.size());
967  for (unsigned i = 0; i < Bits.size(); ++i)
968  if (Bits[i].hasValue()) {
969  unsigned VBI = Bits[i].getValueBitIndex();
970  if (i >= VBI)
971  RLAmt[i] = i - VBI;
972  else
973  RLAmt[i] = Bits.size() - (VBI - i);
974  } else if (Bits[i].isZero()) {
975  HasZeros = true;
976  RLAmt[i] = UINT32_MAX;
977  } else {
978  llvm_unreachable("Unknown value bit type");
979  }
980  }
981 
982  // Collect groups of consecutive bits with the same underlying value and
983  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
984  // they break up groups.
985  void collectBitGroups(bool LateMask) {
986  BitGroups.clear();
987 
988  unsigned LastRLAmt = RLAmt[0];
989  SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
990  unsigned LastGroupStartIdx = 0;
991  for (unsigned i = 1; i < Bits.size(); ++i) {
992  unsigned ThisRLAmt = RLAmt[i];
993  SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
994  if (LateMask && !ThisValue) {
995  ThisValue = LastValue;
996  ThisRLAmt = LastRLAmt;
997  // If we're doing late masking, then the first bit group always starts
998  // at zero (even if the first bits were zero).
999  if (BitGroups.empty())
1000  LastGroupStartIdx = 0;
1001  }
1002 
1003  // If this bit has the same underlying value and the same rotate factor as
1004  // the last one, then they're part of the same group.
1005  if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1006  continue;
1007 
1008  if (LastValue.getNode())
1009  BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1010  i-1));
1011  LastRLAmt = ThisRLAmt;
1012  LastValue = ThisValue;
1013  LastGroupStartIdx = i;
1014  }
1015  if (LastValue.getNode())
1016  BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1017  Bits.size()-1));
1018 
1019  if (BitGroups.empty())
1020  return;
1021 
1022  // We might be able to combine the first and last groups.
1023  if (BitGroups.size() > 1) {
1024  // If the first and last groups are the same, then remove the first group
1025  // in favor of the last group, making the ending index of the last group
1026  // equal to the ending index of the to-be-removed first group.
1027  if (BitGroups[0].StartIdx == 0 &&
1028  BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1029  BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1030  BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1031  DEBUG(dbgs() << "\tcombining final bit group with inital one\n");
1032  BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1033  BitGroups.erase(BitGroups.begin());
1034  }
1035  }
1036  }
1037 
1038  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1039  // associated with each. If there is a degeneracy, pick the one that occurs
1040  // first (in the final value).
1041  void collectValueRotInfo() {
1042  ValueRots.clear();
1043 
1044  for (auto &BG : BitGroups) {
1045  unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1046  ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1047  VRI.V = BG.V;
1048  VRI.RLAmt = BG.RLAmt;
1049  VRI.Repl32 = BG.Repl32;
1050  VRI.NumGroups += 1;
1051  VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1052  }
1053 
1054  // Now that we've collected the various ValueRotInfo instances, we need to
1055  // sort them.
1056  ValueRotsVec.clear();
1057  for (auto &I : ValueRots) {
1058  ValueRotsVec.push_back(I.second);
1059  }
1060  std::sort(ValueRotsVec.begin(), ValueRotsVec.end());
1061  }
1062 
1063  // In 64-bit mode, rlwinm and friends have a rotation operator that
1064  // replicates the low-order 32 bits into the high-order 32-bits. The mask
1065  // indices of these instructions can only be in the lower 32 bits, so they
1066  // can only represent some 64-bit bit groups. However, when they can be used,
1067  // the 32-bit replication can be used to represent, as a single bit group,
1068  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1069  // groups when possible. The conversion is done in place on BitGroups;
1070  // nothing is returned.
1071  void assignRepl32BitGroups() {
1072  // If we have bits like this:
1073  //
1074  // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1075  // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1076  // Groups: | RLAmt = 8 | RLAmt = 40 |
1077  //
1078  // But, making use of a 32-bit operation that replicates the low-order 32
1079  // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1080  // of 8.
1081
      // True if every value-carrying bit covered by BG comes from bit index
      // 0..31 of its source value; bits without a value are skipped. A group
      // with StartIdx > EndIdx wraps past the top bit, so it is scanned as
      // two pieces: [StartIdx, Bits.size()) and [0, EndIdx].
1082  auto IsAllLow32 = [this](BitGroup & BG) {
1083  if (BG.StartIdx <= BG.EndIdx) {
1084  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1085  if (!Bits[i].hasValue())
1086  continue;
1087  if (Bits[i].getValueBitIndex() >= 32)
1088  return false;
1089  }
1090  } else {
1091  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1092  if (!Bits[i].hasValue())
1093  continue;
1094  if (Bits[i].getValueBitIndex() >= 32)
1095  return false;
1096  }
1097  for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1098  if (!Bits[i].hasValue())
1099  continue;
1100  if (Bits[i].getValueBitIndex() >= 32)
1101  return false;
1102  }
1103  }
1104
1105  return true;
1106  };
1107
      // First pass: mark every group whose start and end both lie below bit 32
      // and that only draws on low-32 source bits. A rotate amount of 32 or
      // more is reduced by 32, and that reduction is remembered in Repl32CR.
1108  for (auto &BG : BitGroups) {
1109  if (BG.StartIdx < 32 && BG.EndIdx < 32) {
1110  if (IsAllLow32(BG)) {
1111  if (BG.RLAmt >= 32) {
1112  BG.RLAmt -= 32;
1113  BG.Repl32CR = true;
1114  }
1115
1116  BG.Repl32 = true;
1117
1118  DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
1119  BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
1120  " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
1121  }
1122  }
1123  }
1124
1125  // Now walk through the bit groups, consolidating where possible.
1126  for (auto I = BitGroups.begin(); I != BitGroups.end();) {
1127  // We might want to remove this bit group by merging it with the previous
1128  // group (which might be the ending group).
1129  auto IP = (I == BitGroups.begin()) ?
1130  std::prev(BitGroups.end()) : std::prev(I);
      // Merge I into IP when both are Repl32, share value and rotate amount,
      // and I starts on the bit right after IP ends (modulo 64).
1131  if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
1132  I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
1133
1134  DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
1135  I->V.getNode() << " RLAmt = " << I->RLAmt <<
1136  " [" << I->StartIdx << ", " << I->EndIdx <<
1137  "] with group with range [" <<
1138  IP->StartIdx << ", " << IP->EndIdx << "]\n");
1139
1140  IP->EndIdx = I->EndIdx;
1141  IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
1142  IP->Repl32Coalesced = true;
      // erase() returns the iterator to the element after the removed one,
      // so the loop continues without the increment at the bottom.
1143  I = BitGroups.erase(I);
1144  continue;
1145  } else {
1146  // There is a special case worth handling: If there is a single group
1147  // covering the entire upper 32 bits, and it can be merged with both
1148  // the next and previous groups (which might be the same group), then
1149  // do so. If it is the same group (so there will be only one group in
1150  // total), then we need to reverse the order of the range so that it
1151  // covers the entire 64 bits.
1152  if (I->StartIdx == 32 && I->EndIdx == 63) {
1153  assert(std::next(I) == BitGroups.end() &&
1154  "bit group ends at index 63 but there is another?");
      // The group "after" the last one is the first one in the vector.
1155  auto IN = BitGroups.begin();
1156
1157  if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
1158  (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
1159  IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
1160  IsAllLow32(*I)) {
1161
1162  DEBUG(dbgs() << "\tcombining bit group for " <<
1163  I->V.getNode() << " RLAmt = " << I->RLAmt <<
1164  " [" << I->StartIdx << ", " << I->EndIdx <<
1165  "] with 32-bit replicated groups with ranges [" <<
1166  IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
1167  IN->StartIdx << ", " << IN->EndIdx << "]\n");
1168
1169  if (IP == IN) {
1170  // There is only one other group; change it to cover the whole
1171  // range (backward, so that it can still be Repl32 but cover the
1172  // whole 64-bit range).
1173  IP->StartIdx = 31;
1174  IP->EndIdx = 30;
1175  IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
1176  IP->Repl32Coalesced = true;
1177  I = BitGroups.erase(I);
1178  } else {
1179  // There are two separate groups, one before this group and one
1180  // after us (at the beginning). We're going to remove this group,
1181  // but also the group at the very beginning.
1182  IP->EndIdx = IN->EndIdx;
1183  IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
1184  IP->Repl32Coalesced = true;
1185  I = BitGroups.erase(I);
1186  BitGroups.erase(BitGroups.begin());
1187  }
1188
1189  // This must be the last group in the vector (and we might have
1190  // just invalidated the iterator above), so break here.
1191  break;
1192  }
1193  }
1194  }
1195
1196  ++I;
1197  }
1198  }
1199 
1200  SDValue getI32Imm(unsigned Imm, SDLoc dl) {
1201  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1202  }
1203 
1204  uint64_t getZerosMask() {
1205  uint64_t Mask = 0;
1206  for (unsigned i = 0; i < Bits.size(); ++i) {
1207  if (Bits[i].hasValue())
1208  continue;
1209  Mask |= (UINT64_C(1) << i);
1210  }
1211 
1212  return ~Mask;
1213  }
1214 
1215  // Depending on the number of groups for a particular value, it might be
1216  // better to rotate, mask explicitly (using andi/andis), and then or the
1217  // result. Select this part of the result first.
      //
      // dl is attached to every node created here. Res is the partial result
      // so far: it is combined (with an 'or') with anything selected here and
      // is left untouched when nothing is. InstCnt, if non-null, is
      // incremented by the number of instructions accounted for.
1218  void SelectAndParts32(SDLoc dl, SDValue &Res, unsigned *InstCnt) {
      // NOTE(review): as listed, this 'return' is unconditional, which makes
      // the rest of the function unreachable. The embedded line numbering
      // jumps from 1218 to 1220, so a guarding 'if' line has presumably been
      // lost from this listing -- confirm against the original file.
1220  return;
1221
1222  for (ValueRotInfo &VRI : ValueRotsVec) {
      // Collect the result bit positions supplied by VRI.V at rotate amount
      // VRI.RLAmt.
1223  unsigned Mask = 0;
1224  for (unsigned i = 0; i < Bits.size(); ++i) {
1225  if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1226  continue;
1227  if (RLAmt[i] != VRI.RLAmt)
1228  continue;
1229  Mask |= (1u << i);
1230  }
1231
1232  // Compute the masks for andi/andis that would be necessary.
1233  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1234  assert((ANDIMask != 0 || ANDISMask != 0) &&
1235  "No set bits in mask for value bit groups");
1236  bool NeedsRotate = VRI.RLAmt != 0;
1237
1238  // We're trying to minimize the number of instructions. If we have one
1239  // group, using one of andi/andis can break even. If we have three
1240  // groups, we can use both andi and andis and break even (to use both
1241  // andi and andis we also need to or the results together). We need four
1242  // groups if we also need to rotate. To use andi/andis we need to do more
1243  // than break even because rotate-and-mask instructions tend to be easier
1244  // to schedule.
1245
1246  // FIXME: We've biased here against using andi/andis, which is right for
1247  // POWER cores, but not optimal everywhere. For example, on the A2,
1248  // andi/andis have single-cycle latency whereas the rotate-and-mask
1249  // instructions take two cycles, and it would be better to bias toward
1250  // andi/andis in break-even cases.
1251
      // Cost of the masking form: the optional rotate, one instruction per
      // non-empty 16-bit half of the mask, an 'or' to join the two halves when
      // both are needed, and an 'or' to merge with an existing Res.
1252  unsigned NumAndInsts = (unsigned) NeedsRotate +
1253  (unsigned) (ANDIMask != 0) +
1254  (unsigned) (ANDISMask != 0) +
1255  (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
1256  (unsigned) (bool) Res;
1257
1258  DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1259  " RL: " << VRI.RLAmt << ":" <<
1260  "\n\t\t\tisel using masking: " << NumAndInsts <<
1261  " using rotates: " << VRI.NumGroups << "\n");
1262
      // Masking is only chosen when it needs strictly fewer instructions than
      // the group count (ties go to the rotate form).
1263  if (NumAndInsts >= VRI.NumGroups)
1264  continue;
1265
1266  DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1267
1268  if (InstCnt) *InstCnt += NumAndInsts;
1269
      // A full-width (mask 0..31) rlwinm performs the plain rotate.
1270  SDValue VRot;
1271  if (VRI.RLAmt) {
1272  SDValue Ops[] =
1273  { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1274  getI32Imm(31, dl) };
1275  VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
1276  Ops), 0);
1277  } else {
1278  VRot = VRI.V;
1279  }
1280
1281  SDValue ANDIVal, ANDISVal;
1282  if (ANDIMask != 0)
1283  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1284  VRot, getI32Imm(ANDIMask, dl)), 0);
1285  if (ANDISMask != 0)
1286  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1287  VRot, getI32Imm(ANDISMask, dl)), 0);
1288
1289  SDValue TotalVal;
1290  if (!ANDIVal)
1291  TotalVal = ANDISVal;
1292  else if (!ANDISVal)
1293  TotalVal = ANDIVal;
1294  else
1295  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1296  ANDIVal, ANDISVal), 0);
1297
1298  if (!Res)
1299  Res = TotalVal;
1300  else
1301  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1302  Res, TotalVal), 0);
1303
1304  // Now, remove all groups with this underlying value and rotation
1305  // factor.
1306  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1307  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1308  });
1309  }
1310  }
1311 
1312  // Instruction selection for the 32-bit case.
      //
      // N supplies the debug location for the created nodes. When LateMask is
      // true, the constant-zero bits are cleared at the very end by and-ing
      // with getZerosMask(). InstCnt, if non-null, is reset to zero and then
      // receives the number of instructions accounted for. Returns the node
      // producing the final value.
1313  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
1314  SDLoc dl(N);
1315  SDValue Res;
1316
1317  if (InstCnt) *InstCnt = 0;
1318
1319  // Take care of cases that should use andi/andis first.
1320  SelectAndParts32(dl, Res, InstCnt);
1321
1322  // If we've not yet selected a 'starting' instruction, and we have no zeros
1323  // to fill in, select the (Value, RLAmt) with the highest priority (largest
1324  // number of groups), and start with this rotated value.
1325  if ((!HasZeros || LateMask) && !Res) {
1326  ValueRotInfo &VRI = ValueRotsVec[0];
1327  if (VRI.RLAmt) {
1328  if (InstCnt) *InstCnt += 1;
1329  SDValue Ops[] =
1330  { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1331  getI32Imm(31, dl) };
1332  Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
1333  0);
1334  } else {
      // No rotation needed: the value itself is the starting point and
      // costs no instruction.
1335  Res = VRI.V;
1336  }
1337
1338  // Now, remove all groups with this underlying value and rotation factor.
1339  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1340  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1341  });
1342  }
1343
      // Each remaining group costs exactly one instruction below.
1344  if (InstCnt) *InstCnt += BitGroups.size();
1345
1346  // Insert the other groups (one at a time).
      // The first group (when there is no Res yet) is produced with a
      // rotate-and-mask; every later one is inserted into Res. The mask
      // operands are derived from the group's end and start indices,
      // subtracted from Bits.size() - 1.
1347  for (auto &BG : BitGroups) {
1348  if (!Res) {
1349  SDValue Ops[] =
1350  { BG.V, getI32Imm(BG.RLAmt, dl),
1351  getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1352  getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1353  Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
1354  } else {
1355  SDValue Ops[] =
1356  { Res, BG.V, getI32Imm(BG.RLAmt, dl),
1357  getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1358  getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1359  Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
1360  }
1361  }
1362
1363  if (LateMask) {
      // Only the low 32 bits of the zeros mask are relevant here.
1364  unsigned Mask = (unsigned) getZerosMask();
1365
1366  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1367  assert((ANDIMask != 0 || ANDISMask != 0) &&
1368  "No set bits in zeros mask?");
1369
      // One instruction per non-empty 16-bit half, plus an 'or' if both
      // halves are needed.
1370  if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1371  (unsigned) (ANDISMask != 0) +
1372  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1373
1374  SDValue ANDIVal, ANDISVal;
1375  if (ANDIMask != 0)
1376  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1377  Res, getI32Imm(ANDIMask, dl)), 0);
1378  if (ANDISMask != 0)
1379  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1380  Res, getI32Imm(ANDISMask, dl)), 0);
1381
1382  if (!ANDIVal)
1383  Res = ANDISVal;
1384  else if (!ANDISVal)
1385  Res = ANDIVal;
1386  else
1387  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1388  ANDIVal, ANDISVal), 0);
1389  }
1390
1391  return Res.getNode();
1392  }
1393 
// Number of instructions (1 or 2) that a 64-bit rotate-and-mask, or
// rotate-mask-and-insert when IsIns is true, takes for the given rotate amount
// and mask range. A Repl32 group always takes one instruction.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned FirstMaskBit = 63 - MaskEnd;
  const unsigned LastMaskBit = 63 - MaskStart;

  // This form is available both for plain rotates and for inserts.
  if (LastMaskBit == 63 - RLAmt)
    return 1;

  // These two forms exist only for the non-inserting rotate-and-mask.
  if (!IsIns && (LastMaskBit == 63 || FirstMaskBit == 0))
    return 1;

  return 2;
}
1411 
1412  // For 64-bit values, not all combinations of rotates and masks are
1413  // available. Produce one if it is available.
1414  SDValue SelectRotMask64(SDValue V, SDLoc dl, unsigned RLAmt, bool Repl32,
1415  unsigned MaskStart, unsigned MaskEnd,
1416  unsigned *InstCnt = nullptr) {
1417  // In the notation used by the instructions, 'start' and 'end' are reversed
1418  // because bits are counted from high to low order.
1419  unsigned InstMaskStart = 64 - MaskEnd - 1,
1420  InstMaskEnd = 64 - MaskStart - 1;
1421 
1422  if (InstCnt) *InstCnt += 1;
1423 
1424  if (Repl32) {
1425  // This rotation amount assumes that the lower 32 bits of the quantity
1426  // are replicated in the high 32 bits by the rotation operator (which is
1427  // done by rlwinm and friends).
1428  assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1429  assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
1430  SDValue Ops[] =
1431  { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
1432  getI32Imm(InstMaskEnd - 32, dl) };
1433  return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
1434  Ops), 0);
1435  }
1436 
1437  if (InstMaskEnd == 63) {
1438  SDValue Ops[] =
1439  { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
1440  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
1441  }
1442 
1443  if (InstMaskStart == 0) {
1444  SDValue Ops[] =
1445  { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) };
1446  return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
1447  }
1448 
1449  if (InstMaskEnd == 63 - RLAmt) {
1450  SDValue Ops[] =
1451  { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
1452  return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
1453  }
1454 
1455  // We cannot do this with a single instruction, so we'll use two. The
1456  // problem is that we're not free to choose both a rotation amount and mask
1457  // start and end independently. We can choose an arbitrary mask start and
1458  // end, but then the rotation amount is fixed. Rotation, however, can be
1459  // inverted, and so by applying an "inverse" rotation first, we can get the
1460  // desired result.
1461  if (InstCnt) *InstCnt += 1;
1462 
1463  // The rotation mask for the second instruction must be MaskStart.
1464  unsigned RLAmt2 = MaskStart;
1465  // The first instruction must rotate V so that the overall rotation amount
1466  // is RLAmt.
1467  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1468  if (RLAmt1)
1469  V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1470  return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
1471  }
1472 
1473  // For 64-bit values, not all combinations of rotates and masks are
1474  // available. Produce a rotate-mask-and-insert if one is available.
1475  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, SDLoc dl, unsigned RLAmt,
1476  bool Repl32, unsigned MaskStart,
1477  unsigned MaskEnd, unsigned *InstCnt = nullptr) {
1478  // In the notation used by the instructions, 'start' and 'end' are reversed
1479  // because bits are counted from high to low order.
1480  unsigned InstMaskStart = 64 - MaskEnd - 1,
1481  InstMaskEnd = 64 - MaskStart - 1;
1482 
1483  if (InstCnt) *InstCnt += 1;
1484 
1485  if (Repl32) {
1486  // This rotation amount assumes that the lower 32 bits of the quantity
1487  // are replicated in the high 32 bits by the rotation operator (which is
1488  // done by rlwinm and friends).
1489  assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1490  assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
1491  SDValue Ops[] =
1492  { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
1493  getI32Imm(InstMaskEnd - 32, dl) };
1494  return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
1495  Ops), 0);
1496  }
1497 
1498  if (InstMaskEnd == 63 - RLAmt) {
1499  SDValue Ops[] =
1500  { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
1501  return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
1502  }
1503 
1504  // We cannot do this with a single instruction, so we'll use two. The
1505  // problem is that we're not free to choose both a rotation amount and mask
1506  // start and end independently. We can choose an arbitrary mask start and
1507  // end, but then the rotation amount is fixed. Rotation, however, can be
1508  // inverted, and so by applying an "inverse" rotation first, we can get the
1509  // desired result.
1510  if (InstCnt) *InstCnt += 1;
1511 
1512  // The rotation mask for the second instruction must be MaskStart.
1513  unsigned RLAmt2 = MaskStart;
1514  // The first instruction must rotate V so that the overall rotation amount
1515  // is RLAmt.
1516  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1517  if (RLAmt1)
1518  V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1519  return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
1520  }
1521 
1522  void SelectAndParts64(SDLoc dl, SDValue &Res, unsigned *InstCnt) {
      // 64-bit counterpart of the rotate/mask/or selection: for each
      // (value, rotate) entry of ValueRotsVec, decide whether masking the
      // rotated value is cheaper than selecting its bit groups one by one and,
      // if so, fold the masked value into Res and drop those groups. dl is
      // attached to every created node; InstCnt, if non-null, is incremented
      // by the number of instructions accounted for.
      //
      // NOTE(review): as listed, this 'return' is unconditional, which makes
      // the rest of the function unreachable. The embedded line numbering
      // jumps from 1522 to 1524, so a guarding 'if' line has presumably been
      // lost from this listing -- confirm against the original file.
1524  return;
1525
1526  // The idea here is the same as in the 32-bit version, but with additional
1527  // complications from the fact that Repl32 might be true. Because we
1528  // aggressively convert bit groups to Repl32 form (which, for small
1529  // rotation factors, involves no other change), and then coalesce, it might
1530  // be the case that a single 64-bit masking operation could handle both
1531  // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
1532  // form allowed coalescing, then we must use a 32-bit rotation in order to
1533  // completely capture the new combined bit group.
1534
1535  for (ValueRotInfo &VRI : ValueRotsVec) {
1536  uint64_t Mask = 0;
1537
1538  // We need to add to the mask all bits from the associated bit groups.
1539  // If Repl32 is false, we need to add bits from bit groups that have
1540  // Repl32 true, but are trivially convertible to Repl32 false. Such a
1541  // group is trivially convertible if it overlaps only with the lower 32
1542  // bits, and the group has not been coalesced.
1543  auto MatchingBG = [VRI](const BitGroup &BG) {
1544  if (VRI.V != BG.V)
1545  return false;
1546
1547  unsigned EffRLAmt = BG.RLAmt;
1548  if (!VRI.Repl32 && BG.Repl32) {
1549  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
1550  !BG.Repl32Coalesced) {
      // Add back the 32 recorded in Repl32CR to get the group's rotate
      // amount in non-Repl32 terms.
1551  if (BG.Repl32CR)
1552  EffRLAmt += 32;
1553  } else {
1554  return false;
1555  }
1556  } else if (VRI.Repl32 != BG.Repl32) {
1557  return false;
1558  }
1559
1560  if (VRI.RLAmt != EffRLAmt)
1561  return false;
1562
1563  return true;
1564  };
1565
      // Set a mask bit for every result position covered by a matching group;
      // a group with StartIdx > EndIdx wraps past the top bit.
1566  for (auto &BG : BitGroups) {
1567  if (!MatchingBG(BG))
1568  continue;
1569
1570  if (BG.StartIdx <= BG.EndIdx) {
1571  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
1572  Mask |= (UINT64_C(1) << i);
1573  } else {
1574  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
1575  Mask |= (UINT64_C(1) << i);
1576  for (unsigned i = 0; i <= BG.EndIdx; ++i)
1577  Mask |= (UINT64_C(1) << i);
1578  }
1579  }
1580
1581  // We can use the 32-bit andi/andis technique if the mask does not
1582  // require any higher-order bits. This can save an instruction compared
1583  // to always using the general 64-bit technique.
1584  bool Use32BitInsts = isUInt<32>(Mask);
1585  // Compute the masks for andi/andis that would be necessary.
1586  unsigned ANDIMask = (Mask & UINT16_MAX),
1587  ANDISMask = (Mask >> 16) & UINT16_MAX;
1588
1589  bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
1590
      // Cost of the masking form: the optional rotate, an 'or' to merge with
      // an existing Res, and then either the andi/andis sequence or the
      // materialisation of the 64-bit mask plus one 'and'.
1591  unsigned NumAndInsts = (unsigned) NeedsRotate +
1592  (unsigned) (bool) Res;
1593  if (Use32BitInsts)
1594  NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
1595  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1596  else
1597  NumAndInsts += SelectInt64Count(Mask) + /* and */ 1;
1598
      // Cost of the rotate form: the first matching group is counted as a
      // rotate-and-mask, every later one as an insert.
1599  unsigned NumRLInsts = 0;
1600  bool FirstBG = true;
1601  for (auto &BG : BitGroups) {
1602  if (!MatchingBG(BG))
1603  continue;
1604  NumRLInsts +=
1605  SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
1606  !FirstBG);
1607  FirstBG = false;
1608  }
1609
1610  DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1611  " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") <<
1612  "\n\t\t\tisel using masking: " << NumAndInsts <<
1613  " using rotates: " << NumRLInsts << "\n");
1614
1615  // When we'd use andi/andis, we bias toward using the rotates (andi only
1616  // has a record form, and is cracked on POWER cores). However, when using
1617  // general 64-bit constant formation, bias toward the constant form,
1618  // because that exposes more opportunities for CSE.
1619  if (NumAndInsts > NumRLInsts)
1620  continue;
1621  if (Use32BitInsts && NumAndInsts == NumRLInsts)
1622  continue;
1623
1624  DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1625
1626  if (InstCnt) *InstCnt += NumAndInsts;
1627
1628  SDValue VRot;
1629  // We actually need to generate a rotation if we have a non-zero rotation
1630  // factor or, in the Repl32 case, if we care about any of the
1631  // higher-order replicated bits. In the latter case, we generate a mask
1632  // backward so that it actually includes the entire 64 bits.
1633  if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
1634  VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
1635  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
1636  else
1637  VRot = VRI.V;
1638
1639  SDValue TotalVal;
1640  if (Use32BitInsts) {
1641  assert((ANDIMask != 0 || ANDISMask != 0) &&
1642  "No set bits in mask when using 32-bit ands for 64-bit value");
1643
1644  SDValue ANDIVal, ANDISVal;
1645  if (ANDIMask != 0)
1646  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
1647  VRot, getI32Imm(ANDIMask, dl)), 0);
1648  if (ANDISMask != 0)
1649  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
1650  VRot, getI32Imm(ANDISMask, dl)), 0);
1651
1652  if (!ANDIVal)
1653  TotalVal = ANDISVal;
1654  else if (!ANDISVal)
1655  TotalVal = ANDIVal;
1656  else
1657  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1658  ANDIVal, ANDISVal), 0);
1659  } else {
      // Materialise the full 64-bit mask and 'and' it with the rotated
      // value.
1660  TotalVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0);
1661  TotalVal =
1662  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
1663  VRot, TotalVal), 0);
1664  }
1665
1666  if (!Res)
1667  Res = TotalVal;
1668  else
1669  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1670  Res, TotalVal), 0);
1671
1672  // Now, remove all groups with this underlying value and rotation
1673  // factor.
1674  eraseMatchingBitGroups(MatchingBG);
1675  }
1676  }
1677 
1678  // Instruction selection for the 64-bit case.
      //
      // N supplies the debug location for the created nodes. When LateMask is
      // true, the constant-zero bits are cleared at the very end by and-ing
      // with getZerosMask(). InstCnt, if non-null, is reset to zero and then
      // receives the number of instructions accounted for. Returns the node
      // producing the final value.
1679  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
1680  SDLoc dl(N);
1681  SDValue Res;
1682
1683  if (InstCnt) *InstCnt = 0;
1684
1685  // Take care of cases that should use andi/andis first.
1686  SelectAndParts64(dl, Res, InstCnt);
1687
1688  // If we've not yet selected a 'starting' instruction, and we have no zeros
1689  // to fill in, select the (Value, RLAmt) with the highest priority (largest
1690  // number of groups), and start with this rotated value.
1691  if ((!HasZeros || LateMask) && !Res) {
1692  // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
1693  // groups will come first, and so the VRI representing the largest number
1694  // of groups might not be first (it might be the first Repl32 groups).
1695  unsigned MaxGroupsIdx = 0;
1696  if (!ValueRotsVec[0].Repl32) {
      // Only the first Repl32 entry is compared against entry 0; the
      // search stops there either way.
1697  for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
1698  if (ValueRotsVec[i].Repl32) {
1699  if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
1700  MaxGroupsIdx = i;
1701  break;
1702  }
1703  }
1704
1705  ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
1706  bool NeedsRotate = false;
1707  if (VRI.RLAmt) {
1708  NeedsRotate = true;
1709  } else if (VRI.Repl32) {
      // With a zero rotate amount, a Repl32 entry still needs the rotate
      // instruction unless every one of its groups passes the
      // low-32-bits check below.
1710  for (auto &BG : BitGroups) {
1711  if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
1712  BG.Repl32 != VRI.Repl32)
1713  continue;
1714
1715  // We don't need a rotate if the bit group is confined to the lower
1716  // 32 bits.
1717  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
1718  continue;
1719
1720  NeedsRotate = true;
1721  break;
1722  }
1723  }
1724
      // The mask range 31..30 for Repl32 runs "backward" so that it wraps
      // around and takes in all 64 bits; 0..63 is the plain full range.
1725  if (NeedsRotate)
1726  Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
1727  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
1728  InstCnt);
1729  else
1730  Res = VRI.V;
1731
1732  // Now, remove all groups with this underlying value and rotation factor.
1733  if (Res)
1734  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1735  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
1736  BG.Repl32 == VRI.Repl32;
1737  });
1738  }
1739
1740  // Because 64-bit rotates are more flexible than inserts, we might have a
1741  // preference regarding which one we do first (to save one instruction).
      // Move the first group that is cheaper as a plain rotate-and-mask than
      // as an insert to the front, so that it becomes the starting
      // instruction in the loop below.
1742  if (!Res)
1743  for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
1744  if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
1745  false) <
1746  SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
1747  true)) {
1748  if (I != BitGroups.begin()) {
      // Copy the group out first: erase() and insert() invalidate I.
1749  BitGroup BG = *I;
1750  BitGroups.erase(I);
1751  BitGroups.insert(BitGroups.begin(), BG);
1752  }
1753
1754  break;
1755  }
1756  }
1757
1758  // Insert the other groups (one at a time).
1759  for (auto &BG : BitGroups) {
1760  if (!Res)
1761  Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
1762  BG.EndIdx, InstCnt);
1763  else
1764  Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
1765  BG.StartIdx, BG.EndIdx, InstCnt);
1766  }
1767
1768  if (LateMask) {
1769  uint64_t Mask = getZerosMask();
1770
1771  // We can use the 32-bit andi/andis technique if the mask does not
1772  // require any higher-order bits. This can save an instruction compared
1773  // to always using the general 64-bit technique.
1774  bool Use32BitInsts = isUInt<32>(Mask);
1775  // Compute the masks for andi/andis that would be necessary.
1776  unsigned ANDIMask = (Mask & UINT16_MAX),
1777  ANDISMask = (Mask >> 16) & UINT16_MAX;
1778
1779  if (Use32BitInsts) {
1780  assert((ANDIMask != 0 || ANDISMask != 0) &&
1781  "No set bits in mask when using 32-bit ands for 64-bit value");
1782
      // One instruction per non-empty 16-bit half, plus an 'or' if both
      // halves are needed.
1783  if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1784  (unsigned) (ANDISMask != 0) +
1785  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1786
1787  SDValue ANDIVal, ANDISVal;
1788  if (ANDIMask != 0)
1789  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
1790  Res, getI32Imm(ANDIMask, dl)), 0);
1791  if (ANDISMask != 0)
1792  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
1793  Res, getI32Imm(ANDISMask, dl)), 0);
1794
1795  if (!ANDIVal)
1796  Res = ANDISVal;
1797  else if (!ANDISVal)
1798  Res = ANDIVal;
1799  else
1800  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1801  ANDIVal, ANDISVal), 0);
1802  } else {
      // General case: materialise the 64-bit mask and 'and' it in.
1803  if (InstCnt) *InstCnt += SelectInt64Count(Mask) + /* and */ 1;
1804
1805  SDValue MaskVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0);
1806  Res =
1807  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
1808  Res, MaskVal), 0);
1809  }
1810  }
1811
1812  return Res.getNode();
1813  }
1814 
1815  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
1816  // Fill in BitGroups.
1817  collectBitGroups(LateMask);
1818  if (BitGroups.empty())
1819  return nullptr;
1820 
1821  // For 64-bit values, figure out when we can use 32-bit instructions.
1822  if (Bits.size() == 64)
1823  assignRepl32BitGroups();
1824 
1825  // Fill in ValueRotsVec.
1826  collectValueRotInfo();
1827 
1828  if (Bits.size() == 32) {
1829  return Select32(N, LateMask, InstCnt);
1830  } else {
1831  assert(Bits.size() == 64 && "Not 64 bits here?");
1832  return Select64(N, LateMask, InstCnt);
1833  }
1834 
1835  return nullptr;
1836  }
1837 
1838  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
1839  BitGroups.erase(std::remove_if(BitGroups.begin(), BitGroups.end(), F),
1840  BitGroups.end());
1841  }
1842 
1844 
1845  bool HasZeros;
1847 
1848  SmallVector<BitGroup, 16> BitGroups;
1849 
1850  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
1851  SmallVector<ValueRotInfo, 16> ValueRotsVec;
1852 
1853  SelectionDAG *CurDAG;
1854 
1855 public:
1856  BitPermutationSelector(SelectionDAG *DAG)
1857  : CurDAG(DAG) {}
1858 
1859  // Here we try to match complex bit permutations into a set of
1860  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
1861  // known to produce optimial code for common cases (like i32 byte swapping).
1862  SDNode *Select(SDNode *N) {
1863  Bits.resize(N->getValueType(0).getSizeInBits());
1864  if (!getValueBits(SDValue(N, 0), Bits))
1865  return nullptr;
1866 
1867  DEBUG(dbgs() << "Considering bit-permutation-based instruction"
1868  " selection for: ");
1869  DEBUG(N->dump(CurDAG));
1870 
1871  // Fill it RLAmt and set HasZeros.
1872  computeRotationAmounts();
1873 
1874  if (!HasZeros)
1875  return Select(N, false);
1876 
1877  // We currently have two techniques for handling results with zeros: early
1878  // masking (the default) and late masking. Late masking is sometimes more
1879  // efficient, but because the structure of the bit groups is different, it
1880  // is hard to tell without generating both and comparing the results. With
1881  // late masking, we ignore zeros in the resulting value when inserting each
1882  // set of bit groups, and then mask in the zeros at the end. With early
1883  // masking, we only insert the non-zero parts of the result at every step.
1884 
1885  unsigned InstCnt, InstCntLateMask;
1886  DEBUG(dbgs() << "\tEarly masking:\n");
1887  SDNode *RN = Select(N, false, &InstCnt);
1888  DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
1889 
1890  DEBUG(dbgs() << "\tLate masking:\n");
1891  SDNode *RNLM = Select(N, true, &InstCntLateMask);
1892  DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
1893  " instructions\n");
1894 
1895  if (InstCnt <= InstCntLateMask) {
1896  DEBUG(dbgs() << "\tUsing early-masking for isel\n");
1897  return RN;
1898  }
1899 
1900  DEBUG(dbgs() << "\tUsing late-masking for isel\n");
1901  return RNLM;
1902  }
1903 };
1904 } // anonymous namespace
1905 
1906 SDNode *PPCDAGToDAGISel::SelectBitPermutation(SDNode *N) {
1907  if (N->getValueType(0) != MVT::i32 &&
1908  N->getValueType(0) != MVT::i64)
1909  return nullptr;
1910 
1911  if (!UseBitPermRewriter)
1912  return nullptr;
1913 
1914  switch (N->getOpcode()) {
1915  default: break;
1916  case ISD::ROTL:
1917  case ISD::SHL:
1918  case ISD::SRL:
1919  case ISD::AND:
1920  case ISD::OR: {
1921  BitPermutationSelector BPS(CurDAG);
1922  return BPS.Select(N);
1923  }
1924  }
1925 
1926  return nullptr;
1927 }
1928 
1929 /// SelectCC - Select a comparison of the specified values with the specified
1930 /// condition code, returning the CR# of the expression.
///
/// The compare opcode is chosen from the value type of LHS (i32, i64, f32 or
/// f64; anything else trips the assert below). For the integer types, a RHS
/// that is a suitable immediate is folded into an immediate-form compare and
/// returned directly; otherwise a register-register compare of LHS and RHS is
/// built at the end of the function. The returned SDValue is result 0 of the
/// compare machine node.
1931 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
1932  ISD::CondCode CC, SDLoc dl) {
1933  // Always select the LHS.
1934  unsigned Opc;
1935 
// --- 32-bit integer compares ---
1936  if (LHS.getValueType() == MVT::i32) {
1937  unsigned Imm;
1938  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
1939  if (isInt32Immediate(RHS, Imm)) {
1940  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
1941  if (isUInt<16>(Imm))
1942  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
1943  getI32Imm(Imm & 0xFFFF, dl)),
1944  0);
1945  // If this is a 16-bit signed immediate, fold it.
1946  if (isInt<16>((int)Imm))
1947  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
1948  getI32Imm(Imm & 0xFFFF, dl)),
1949  0);
1950 
1951  // For non-equality comparisons, the default code would materialize the
1952  // constant, then compare against it, like this:
1953  // lis r2, 4660
1954  // ori r2, r2, 22136
1955  // cmpw cr0, r3, r2
1956  // Since we are just comparing for equality, we can emit this instead:
1957  // xoris r0,r3,0x1234
1958  // cmplwi cr0,r0,0x5678
1959  // beq cr0,L6
// The XORIS consumes the upper 16 bits of the immediate and the CMPLWI
// compares what is left against the lower 16 bits.
1960  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
1961  getI32Imm(Imm >> 16, dl)), 0);
1962  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
1963  getI32Imm(Imm & 0xFFFF, dl)), 0);
1964  }
// Equality against a non-immediate uses the logical (unsigned) compare.
1965  Opc = PPC::CMPLW;
1966  } else if (ISD::isUnsignedIntSetCC(CC)) {
// Unsigned ordering: fold an unsigned 16-bit immediate, else CMPLW.
1967  if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
1968  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
1969  getI32Imm(Imm & 0xFFFF, dl)), 0);
1970  Opc = PPC::CMPLW;
1971  } else {
// Remaining (signed) ordering: fold a signed 16-bit immediate, else CMPW.
1972  short SImm;
1973  if (isIntS16Immediate(RHS, SImm))
1974  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
1975  getI32Imm((int)SImm & 0xFFFF,
1976  dl)),
1977  0);
1978  Opc = PPC::CMPW;
1979  }
// --- 64-bit integer compares (same structure as the 32-bit case) ---
1980  } else if (LHS.getValueType() == MVT::i64) {
1981  uint64_t Imm;
1982  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
1983  if (isInt64Immediate(RHS.getNode(), Imm)) {
1984  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
1985  if (isUInt<16>(Imm))
1986  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
1987  getI32Imm(Imm & 0xFFFF, dl)),
1988  0);
1989  // If this is a 16-bit signed immediate, fold it.
1990  if (isInt<16>(Imm))
1991  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
1992  getI32Imm(Imm & 0xFFFF, dl)),
1993  0);
1994 
1995  // For non-equality comparisons, the default code would materialize the
1996  // constant, then compare against it, like this:
1997  // lis r2, 4660
1998  // ori r2, r2, 22136
1999  // cmpd cr0, r3, r2
2000  // Since we are just comparing for equality, we can emit this instead:
2001  // xoris r0,r3,0x1234
2002  // cmpldi cr0,r0,0x5678
2003  // beq cr0,L6
// Unlike the 32-bit case, the xoris/cmpldi pair is only used when the
// immediate fits in 32 unsigned bits; larger immediates fall through to the
// register-register CMPLD below.
2004  if (isUInt<32>(Imm)) {
2005  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
2006  getI64Imm(Imm >> 16, dl)), 0);
2007  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
2008  getI64Imm(Imm & 0xFFFF, dl)),
2009  0);
2010  }
2011  }
2012  Opc = PPC::CMPLD;
2013  } else if (ISD::isUnsignedIntSetCC(CC)) {
2014  if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
2015  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
2016  getI64Imm(Imm & 0xFFFF, dl)), 0);
2017  Opc = PPC::CMPLD;
2018  } else {
2019  short SImm;
2020  if (isIntS16Immediate(RHS, SImm))
2021  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
2022  getI64Imm(SImm & 0xFFFF, dl)),
2023  0);
2024  Opc = PPC::CMPD;
2025  }
// --- Floating-point compares: always register-register ---
2026  } else if (LHS.getValueType() == MVT::f32) {
2027  Opc = PPC::FCMPUS;
2028  } else {
2029  assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
// The f64 compare opcode depends on whether the subtarget has VSX.
2030  Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
2031  }
// No immediate form applied: emit the register-register compare.
2032  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
2033 }
2034 
// NOTE(review): the signature line of this function is absent from this
// listing; this is presumably the body of getPredicateForSetCC(CC) -- confirm
// against the original file.
//
// Maps a condition code to a PPC::PRED_* value. The plain integer code and
// its ordered/unordered floating-point counterpart listed next to it share
// one predicate.
2036  switch (CC) {
// These four codes are expected to have been rewritten before selection.
2037  case ISD::SETUEQ:
2038  case ISD::SETONE:
2039  case ISD::SETOLE:
2040  case ISD::SETOGE:
2041  llvm_unreachable("Should be lowered by legalize!");
2042  default: llvm_unreachable("Unknown condition!");
2043  case ISD::SETOEQ:
2044  case ISD::SETEQ: return PPC::PRED_EQ;
2045  case ISD::SETUNE:
2046  case ISD::SETNE: return PPC::PRED_NE;
2047  case ISD::SETOLT:
2048  case ISD::SETLT: return PPC::PRED_LT;
2049  case ISD::SETULE:
2050  case ISD::SETLE: return PPC::PRED_LE;
2051  case ISD::SETOGT:
2052  case ISD::SETGT: return PPC::PRED_GT;
2053  case ISD::SETUGE:
2054  case ISD::SETGE: return PPC::PRED_GE;
2055  case ISD::SETO: return PPC::PRED_NU;
2056  case ISD::SETUO: return PPC::PRED_UN;
2057  // These two are invalid for floating point. Assume we have int.
2058  case ISD::SETULT: return PPC::PRED_LT;
2059  case ISD::SETUGT: return PPC::PRED_GT;
2060  }
2061 }
2062 
2063 /// getCRIdxForSetCC - Return the index of the condition register field
2064 /// associated with the SetCC condition, and whether or not the field is
2065 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
2066 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
2067  Invert = false;
2068  switch (CC) {
2069  default: llvm_unreachable("Unknown condition!");
2070  case ISD::SETOLT:
2071  case ISD::SETLT: return 0; // Bit #0 = SETOLT
2072  case ISD::SETOGT:
2073  case ISD::SETGT: return 1; // Bit #1 = SETOGT
2074  case ISD::SETOEQ:
2075  case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
2076  case ISD::SETUO: return 3; // Bit #3 = SETUO
2077  case ISD::SETUGE:
2078  case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
2079  case ISD::SETULE:
2080  case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
2081  case ISD::SETUNE:
2082  case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
2083  case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
2084  case ISD::SETUEQ:
2085  case ISD::SETOGE:
2086  case ISD::SETOLE:
2087  case ISD::SETONE:
2088  llvm_unreachable("Invalid branch code: should be expanded by legalize");
2089  // These are invalid for floating point. Assume integer.
2090  case ISD::SETULT: return 0;
2091  case ISD::SETUGT: return 1;
2092  }
2093 }
2094 
2095 // getVCmpInst: return the vector compare instruction for the specified
2096 // vector type and condition code. Since this is for altivec specific code,
2097 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
2098 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
2099  bool HasVSX, bool &Swap, bool &Negate) {
2100  Swap = false;
2101  Negate = false;
2102 
2103  if (VecVT.isFloatingPoint()) {
2104  /* Handle some cases by swapping input operands. */
2105  switch (CC) {
2106  case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
2107  case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
2108  case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
2109  case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
2110  case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
2111  case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
2112  default: break;
2113  }
2114  /* Handle some cases by negating the result. */
2115  switch (CC) {
2116  case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
2117  case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
2118  case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
2119  case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
2120  default: break;
2121  }
2122  /* We have instructions implementing the remaining cases. */
2123  switch (CC) {
2124  case ISD::SETEQ:
2125  case ISD::SETOEQ:
2126  if (VecVT == MVT::v4f32)
2127  return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
2128  else if (VecVT == MVT::v2f64)
2129  return PPC::XVCMPEQDP;
2130  break;
2131  case ISD::SETGT:
2132  case ISD::SETOGT:
2133  if (VecVT == MVT::v4f32)
2134  return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
2135  else if (VecVT == MVT::v2f64)
2136  return PPC::XVCMPGTDP;
2137  break;
2138  case ISD::SETGE:
2139  case ISD::SETOGE:
2140  if (VecVT == MVT::v4f32)
2141  return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
2142  else if (VecVT == MVT::v2f64)
2143  return PPC::XVCMPGEDP;
2144  break;
2145  default:
2146  break;
2147  }
2148  llvm_unreachable("Invalid floating-point vector compare condition");
2149  } else {
2150  /* Handle some cases by swapping input operands. */
2151  switch (CC) {
2152  case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
2153  case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
2154  case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
2155  case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
2156  default: break;
2157  }
2158  /* Handle some cases by negating the result. */
2159  switch (CC) {
2160  case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
2161  case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
2162  case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
2163  case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
2164  default: break;
2165  }
2166  /* We have instructions implementing the remaining cases. */
2167  switch (CC) {
2168  case ISD::SETEQ:
2169  case ISD::SETUEQ:
2170  if (VecVT == MVT::v16i8)
2171  return PPC::VCMPEQUB;
2172  else if (VecVT == MVT::v8i16)
2173  return PPC::VCMPEQUH;
2174  else if (VecVT == MVT::v4i32)
2175  return PPC::VCMPEQUW;
2176  else if (VecVT == MVT::v2i64)
2177  return PPC::VCMPEQUD;
2178  break;
2179  case ISD::SETGT:
2180  if (VecVT == MVT::v16i8)
2181  return PPC::VCMPGTSB;
2182  else if (VecVT == MVT::v8i16)
2183  return PPC::VCMPGTSH;
2184  else if (VecVT == MVT::v4i32)
2185  return PPC::VCMPGTSW;
2186  else if (VecVT == MVT::v2i64)
2187  return PPC::VCMPGTSD;
2188  break;
2189  case ISD::SETUGT:
2190  if (VecVT == MVT::v16i8)
2191  return PPC::VCMPGTUB;
2192  else if (VecVT == MVT::v8i16)
2193  return PPC::VCMPGTUH;
2194  else if (VecVT == MVT::v4i32)
2195  return PPC::VCMPGTUW;
2196  else if (VecVT == MVT::v2i64)
2197  return PPC::VCMPGTUD;
2198  break;
2199  default:
2200  break;
2201  }
2202  llvm_unreachable("Invalid integer vector compare condition");
2203  }
2204 }
2205 
// SelectSETCC - Custom selection for the SETCC node N (operands: LHS, RHS,
// condition code).
//
// Three strategies are tried in order:
//  1. When CR bits are not in use and RHS is a 32-bit immediate equal to 0 or
//     -1, several condition codes are selected into short branch-free
//     sequences.
//  2. Vector compares are mapped to a single vector compare instruction via
//     getVCmpInst, optionally swapping the operands and/or inverting the
//     result with a NOR.
//  3. Otherwise the compare is emitted via SelectCC, copied into CR7, read
//     with MFOCRF, and the wanted bit is extracted with RLWINM (and flipped
//     with XORI when the condition is the inverted form of that bit).
//
// Returns the selected node, or nullptr for vector compares when the
// subtarget has QPX and for scalar compares when the subtarget uses CR bits.
2206 SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
2207  SDLoc dl(N);
2208  unsigned Imm;
2209  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2210  EVT PtrVT =
2211  CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
2212  bool isPPC64 = (PtrVT == MVT::i64);
2213 
2214  if (!PPCSubTarget->useCRBits() &&
2215  isInt32Immediate(N->getOperand(1), Imm)) {
2216  // We can codegen setcc op, imm very efficiently compared to a brcond.
2217  // Check for those cases here.
2218  // setcc op, 0
2219  if (Imm == 0) {
2220  SDValue Op = N->getOperand(0);
2221  switch (CC) {
2222  default: break;
2223  case ISD::SETEQ: {
// CNTLZW followed by RLWINM with a rotate of 27 (a right shift by 5) and a
// mask of bits 5..31.
2224  Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
2225  SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
2226  getI32Imm(31, dl) };
2227  return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2228  }
2229  case ISD::SETNE: {
// The ADDIC/SUBFE sequence is not used on PPC64; fall through to the generic
// path below instead.
2230  if (isPPC64) break;
2231  SDValue AD =
2232  SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2233  Op, getI32Imm(~0U, dl)), 0);
2234  return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
2235  AD.getValue(1));
2236  }
2237  case ISD::SETLT: {
// Rotate left by 1 and keep only bit 31: moves the top bit of Op into the
// lowest bit of the result.
2238  SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
2239  getI32Imm(31, dl) };
2240  return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2241  }
2242  case ISD::SETGT: {
// NEG, then ANDC with Op, then the same rotate-and-mask as SETLT.
2243  SDValue T =
2244  SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
2245  T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
2246  SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
2247  getI32Imm(31, dl) };
2248  return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2249  }
2250  }
2251  } else if (Imm == ~0U) { // setcc op, -1
2252  SDValue Op = N->getOperand(0);
2253  switch (CC) {
2254  default: break;
2255  case ISD::SETEQ:
2256  if (isPPC64) break;
// ADDIC of 1 produces glue that is consumed by ADDZE applied to an LI of 0.
2257  Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2258  Op, getI32Imm(1, dl)), 0);
2259  return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
2260  SDValue(CurDAG->getMachineNode(PPC::LI, dl,
2261  MVT::i32,
2262  getI32Imm(0, dl)),
2263  0), Op.getValue(1));
2264  case ISD::SETNE: {
2265  if (isPPC64) break;
// NOR Op with itself, then reuse the ADDIC/SUBFE sequence of "setcc op, 0"
// SETNE on the result.
2266  Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
2267  SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2268  Op, getI32Imm(~0U, dl));
2269  return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
2270  Op, SDValue(AD, 1));
2271  }
2272  case ISD::SETLT: {
// ADDI of 1, AND with the original Op, then rotate-and-mask to the low bit.
2273  SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
2274  getI32Imm(1, dl)), 0);
2275  SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
2276  Op), 0);
2277  SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
2278  getI32Imm(31, dl) };
2279  return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2280  }
2281  case ISD::SETGT: {
// Rotate-and-mask the top bit of Op into the low bit, then flip it with XORI.
2282  SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
2283  getI32Imm(31, dl) };
2284  Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2285  return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
2286  getI32Imm(1, dl));
2287  }
2288  }
2289  }
2290  }
2291 
2292  SDValue LHS = N->getOperand(0);
2293  SDValue RHS = N->getOperand(1);
2294 
2295  // Altivec Vector compare instructions do not set any CR register by default and
2296  // vector compare operations return the same type as the operands.
2297  if (LHS.getValueType().isVector()) {
// Vector compares are not selected here when the subtarget has QPX.
2298  if (PPCSubTarget->hasQPX())
2299  return nullptr;
2300 
2301  EVT VecVT = LHS.getValueType();
2302  bool Swap, Negate;
2303  unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
2304  PPCSubTarget->hasVSX(), Swap, Negate);
2305  if (Swap)
2306  std::swap(LHS, RHS);
2307 
// The result keeps the operand's vector shape with integer elements.
2308  EVT ResVT = VecVT.changeVectorElementTypeToInteger();
2309  if (Negate) {
// Invert the compare result by NOR-ing it with itself.
2310  SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
2311  return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
2312  PPC::VNOR,
2313  ResVT, VCmp, VCmp);
2314  }
2315 
2316  return CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
2317  }
2318 
// Scalar compares are not selected here when the subtarget uses CR bits.
2319  if (PPCSubTarget->useCRBits())
2320  return nullptr;
2321 
2322  bool Inv;
2323  unsigned Idx = getCRIdxForSetCC(CC, Inv);
2324  SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
2325  SDValue IntCR;
2326 
2327  // Force the ccreg into CR7.
2328  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
2329 
2330  SDValue InFlag(nullptr, 0); // Null incoming flag value.
// getValue(1) selects the second result of the CopyToReg node, which is
// passed to MFOCRF below alongside the CR7 register operand.
2331  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
2332  InFlag).getValue(1);
2333 
2334  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
2335  CCReg), 0);
2336 
// Rotate amount (32 - (3 - Idx)) & 31 with mask 31..31 leaves the bit selected
// by Idx in the lowest bit of the result.
2337  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
2338  getI32Imm(31, dl), getI32Imm(31, dl) };
2339  if (!Inv)
2340  return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2341 
2342  // Get the specified bit.
2343  SDValue Tmp =
2344  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
// The condition is the complement of the extracted bit, so flip it.
2345  return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
2346 }
2347 
// transferMemOperands - Attach the memory operand of N (accessed as a
// MemSDNode) to Result (accessed as a MachineSDNode) as a one-element memref
// range, and return Result so the call can be used directly in a return
// statement.
//
// NOTE(review): the line declaring MemOp is absent from this listing;
// presumably it allocates storage for one memory operand -- confirm against
// the original file.
2348 SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
2349  // Transfer memoperands.
2351  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2352  cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
2353  return Result;
2354 }
2355 
2356 
2357 // Select - Convert the specified operand from a target-independent to a
2358 // target-specific node if it hasn't already been changed.
2360  SDLoc dl(N);
2361  if (N->isMachineOpcode()) {
2362  N->setNodeId(-1);
2363  return nullptr; // Already selected.
2364  }
2365 
2366  // In case any misguided DAG-level optimizations form an ADD with a
2367  // TargetConstant operand, crash here instead of miscompiling (by selecting
2368  // an r+r add instead of some kind of r+i add).
2369  if (N->getOpcode() == ISD::ADD &&
2371  llvm_unreachable("Invalid ADD with TargetConstant operand");
2372 
2373  // Try matching complex bit permutations before doing anything else.
2374  if (SDNode *NN = SelectBitPermutation(N))
2375  return NN;
2376 
2377  switch (N->getOpcode()) {
2378  default: break;
2379 
2380  case ISD::Constant: {
2381  if (N->getValueType(0) == MVT::i64)
2382  return SelectInt64(CurDAG, N);
2383  break;
2384  }
2385 
2386  case ISD::SETCC: {
2387  SDNode *SN = SelectSETCC(N);
2388  if (SN)
2389  return SN;
2390  break;
2391  }
2392  case PPCISD::GlobalBaseReg:
2393  return getGlobalBaseReg();
2394 
2395  case ISD::FrameIndex:
2396  return getFrameIndex(N, N);
2397 
2398  case PPCISD::MFOCRF: {
2399  SDValue InFlag = N->getOperand(1);
2400  return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
2401  N->getOperand(0), InFlag);
2402  }
2403 
2404  case PPCISD::READ_TIME_BASE: {
2405  return CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
2406  MVT::Other, N->getOperand(0));
2407  }
2408 
2409  case PPCISD::SRA_ADDZE: {
2410  SDValue N0 = N->getOperand(0);
2411  SDValue ShiftAmt =
2412  CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
2413  getConstantIntValue(), dl,
2414  N->getValueType(0));
2415  if (N->getValueType(0) == MVT::i64) {
2416  SDNode *Op =
2417  CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
2418  N0, ShiftAmt);
2419  return CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64,
2420  SDValue(Op, 0), SDValue(Op, 1));
2421  } else {
2422  assert(N->getValueType(0) == MVT::i32 &&
2423  "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
2424  SDNode *Op =
2425  CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
2426  N0, ShiftAmt);
2427  return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
2428  SDValue(Op, 0), SDValue(Op, 1));
2429  }
2430  }
2431 
2432  case ISD::LOAD: {
2433  // Handle preincrement loads.
2434  LoadSDNode *LD = cast<LoadSDNode>(N);
2435  EVT LoadedVT = LD->getMemoryVT();
2436 
2437  // Normal loads are handled by code generated from the .td file.
2438  if (LD->getAddressingMode() != ISD::PRE_INC)
2439  break;
2440 
2441  SDValue Offset = LD->getOffset();
2442  if (Offset.getOpcode() == ISD::TargetConstant ||
2443  Offset.getOpcode() == ISD::TargetGlobalAddress) {
2444 
2445  unsigned Opcode;
2446  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
2447  if (LD->getValueType(0) != MVT::i64) {
2448  // Handle PPC32 integer and normal FP loads.
2449  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2450  switch (LoadedVT.getSimpleVT().SimpleTy) {
2451  default: llvm_unreachable("Invalid PPC load type!");
2452  case MVT::f64: Opcode = PPC::LFDU; break;
2453  case MVT::f32: Opcode = PPC::LFSU; break;
2454  case MVT::i32: Opcode = PPC::LWZU; break;
2455  case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
2456  case MVT::i1:
2457  case MVT::i8: Opcode = PPC::LBZU; break;
2458  }
2459  } else {
2460  assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
2461  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2462  switch (LoadedVT.getSimpleVT().SimpleTy) {
2463  default: llvm_unreachable("Invalid PPC load type!");
2464  case MVT::i64: Opcode = PPC::LDU; break;
2465  case MVT::i32: Opcode = PPC::LWZU8; break;
2466  case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
2467  case MVT::i1:
2468  case MVT::i8: Opcode = PPC::LBZU8; break;
2469  }
2470  }
2471 
2472  SDValue Chain = LD->getChain();
2473  SDValue Base = LD->getBasePtr();
2474  SDValue Ops[] = { Offset, Base, Chain };
2475  return transferMemOperands(
2476  N, CurDAG->getMachineNode(
2477  Opcode, dl, LD->getValueType(0),
2478  PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other,
2479  Ops));
2480  } else {
2481  unsigned Opcode;
2482  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
2483  if (LD->getValueType(0) != MVT::i64) {
2484  // Handle PPC32 integer and normal FP loads.
2485  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2486  switch (LoadedVT.getSimpleVT().SimpleTy) {
2487  default: llvm_unreachable("Invalid PPC load type!");
2488  case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
2489  case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
2490  case MVT::f64: Opcode = PPC::LFDUX; break;
2491  case MVT::f32: Opcode = PPC::LFSUX; break;
2492  case MVT::i32: Opcode = PPC::LWZUX; break;
2493  case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
2494  case MVT::i1:
2495  case MVT::i8: Opcode = PPC::LBZUX; break;
2496  }
2497  } else {
2498  assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
2499  assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
2500  "Invalid sext update load");
2501  switch (LoadedVT.getSimpleVT().SimpleTy) {
2502  default: llvm_unreachable("Invalid PPC load type!");
2503  case MVT::i64: Opcode = PPC::LDUX; break;
2504  case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
2505  case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
2506  case MVT::i1:
2507  case MVT::i8: Opcode = PPC::LBZUX8; break;
2508  }
2509  }
2510 
2511  SDValue Chain = LD->getChain();
2512  SDValue Base = LD->getBasePtr();
2513  SDValue Ops[] = { Base, Offset, Chain };
2514  return transferMemOperands(
2515  N, CurDAG->getMachineNode(
2516  Opcode, dl, LD->getValueType(0),
2517  PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other,
2518  Ops));
2519  }
2520  }
2521 
2522  case ISD::AND: {
2523  unsigned Imm, Imm2, SH, MB, ME;
2524  uint64_t Imm64;
2525 
2526  // If this is an and of a value rotated between 0 and 31 bits and then and'd
2527  // with a mask, emit rlwinm
2528  if (isInt32Immediate(N->getOperand(1), Imm) &&
2529  isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
2530  SDValue Val = N->getOperand(0).getOperand(0);
2531  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
2532  getI32Imm(ME, dl) };
2533  return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2534  }
2535  // If this is just a masked value where the input is not handled above, and
2536  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
2537  if (isInt32Immediate(N->getOperand(1), Imm) &&
2538  isRunOfOnes(Imm, MB, ME) &&
2539  N->getOperand(0).getOpcode() != ISD::ROTL) {
2540  SDValue Val = N->getOperand(0);
2541  SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
2542  getI32Imm(ME, dl) };
2543  return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2544  }
2545  // If this is a 64-bit zero-extension mask, emit rldicl.
2546  if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
2547  isMask_64(Imm64)) {
2548  SDValue Val = N->getOperand(0);
2549  MB = 64 - countTrailingOnes(Imm64);
2550  SH = 0;
2551 
2552  // If the operand is a logical right shift, we can fold it into this
2553  // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
2554  // for n <= mb. The right shift is really a left rotate followed by a
2555  // mask, and this mask is a more-restrictive sub-mask of the mask implied
2556  // by the shift.
2557  if (Val.getOpcode() == ISD::SRL &&
2558  isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
2559  assert(Imm < 64 && "Illegal shift amount");
2560  Val = Val.getOperand(0);
2561  SH = 64 - Imm;
2562  }
2563 
2564  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
2565  return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
2566  }
2567  // AND X, 0 -> 0, not "rlwinm 32".
2568  if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
2569  ReplaceUses(SDValue(N, 0), N->getOperand(1));
2570  return nullptr;
2571  }
2572  // ISD::OR doesn't get all the bitfield insertion fun.
2573  // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
2574  if (isInt32Immediate(N->getOperand(1), Imm) &&
2575  N->getOperand(0).getOpcode() == ISD::OR &&
2576  isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
2577  unsigned MB, ME;
2578  Imm = ~(Imm^Imm2);
2579  if (isRunOfOnes(Imm, MB, ME)) {
2580  SDValue Ops[] = { N->getOperand(0).getOperand(0),
2581  N->getOperand(0).getOperand(1),
2582  getI32Imm(0, dl), getI32Imm(MB, dl),
2583  getI32Imm(ME, dl) };
2584  return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
2585  }
2586  }
2587 
2588  // Other cases are autogenerated.
2589  break;
2590  }
2591  case ISD::OR: {
2592  if (N->getValueType(0) == MVT::i32)
2593  if (SDNode *I = SelectBitfieldInsert(N))
2594  return I;
2595 
2596  short Imm;
2597  if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
2598  isIntS16Immediate(N->getOperand(1), Imm)) {
2599  APInt LHSKnownZero, LHSKnownOne;
2600  CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne);
2601 
2602  // If this is equivalent to an add, then we can fold it with the
2603  // FrameIndex calculation.
2604  if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL)
2605  return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
2606  }
2607 
2608  // Other cases are autogenerated.
2609  break;
2610  }
2611  case ISD::ADD: {
2612  short Imm;
2613  if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
2614  isIntS16Immediate(N->getOperand(1), Imm))
2615  return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
2616 
2617  break;
2618  }
2619  case ISD::SHL: {
2620  unsigned Imm, SH, MB, ME;
2621  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
2622  isRotateAndMask(N, Imm, true, SH, MB, ME)) {
2623  SDValue Ops[] = { N->getOperand(0).getOperand(0),
2624  getI32Imm(SH, dl), getI32Imm(MB, dl),
2625  getI32Imm(ME, dl) };
2626  return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2627  }
2628 
2629  // Other cases are autogenerated.
2630  break;
2631  }
2632  case ISD::SRL: {
2633  unsigned Imm, SH, MB, ME;
2634  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
2635  isRotateAndMask(N, Imm, true, SH, MB, ME)) {
2636  SDValue Ops[] = { N->getOperand(0).getOperand(0),
2637  getI32Imm(SH, dl), getI32Imm(MB, dl),
2638  getI32Imm(ME, dl) };
2639  return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2640  }
2641 
2642  // Other cases are autogenerated.
2643  break;
2644  }
2645  // FIXME: Remove this once the ANDI glue bug is fixed:
2647  case PPCISD::ANDIo_1_GT_BIT: {
2648  if (!ANDIGlueBug)
2649  break;
2650 
2651  EVT InVT = N->getOperand(0).getValueType();
2652  assert((InVT == MVT::i64 || InVT == MVT::i32) &&
2653  "Invalid input type for ANDIo_1_EQ_BIT");
2654 
2655  unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
2656  SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
2657  N->getOperand(0),
2658  CurDAG->getTargetConstant(1, dl, InVT)),
2659  0);
2660  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2661  SDValue SRIdxVal =
2663  PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
2664 
2666  CR0Reg, SRIdxVal,
2667  SDValue(AndI.getNode(), 1) /* glue */);
2668  }
2669  case ISD::SELECT_CC: {
2670  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2671  EVT PtrVT =
2672  CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
2673  bool isPPC64 = (PtrVT == MVT::i64);
2674 
2675  // If this is a select of i1 operands, we'll pattern match it.
2676  if (PPCSubTarget->useCRBits() &&
2677  N->getOperand(0).getValueType() == MVT::i1)
2678  break;
2679 
2680  // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
2681  if (!isPPC64)
2682  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2683  if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2684  if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
2685  if (N1C->isNullValue() && N3C->isNullValue() &&
2686  N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
2687  // FIXME: Implement this optzn for PPC64.
2688  N->getValueType(0) == MVT::i32) {
2689  SDNode *Tmp =
2690  CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2691  N->getOperand(0), getI32Imm(~0U, dl));
2692  return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
2693  SDValue(Tmp, 0), N->getOperand(0),
2694  SDValue(Tmp, 1));
2695  }
2696 
2697  SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
2698 
2699  if (N->getValueType(0) == MVT::i1) {
2700  // An i1 select is: (c & t) | (!c & f).
2701  bool Inv;
2702  unsigned Idx = getCRIdxForSetCC(CC, Inv);
2703 
2704  unsigned SRI;
2705  switch (Idx) {
2706  default: llvm_unreachable("Invalid CC index");
2707  case 0: SRI = PPC::sub_lt; break;
2708  case 1: SRI = PPC::sub_gt; break;
2709  case 2: SRI = PPC::sub_eq; break;
2710  case 3: SRI = PPC::sub_un; break;
2711  }
2712 
2713  SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
2714 
2715  SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
2716  CCBit, CCBit), 0);
2717  SDValue C = Inv ? NotCCBit : CCBit,
2718  NotC = Inv ? CCBit : NotCCBit;
2719 
2720  SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
2721  C, N->getOperand(2)), 0);
2722  SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
2723  NotC, N->getOperand(3)), 0);
2724 
2725  return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
2726  }
2727 
2728  unsigned BROpc = getPredicateForSetCC(CC);
2729 
2730  unsigned SelectCCOp;
2731  if (N->getValueType(0) == MVT::i32)
2732  SelectCCOp = PPC::SELECT_CC_I4;
2733  else if (N->getValueType(0) == MVT::i64)
2734  SelectCCOp = PPC::SELECT_CC_I8;
2735  else if (N->getValueType(0) == MVT::f32)
2736  if (PPCSubTarget->hasP8Vector())
2737  SelectCCOp = PPC::SELECT_CC_VSSRC;
2738  else
2739  SelectCCOp = PPC::SELECT_CC_F4;
2740  else if (N->getValueType(0) == MVT::f64)
2741  if (PPCSubTarget->hasVSX())
2742  SelectCCOp = PPC::SELECT_CC_VSFRC;
2743  else
2744  SelectCCOp = PPC::SELECT_CC_F8;
2745  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
2746  SelectCCOp = PPC::SELECT_CC_QFRC;
2747  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
2748  SelectCCOp = PPC::SELECT_CC_QSRC;
2749  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
2750  SelectCCOp = PPC::SELECT_CC_QBRC;
2751  else if (N->getValueType(0) == MVT::v2f64 ||
2752  N->getValueType(0) == MVT::v2i64)
2753  SelectCCOp = PPC::SELECT_CC_VSRC;
2754  else
2755  SelectCCOp = PPC::SELECT_CC_VRRC;
2756 
2757  SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
2758  getI32Imm(BROpc, dl) };
2759  return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
2760  }
2761  case ISD::VSELECT:
2762  if (PPCSubTarget->hasVSX()) {
2763  SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
2764  return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
2765  }
2766 
2767  break;
2768  case ISD::VECTOR_SHUFFLE:
2769  if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
2770  N->getValueType(0) == MVT::v2i64)) {
2771  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
2772 
2773  SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
2774  Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
2775  unsigned DM[2];
2776 
2777  for (int i = 0; i < 2; ++i)
2778  if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
2779  DM[i] = 0;
2780  else
2781  DM[i] = 1;
2782 
2783  if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
2784  Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
2785  isa<LoadSDNode>(Op1.getOperand(0))) {
2786  LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
2787  SDValue Base, Offset;
2788 
2789  if (LD->isUnindexed() &&
2790  SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
2791  SDValue Chain = LD->getChain();
2792  SDValue Ops[] = { Base, Offset, Chain };
2793  return CurDAG->SelectNodeTo(N, PPC::LXVDSX,
2794  N->getValueType(0), Ops);
2795  }
2796  }
2797 
2798  // For little endian, we must swap the input operands and adjust
2799  // the mask elements (reverse and invert them).
2800  if (PPCSubTarget->isLittleEndian()) {
2801  std::swap(Op1, Op2);
2802  unsigned tmp = DM[0];
2803  DM[0] = 1 - DM[1];
2804  DM[1] = 1 - tmp;
2805  }
2806 
2807  SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
2808  MVT::i32);
2809  SDValue Ops[] = { Op1, Op2, DMV };
2810  return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
2811  }
2812 
2813  break;
2814  case PPCISD::BDNZ:
2815  case PPCISD::BDZ: {
2816  bool IsPPC64 = PPCSubTarget->isPPC64();
2817  SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
2818  return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ?
2819  (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
2820  (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
2821  MVT::Other, Ops);
2822  }
2823  case PPCISD::COND_BRANCH: {
2824  // Op #0 is the Chain.
2825  // Op #1 is the PPC::PRED_* number.
2826  // Op #2 is the CR#
2827  // Op #3 is the Dest MBB
2828  // Op #4 is the Flag.
2829  // Prevent PPC::PRED_* from being selected into LI.
2830  SDValue Pred =
2831  getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(), dl);
2832  SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
2833  N->getOperand(0), N->getOperand(4) };
2834  return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
2835  }
2836  case ISD::BR_CC: {
2837  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
2838  unsigned PCC = getPredicateForSetCC(CC);
2839 
2840  if (N->getOperand(2).getValueType() == MVT::i1) {
2841  unsigned Opc;
2842  bool Swap;
2843  switch (PCC) {
2844  default: llvm_unreachable("Unexpected Boolean-operand predicate");
2845  case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
2846  case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
2847  case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
2848  case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
2849  case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
2850  case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
2851  }
2852 
2853  SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
2854  N->getOperand(Swap ? 3 : 2),
2855  N->getOperand(Swap ? 2 : 3)), 0);
2856  return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other,
2857  BitComp, N->getOperand(4), N->getOperand(0));
2858  }
2859 
2860  SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
2861  SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
2862  N->getOperand(4), N->getOperand(0) };
2863  return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
2864  }
2865  case ISD::BRIND: {
2866  // FIXME: Should custom lower this.
2867  SDValue Chain = N->getOperand(0);
2868  SDValue Target = N->getOperand(1);
2869  unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
2870  unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
2871  Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
2872  Chain), 0);
2873  return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
2874  }
2875  case PPCISD::TOC_ENTRY: {
2876  assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
2877  "Only supported for 64-bit ABI and 32-bit SVR4");
2878  if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
2879  SDValue GA = N->getOperand(0);
2880  return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl,
2881  MVT::i32, GA, N->getOperand(1)));
2882  }
2883 
2884  // For medium and large code model, we generate two instructions as
2885  // described below. Otherwise we allow SelectCodeCommon to handle this,
2886  // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
2887  CodeModel::Model CModel = TM.getCodeModel();
2888  if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
2889  break;
2890 
2891  // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
2892  // If it is an externally defined symbol, a symbol with common linkage,
2893  // a non-local function address, or a jump table address, or if we are
2894  // generating code for large code model, we generate:
2895  // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
2896  // Otherwise we generate:
2897  // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
2898  SDValue GA = N->getOperand(0);
2899  SDValue TOCbase = N->getOperand(1);
2900  SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
2901  TOCbase, GA);
2902 
2903  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
2904  CModel == CodeModel::Large)
2905  return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
2906  MVT::i64, GA, SDValue(Tmp, 0)));
2907 
2908  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
2909  const GlobalValue *GValue = G->getGlobal();
2910  if ((GValue->getType()->getElementType()->isFunctionTy() &&
2911  !GValue->isStrongDefinitionForLinker()) ||
2912  GValue->isDeclaration() || GValue->hasCommonLinkage() ||
2914  return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
2915  MVT::i64, GA, SDValue(Tmp, 0)));
2916  }
2917 
2918  return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
2919  SDValue(Tmp, 0), GA);
2920  }
2921  case PPCISD::PPC32_PICGOT: {
2922  // Generate a PIC-safe GOT reference.
2923  assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
2924  "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
2925  return CurDAG->SelectNodeTo(
2926  N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()),
2927  MVT::i32);
2928  }
2929  case PPCISD::VADD_SPLAT: {
2930  // This expands into one of three sequences, depending on whether
2931  // the first operand is odd or even, positive or negative.
2932  assert(isa<ConstantSDNode>(N->getOperand(0)) &&
2933  isa<ConstantSDNode>(N->getOperand(1)) &&
2934  "Invalid operand on VADD_SPLAT!");
2935 
2936  int Elt = N->getConstantOperandVal(0);
2937  int EltSize = N->getConstantOperandVal(1);
2938  unsigned Opc1, Opc2, Opc3;
2939  EVT VT;
2940 
2941  if (EltSize == 1) {
2942  Opc1 = PPC::VSPLTISB;
2943  Opc2 = PPC::VADDUBM;
2944  Opc3 = PPC::VSUBUBM;
2945  VT = MVT::v16i8;
2946  } else if (EltSize == 2) {
2947  Opc1 = PPC::VSPLTISH;
2948  Opc2 = PPC::VADDUHM;
2949  Opc3 = PPC::VSUBUHM;
2950  VT = MVT::v8i16;
2951  } else {
2952  assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
2953  Opc1 = PPC::VSPLTISW;
2954  Opc2 = PPC::VADDUWM;
2955  Opc3 = PPC::VSUBUWM;
2956  VT = MVT::v4i32;
2957  }
2958 
2959  if ((Elt & 1) == 0) {
2960  // Elt is even, in the range [-32,-18] + [16,30].
2961  //
2962  // Convert: VADD_SPLAT elt, size
2963  // Into: tmp = VSPLTIS[BHW] elt
2964  // VADDU[BHW]M tmp, tmp
2965  // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
2966  SDValue EltVal = getI32Imm(Elt >> 1, dl);
2967  SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
2968  SDValue TmpVal = SDValue(Tmp, 0);
2969  return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
2970 
2971  } else if (Elt > 0) {
2972  // Elt is odd and positive, in the range [17,31].
2973  //
2974  // Convert: VADD_SPLAT elt, size
2975  // Into: tmp1 = VSPLTIS[BHW] elt-16
2976  // tmp2 = VSPLTIS[BHW] -16
2977  // VSUBU[BHW]M tmp1, tmp2
2978  SDValue EltVal = getI32Imm(Elt - 16, dl);
2979  SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
2980  EltVal = getI32Imm(-16, dl);
2981  SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
2982  return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
2983  SDValue(Tmp2, 0));
2984 
2985  } else {
2986  // Elt is odd and negative, in the range [-31,-17].
2987  //
2988  // Convert: VADD_SPLAT elt, size
2989  // Into: tmp1 = VSPLTIS[BHW] elt+16
2990  // tmp2 = VSPLTIS[BHW] -16
2991  // VADDU[BHW]M tmp1, tmp2
2992  SDValue EltVal = getI32Imm(Elt + 16, dl);
2993  SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
2994  EltVal = getI32Imm(-16, dl);
2995  SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
2996  return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
2997  SDValue(Tmp2, 0));
2998  }
2999  }
3000  }
3001 
3002  return SelectCode(N);
3003 }
3004 
3005 // If the target supports the cmpb instruction, do the idiom recognition here.
3006 // We don't do this as a DAG combine because we don't want to do it as nodes
3007 // are being combined (because we might miss part of the eventual idiom). We
3008 // don't want to do it during instruction selection because we want to reuse
3009 // the logic for lowering the masking operations already part of the
3010 // instruction selector.
3011 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
3012  SDLoc dl(N);
3013 
3014  assert(N->getOpcode() == ISD::OR &&
3015  "Only OR nodes are supported for CMPB");
3016 
3017  SDValue Res;
3018  if (!PPCSubTarget->hasCMPB())
3019  return Res;
3020 
3021  if (N->getValueType(0) != MVT::i32 &&
3022  N->getValueType(0) != MVT::i64)
3023  return Res;
3024 
3025  EVT VT = N->getValueType(0);
3026 
3027  SDValue RHS, LHS;
3028  bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
3029  uint64_t Mask = 0, Alt = 0;
3030 
3031  auto IsByteSelectCC = [this](SDValue O, unsigned &b,
3032  uint64_t &Mask, uint64_t &Alt,
3033  SDValue &LHS, SDValue &RHS) {
3034  if (O.getOpcode() != ISD::SELECT_CC)
3035  return false;
3036  ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
3037 
3038  if (!isa<ConstantSDNode>(O.getOperand(2)) ||
3039  !isa<ConstantSDNode>(O.getOperand(3)))
3040  return false;
3041 
3042  uint64_t PM = O.getConstantOperandVal(2);
3043  uint64_t PAlt = O.getConstantOperandVal(3);
3044  for (b = 0; b < 8; ++b) {
3045  uint64_t Mask = UINT64_C(0xFF) << (8*b);
3046  if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
3047  break;
3048  }
3049 
3050  if (b == 8)
3051  return false;
3052  Mask |= PM;
3053  Alt |= PAlt;
3054 
3055  if (!isa<ConstantSDNode>(O.getOperand(1)) ||
3056  O.getConstantOperandVal(1) != 0) {
3057  SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
3058  if (Op0.getOpcode() == ISD::TRUNCATE)
3059  Op0 = Op0.getOperand(0);
3060  if (Op1.getOpcode() == ISD::TRUNCATE)
3061  Op1 = Op1.getOperand(0);
3062 
3063  if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
3064  Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
3065  isa<ConstantSDNode>(Op0.getOperand(1))) {
3066 
3067  unsigned Bits = Op0.getValueType().getSizeInBits();
3068  if (b != Bits/8-1)
3069  return false;
3070  if (Op0.getConstantOperandVal(1) != Bits-8)
3071  return false;
3072 
3073  LHS = Op0.getOperand(0);
3074  RHS = Op1.getOperand(0);
3075  return true;
3076  }
3077 
3078  // When we have small integers (i16 to be specific), the form present
3079  // post-legalization uses SETULT in the SELECT_CC for the
3080  // higher-order byte, depending on the fact that the
3081  // even-higher-order bytes are known to all be zero, for example:
3082  // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
3083  // (so when the second byte is the same, because all higher-order
3084  // bits from bytes 3 and 4 are known to be zero, the result of the
3085  // xor can be at most 255)
3086  if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
3087  isa<ConstantSDNode>(O.getOperand(1))) {
3088 
3089  uint64_t ULim = O.getConstantOperandVal(1);
3090  if (ULim != (UINT64_C(1) << b*8))
3091  return false;
3092 
3093  // Now we need to make sure that the upper bytes are known to be
3094  // zero.
3095  unsigned Bits = Op0.getValueType().getSizeInBits();
3096  if (!CurDAG->MaskedValueIsZero(Op0,
3097  APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
3098  return false;
3099 
3100  LHS = Op0.getOperand(0);
3101  RHS = Op0.getOperand(1);
3102  return true;
3103  }
3104 
3105  return false;
3106  }
3107 
3108  if (CC != ISD::SETEQ)
3109  return false;
3110 
3111  SDValue Op = O.getOperand(0);
3112  if (Op.getOpcode() == ISD::AND) {
3113  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3114  return false;
3115  if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
3116  return false;
3117 
3118  SDValue XOR = Op.getOperand(0);
3119  if (XOR.getOpcode() == ISD::TRUNCATE)
3120  XOR = XOR.getOperand(0);
3121  if (XOR.getOpcode() != ISD::XOR)
3122  return false;
3123 
3124  LHS = XOR.getOperand(0);
3125  RHS = XOR.getOperand(1);
3126  return true;
3127  } else if (Op.getOpcode() == ISD::SRL) {
3128  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3129  return false;
3130  unsigned Bits = Op.getValueType().getSizeInBits();
3131  if (b != Bits/8-1)
3132  return false;
3133  if (Op.getConstantOperandVal(1) != Bits-8)
3134  return false;
3135 
3136  SDValue XOR = Op.getOperand(0);
3137  if (XOR.getOpcode() == ISD::TRUNCATE)
3138  XOR = XOR.getOperand(0);
3139  if (XOR.getOpcode() != ISD::XOR)
3140  return false;
3141 
3142  LHS = XOR.getOperand(0);
3143  RHS = XOR.getOperand(1);
3144  return true;
3145  }
3146 
3147  return false;
3148  };
3149 
3150  SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
3151  while (!Queue.empty()) {
3152  SDValue V = Queue.pop_back_val();
3153 
3154  for (const SDValue &O : V.getNode()->ops()) {
3155  unsigned b;
3156  uint64_t M = 0, A = 0;
3157  SDValue OLHS, ORHS;
3158  if (O.getOpcode() == ISD::OR) {
3159  Queue.push_back(O);
3160  } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
3161  if (!LHS) {
3162  LHS = OLHS;
3163  RHS = ORHS;
3164  BytesFound[b] = true;
3165  Mask |= M;
3166  Alt |= A;
3167  } else if ((LHS == ORHS && RHS == OLHS) ||
3168  (RHS == ORHS && LHS == OLHS)) {
3169  BytesFound[b] = true;
3170  Mask |= M;
3171  Alt |= A;
3172  } else {
3173  return Res;
3174  }
3175  } else {
3176  return Res;
3177  }
3178  }
3179  }
3180 
3181  unsigned LastB = 0, BCnt = 0;
3182  for (unsigned i = 0; i < 8; ++i)
3183  if (BytesFound[LastB]) {
3184  ++BCnt;
3185  LastB = i;
3186  }
3187 
3188  if (!LastB || BCnt < 2)
3189  return Res;
3190 
3191  // Because we'll be zero-extending the output anyway if don't have a specific
3192  // value for each input byte (via the Mask), we can 'anyext' the inputs.
3193  if (LHS.getValueType() != VT) {
3194  LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
3195  RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
3196  }
3197 
3198  Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
3199 
3200  bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
3201  if (NonTrivialMask && !Alt) {
3202  // Res = Mask & CMPB
3203  Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
3204  CurDAG->getConstant(Mask, dl, VT));
3205  } else if (Alt) {
3206  // Res = (CMPB & Mask) | (~CMPB & Alt)
3207  // Which, as suggested here:
3208  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
3209  // can be written as:
3210  // Res = Alt ^ ((Alt ^ Mask) & CMPB)
3211  // useful because the (Alt ^ Mask) can be pre-computed.
3212  Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
3213  CurDAG->getConstant(Mask ^ Alt, dl, VT));
3214  Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
3215  CurDAG->getConstant(Alt, dl, VT));
3216  }
3217 
3218  return Res;
3219 }
3220 
3221 // When CR bit registers are enabled, an extension of an i1 variable to a i32
3222 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
3223 // involves constant materialization of a 0 or a 1 or both. If the result of
3224 // the extension is then operated upon by some operator that can be constant
3225 // folded with a constant 0 or 1, and that constant can be materialized using
3226 // only one instruction (like a zero or one), then we should fold in those
3227 // operations with the select.
3228 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
3229  if (!PPCSubTarget->useCRBits())
3230  return;
3231 
3232  if (N->getOpcode() != ISD::ZERO_EXTEND &&
3233  N->getOpcode() != ISD::SIGN_EXTEND &&
3234  N->getOpcode() != ISD::ANY_EXTEND)
3235  return;
3236 
3237  if (N->getOperand(0).getValueType() != MVT::i1)
3238  return;
3239 
3240  if (!N->hasOneUse())
3241  return;
3242 
3243  SDLoc dl(N);
3244  EVT VT = N->getValueType(0);
3245  SDValue Cond = N->getOperand(0);
3246  SDValue ConstTrue =
3247  CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
3248  SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
3249 
3250  do {
3251  SDNode *User = *N->use_begin();
3252  if (User->getNumOperands() != 2)
3253  break;
3254 
3255  auto TryFold = [this, N, User, dl](SDValue Val) {
3256  SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
3257  SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
3258  SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
3259 
3260  return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
3261  User->getValueType(0),
3262  O0.getNode(), O1.getNode());
3263  };
3264 
3265  SDValue TrueRes = TryFold(ConstTrue);
3266  if (!TrueRes)
3267  break;
3268  SDValue FalseRes = TryFold(ConstFalse);
3269  if (!FalseRes)
3270  break;
3271 
3272  // For us to materialize these using one instruction, we must be able to
3273  // represent them as signed 16-bit integers.
3274  uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
3275  False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
3276  if (!isInt<16>(True) || !isInt<16>(False))
3277  break;
3278 
3279  // We can replace User with a new SELECT node, and try again to see if we
3280  // can fold the select with its user.
3281  Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
3282  N = User;
3283  ConstTrue = TrueRes;
3284  ConstFalse = FalseRes;
3285  } while (N->hasOneUse());
3286 }
3287 
3288 void PPCDAGToDAGISel::PreprocessISelDAG() {
3289  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
3290  ++Position;
3291 
3292  bool MadeChange = false;
3293  while (Position != CurDAG->allnodes_begin()) {
3294  SDNode *N = --Position;
3295  if (N->use_empty())
3296  continue;
3297 
3298  SDValue Res;
3299  switch (N->getOpcode()) {
3300  default: break;
3301  case ISD::OR:
3302  Res = combineToCMPB(N);
3303  break;
3304  }
3305 
3306  if (!Res)
3307  foldBoolExts(Res, N);
3308 
3309  if (Res) {
3310  DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
3311  DEBUG(N->dump(CurDAG));
3312  DEBUG(dbgs() << "\nNew: ");
3313  DEBUG(Res.getNode()->dump(CurDAG));
3314  DEBUG(dbgs() << "\n");
3315 
3316  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
3317  MadeChange = true;
3318  }
3319  }
3320 
3321  if (MadeChange)
3322  CurDAG->RemoveDeadNodes();
3323 }
3324 
3325 /// PostprocessISelDAG - Perform some late peephole optimizations
3326 /// on the DAG representation.
3327 void PPCDAGToDAGISel::PostprocessISelDAG() {
3328 
3329  // Skip peepholes at -O0.
3330  if (TM.getOptLevel() == CodeGenOpt::None)
3331  return;
3332 
3333  PeepholePPC64();
3334  PeepholeCROps();
3335  PeepholePPC64ZExt();
3336 }
3337 
3338 // Check if all users of this node will become isel where the second operand
3339 // is the constant zero. If this is so, and if we can negate the condition,
3340 // then we can flip the true and false operands. This will allow the zero to
3341 // be folded with the isel so that we don't need to materialize a register
3342 // containing zero.
3343 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
3344  // If we're not using isel, then this does not matter.
3345  if (!PPCSubTarget->hasISEL())
3346  return false;
3347 
3348  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
3349  UI != UE; ++UI) {
3350  SDNode *User = *UI;
3351  if (!User->isMachineOpcode())
3352  return false;
3353  if (User->getMachineOpcode() != PPC::SELECT_I4 &&
3354  User->getMachineOpcode() != PPC::SELECT_I8)
3355  return false;
3356 
3357  SDNode *Op2 = User->getOperand(2).getNode();
3358  if (!Op2->isMachineOpcode())
3359  return false;
3360 
3361  if (Op2->getMachineOpcode() != PPC::LI &&
3362  Op2->getMachineOpcode() != PPC::LI8)
3363  return false;
3364 
3366  if (!C)
3367  return false;
3368 
3369  if (!C->isNullValue())
3370  return false;
3371  }
3372 
3373  return true;
3374 }
3375 
3376 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
3377  SmallVector<SDNode *, 4> ToReplace;
3378  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
3379  UI != UE; ++UI) {
3380  SDNode *User = *UI;
3381  assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
3382  User->getMachineOpcode() == PPC::SELECT_I8) &&
3383  "Must have all select users");
3384  ToReplace.push_back(User);
3385  }
3386 
3387  for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
3388  UE = ToReplace.end(); UI != UE; ++UI) {
3389  SDNode *User = *UI;
3390  SDNode *ResNode =
3391  CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
3392  User->getValueType(0), User->getOperand(0),
3393  User->getOperand(2),
3394  User->getOperand(1));
3395 
3396  DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
3397  DEBUG(User->dump(CurDAG));
3398  DEBUG(dbgs() << "\nNew: ");
3399  DEBUG(ResNode->dump(CurDAG));
3400  DEBUG(dbgs() << "\n");
3401 
3402  ReplaceUses(User, ResNode);
3403  }
3404 }
3405 
3406 void PPCDAGToDAGISel::PeepholeCROps() {
3407  bool IsModified;
3408  do {
3409  IsModified = false;
3410  for (SDNode &Node : CurDAG->allnodes()) {
3411  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
3412  if (!MachineNode || MachineNode->use_empty())
3413  continue;
3414  SDNode *ResNode = MachineNode;
3415 
3416  bool Op1Set = false, Op1Unset = false,
3417  Op1Not = false,
3418  Op2Set = false, Op2Unset = false,
3419  Op2Not = false;
3420 
3421  unsigned Opcode = MachineNode->getMachineOpcode();
3422  switch (Opcode) {
3423  default: break;
3424  case PPC::CRAND:
3425  case PPC::CRNAND:
3426  case PPC::CROR:
3427  case PPC::CRXOR:
3428  case PPC::CRNOR:
3429  case PPC::CREQV:
3430  case PPC::CRANDC:
3431  case PPC::CRORC: {
3432  SDValue Op = MachineNode->getOperand(1);
3433  if (Op.isMachineOpcode()) {
3434  if (Op.getMachineOpcode() == PPC::CRSET)
3435  Op2Set = true;
3436  else if (Op.getMachineOpcode() == PPC::CRUNSET)
3437  Op2Unset = true;
3438  else if (Op.getMachineOpcode() == PPC::CRNOR &&
3439  Op.getOperand(0) == Op.getOperand(1))
3440  Op2Not = true;
3441  }
3442  } // fallthrough
3443  case PPC::BC:
3444  case PPC::BCn:
3445  case PPC::SELECT_I4:
3446  case PPC::SELECT_I8:
3447  case PPC::SELECT_F4:
3448  case PPC::SELECT_F8:
3449  case PPC::SELECT_QFRC:
3450  case PPC::SELECT_QSRC:
3451  case PPC::SELECT_QBRC:
3452  case PPC::SELECT_VRRC:
3453  case PPC::SELECT_VSFRC:
3454  case PPC::SELECT_VSSRC:
3455  case PPC::SELECT_VSRC: {
3456  SDValue Op = MachineNode->getOperand(0);
3457  if (Op.isMachineOpcode()) {
3458  if (Op.getMachineOpcode() == PPC::CRSET)
3459  Op1Set = true;
3460  else if (Op.getMachineOpcode() == PPC::CRUNSET)
3461  Op1Unset = true;
3462  else if (Op.getMachineOpcode() == PPC::CRNOR &&
3463  Op.getOperand(0) == Op.getOperand(1))
3464  Op1Not = true;
3465  }
3466  }
3467  break;
3468  }
3469 
3470  bool SelectSwap = false;
3471  switch (Opcode) {
3472  default: break;
3473  case PPC::CRAND:
3474  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3475  // x & x = x
3476  ResNode = MachineNode->getOperand(0).getNode();
3477  else if (Op1Set)
3478  // 1 & y = y
3479  ResNode = MachineNode->getOperand(1).getNode();
3480  else if (Op2Set)
3481  // x & 1 = x
3482  ResNode = MachineNode->getOperand(0).getNode();
3483  else if (Op1Unset || Op2Unset)
3484  // x & 0 = 0 & y = 0
3485  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3486  MVT::i1);
3487  else if (Op1Not)
3488  // ~x & y = andc(y, x)
3489  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3490  MVT::i1, MachineNode->getOperand(1),
3491  MachineNode->getOperand(0).
3492  getOperand(0));
3493  else if (Op2Not)
3494  // x & ~y = andc(x, y)
3495  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3496  MVT::i1, MachineNode->getOperand(0),
3497  MachineNode->getOperand(1).
3498  getOperand(0));
3499  else if (AllUsersSelectZero(MachineNode))
3500  ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
3501  MVT::i1, MachineNode->getOperand(0),
3502  MachineNode->getOperand(1)),
3503  SelectSwap = true;
3504  break;
3505  case PPC::CRNAND:
3506  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3507  // nand(x, x) -> nor(x, x)
3508  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3509  MVT::i1, MachineNode->getOperand(0),
3510  MachineNode->getOperand(0));
3511  else if (Op1Set)
3512  // nand(1, y) -> nor(y, y)
3513  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3514  MVT::i1, MachineNode->getOperand(1),
3515  MachineNode->getOperand(1));
3516  else if (Op2Set)
3517  // nand(x, 1) -> nor(x, x)
3518  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3519  MVT::i1, MachineNode->getOperand(0),
3520  MachineNode->getOperand(0));
3521  else if (Op1Unset || Op2Unset)
3522  // nand(x, 0) = nand(0, y) = 1
3523  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3524  MVT::i1);
3525  else if (Op1Not)
3526  // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
3527  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3528  MVT::i1, MachineNode->getOperand(0).
3529  getOperand(0),
3530  MachineNode->getOperand(1));
3531  else if (Op2Not)
3532  // nand(x, ~y) = ~x | y = orc(y, x)
3533  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3534  MVT::i1, MachineNode->getOperand(1).
3535  getOperand(0),
3536  MachineNode->getOperand(0));
3537  else if (AllUsersSelectZero(MachineNode))
3538  ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
3539  MVT::i1, MachineNode->getOperand(0),
3540  MachineNode->getOperand(1)),
3541  SelectSwap = true;
3542  break;
3543  case PPC::CROR:
3544  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3545  // x | x = x
3546  ResNode = MachineNode->getOperand(0).getNode();
3547  else if (Op1Set || Op2Set)
3548  // x | 1 = 1 | y = 1
3549  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3550  MVT::i1);
3551  else if (Op1Unset)
3552  // 0 | y = y
3553  ResNode = MachineNode->getOperand(1).getNode();
3554  else if (Op2Unset)
3555  // x | 0 = x
3556  ResNode = MachineNode->getOperand(0).getNode();
3557  else if (Op1Not)
3558  // ~x | y = orc(y, x)
3559  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3560  MVT::i1, MachineNode->getOperand(1),
3561  MachineNode->getOperand(0).
3562  getOperand(0));
3563  else if (Op2Not)
3564  // x | ~y = orc(x, y)
3565  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3566  MVT::i1, MachineNode->getOperand(0),
3567  MachineNode->getOperand(1).
3568  getOperand(0));
3569  else if (AllUsersSelectZero(MachineNode))
3570  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3571  MVT::i1, MachineNode->getOperand(0),
3572  MachineNode->getOperand(1)),
3573  SelectSwap = true;
3574  break;
3575  case PPC::CRXOR:
3576  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3577  // xor(x, x) = 0
3578  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3579  MVT::i1);
3580  else if (Op1Set)
3581  // xor(1, y) -> nor(y, y)
3582  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3583  MVT::i1, MachineNode->getOperand(1),
3584  MachineNode->getOperand(1));
3585  else if (Op2Set)
3586  // xor(x, 1) -> nor(x, x)
3587  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3588  MVT::i1, MachineNode->getOperand(0),
3589  MachineNode->getOperand(0));
3590  else if (Op1Unset)
3591  // xor(0, y) = y
3592  ResNode = MachineNode->getOperand(1).getNode();
3593  else if (Op2Unset)
3594  // xor(x, 0) = x
3595  ResNode = MachineNode->getOperand(0).getNode();
3596  else if (Op1Not)
3597  // xor(~x, y) = eqv(x, y)
3598  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3599  MVT::i1, MachineNode->getOperand(0).
3600  getOperand(0),
3601  MachineNode->getOperand(1));
3602  else if (Op2Not)
3603  // xor(x, ~y) = eqv(x, y)
3604  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3605  MVT::i1, MachineNode->getOperand(0),
3606  MachineNode->getOperand(1).
3607  getOperand(0));
3608  else if (AllUsersSelectZero(MachineNode))
3609  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3610  MVT::i1, MachineNode->getOperand(0),
3611  MachineNode->getOperand(1)),
3612  SelectSwap = true;
3613  break;
3614  case PPC::CRNOR:
3615  if (Op1Set || Op2Set)
3616  // nor(1, y) -> 0
3617  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3618  MVT::i1);
3619  else if (Op1Unset)
3620  // nor(0, y) = ~y -> nor(y, y)
3621  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3622  MVT::i1, MachineNode->getOperand(1),
3623  MachineNode->getOperand(1));
3624  else if (Op2Unset)
3625  // nor(x, 0) = ~x
3626  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3627  MVT::i1, MachineNode->getOperand(0),
3628  MachineNode->getOperand(0));
3629  else if (Op1Not)
3630  // nor(~x, y) = andc(x, y)
3631  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3632  MVT::i1, MachineNode->getOperand(0).
3633  getOperand(0),
3634  MachineNode->getOperand(1));
3635  else if (Op2Not)
3636  // nor(x, ~y) = andc(y, x)
3637  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3638  MVT::i1, MachineNode->getOperand(1).
3639  getOperand(0),
3640  MachineNode->getOperand(0));
3641  else if (AllUsersSelectZero(MachineNode))
3642  ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
3643  MVT::i1, MachineNode->getOperand(0),
3644  MachineNode->getOperand(1)),
3645  SelectSwap = true;
3646  break;
3647  case PPC::CREQV:
3648  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3649  // eqv(x, x) = 1
3650  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3651  MVT::i1);
3652  else if (Op1Set)
3653  // eqv(1, y) = y
3654  ResNode = MachineNode->getOperand(1).getNode();
3655  else if (Op2Set)
3656  // eqv(x, 1) = x
3657  ResNode = MachineNode->getOperand(0).getNode();
3658  else if (Op1Unset)
3659  // eqv(0, y) = ~y -> nor(y, y)
3660  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3661  MVT::i1, MachineNode->getOperand(1),
3662  MachineNode->getOperand(1));
3663  else if (Op2Unset)
3664  // eqv(x, 0) = ~x
3665  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3666  MVT::i1, MachineNode->getOperand(0),
3667  MachineNode->getOperand(0));
3668  else if (Op1Not)
3669  // eqv(~x, y) = xor(x, y)
3670  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
3671  MVT::i1, MachineNode->getOperand(0).
3672  getOperand(0),
3673  MachineNode->getOperand(1));
3674  else if (Op2Not)
3675  // eqv(x, ~y) = xor(x, y)
3676  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
3677  MVT::i1, MachineNode->getOperand(0),
3678  MachineNode->getOperand(1).
3679  getOperand(0));
3680  else if (AllUsersSelectZero(MachineNode))
3681  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
3682  MVT::i1, MachineNode->getOperand(0),
3683  MachineNode->getOperand(1)),
3684  SelectSwap = true;
3685  break;
3686  case PPC::CRANDC:
3687  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3688  // andc(x, x) = 0
3689  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3690  MVT::i1);
3691  else if (Op1Set)
3692  // andc(1, y) = ~y
3693  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3694  MVT::i1, MachineNode->getOperand(1),
3695  MachineNode->getOperand(1));
3696  else if (Op1Unset || Op2Set)
3697  // andc(0, y) = andc(x, 1) = 0
3698  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3699  MVT::i1);
3700  else if (Op2Unset)
3701  // andc(x, 0) = x
3702  ResNode = MachineNode->getOperand(0).getNode();
3703  else if (Op1Not)
3704  // andc(~x, y) = ~(x | y) = nor(x, y)
3705  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3706  MVT::i1, MachineNode->getOperand(0).
3707  getOperand(0),
3708  MachineNode->getOperand(1));
3709  else if (Op2Not)
3710  // andc(x, ~y) = x & y
3711  ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
3712  MVT::i1, MachineNode->getOperand(0),
3713  MachineNode->getOperand(1).
3714  getOperand(0));
3715  else if (AllUsersSelectZero(MachineNode))
3716  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3717  MVT::i1, MachineNode->getOperand(1),
3718  MachineNode->getOperand(0)),
3719  SelectSwap = true;
3720  break;
3721  case PPC::CRORC:
3722  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3723  // orc(x, x) = 1
3724  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3725  MVT::i1);
3726  else if (Op1Set || Op2Unset)
3727  // orc(1, y) = orc(x, 0) = 1
3728  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3729  MVT::i1);
3730  else if (Op2Set)
3731  // orc(x, 1) = x
3732  ResNode = MachineNode->getOperand(0).getNode();
3733  else if (Op1Unset)
3734  // orc(0, y) = ~y
3735  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3736  MVT::i1, MachineNode->getOperand(1),
3737  MachineNode->getOperand(1));
3738  else if (Op1Not)
3739  // orc(~x, y) = ~(x & y) = nand(x, y)
3740  ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
3741  MVT::i1, MachineNode->getOperand(0).
3742  getOperand(0),
3743  MachineNode->getOperand(1));
3744  else if (Op2Not)
3745  // orc(x, ~y) = x | y
3746  ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
3747  MVT::i1, MachineNode->getOperand(0),
3748  MachineNode->getOperand(1).
3749  getOperand(0));
3750  else if (AllUsersSelectZero(MachineNode))
3751  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3752  MVT::i1, MachineNode->getOperand(1),
3753  MachineNode->getOperand(0)),
3754  SelectSwap = true;
3755  break;
3756  case PPC::SELECT_I4:
3757  case PPC::SELECT_I8:
3758  case PPC::SELECT_F4:
3759  case PPC::SELECT_F8:
3760  case PPC::SELECT_QFRC:
3761  case PPC::SELECT_QSRC:
3762  case PPC::SELECT_QBRC:
3763  case PPC::SELECT_VRRC:
3764  case PPC::SELECT_VSFRC:
3765  case PPC::SELECT_VSSRC:
3766  case PPC::SELECT_VSRC:
3767  if (Op1Set)
3768  ResNode = MachineNode->getOperand(1).getNode();
3769  else if (Op1Unset)
3770  ResNode = MachineNode->getOperand(2).getNode();
3771  else if (Op1Not)
3772  ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
3773  SDLoc(MachineNode),
3774  MachineNode->getValueType(0),
3775  MachineNode->getOperand(0).
3776  getOperand(0),
3777  MachineNode->getOperand(2),
3778  MachineNode->getOperand(1));
3779  break;
3780  case PPC::BC:
3781  case PPC::BCn:
3782  if (Op1Not)
3783  ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
3784  PPC::BC,
3785  SDLoc(MachineNode),
3786  MVT::Other,
3787  MachineNode->getOperand(0).
3788  getOperand(0),
3789  MachineNode->getOperand(1),
3790  MachineNode->getOperand(2));
3791  // FIXME: Handle Op1Set, Op1Unset here too.
3792  break;
3793  }
3794 
3795  // If we're inverting this node because it is used only by selects that
3796  // we'd like to swap, then swap the selects before the node replacement.
3797  if (SelectSwap)
3798  SwapAllSelectUsers(MachineNode);
3799 
3800  if (ResNode != MachineNode) {
3801  DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
3802  DEBUG(MachineNode->dump(CurDAG));
3803  DEBUG(dbgs() << "\nNew: ");
3804  DEBUG(ResNode->dump(CurDAG));
3805  DEBUG(dbgs() << "\n");
3806 
3807  ReplaceUses(MachineNode, ResNode);
3808  IsModified = true;
3809  }
3810  }
3811  if (IsModified)
3812  CurDAG->RemoveDeadNodes();
3813  } while (IsModified);
3814 }
3815 
3816 // Gather the set of 32-bit operations that are known to have their
3817 // higher-order 32 bits zero, where ToPromote contains all such operations.
3819  SmallPtrSetImpl<SDNode *> &ToPromote) {
3820  if (!Op32.isMachineOpcode())
3821  return false;
3822 
3823  // First, check for the "frontier" instructions (those that will clear the
3824  // higher-order 32 bits.
3825 
3826  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
3827  // around. If it does not, then these instructions will clear the
3828  // higher-order bits.
3829  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
3830  Op32.getMachineOpcode() == PPC::RLWNM) &&
3831  Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
3832  ToPromote.insert(Op32.getNode());
3833  return true;
3834  }
3835 
3836  // SLW and SRW always clear the higher-order bits.
3837  if (Op32.getMachineOpcode() == PPC::SLW ||
3838  Op32.getMachineOpcode() == PPC::SRW) {
3839  ToPromote.insert(Op32.getNode());
3840  return true;
3841  }
3842 
3843  // For LI and LIS, we need the immediate to be positive (so that it is not
3844  // sign extended).
3845  if (Op32.getMachineOpcode() == PPC::LI ||
3846  Op32.getMachineOpcode() == PPC::LIS) {
3847  if (!isUInt<15>(Op32.getConstantOperandVal(0)))
3848  return false;
3849 
3850  ToPromote.insert(Op32.getNode());
3851  return true;
3852  }
3853 
3854  // LHBRX and LWBRX always clear the higher-order bits.
3855  if (Op32.getMachineOpcode() == PPC::LHBRX ||
3856  Op32.getMachineOpcode() == PPC::LWBRX) {
3857  ToPromote.insert(Op32.getNode());
3858  return true;
3859  }
3860 
3861  // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
3862  if (Op32.getMachineOpcode() == PPC::CNTLZW) {
3863  ToPromote.insert(Op32.getNode());
3864  return true;
3865  }
3866 
3867  // Next, check for those instructions we can look through.
3868 
3869  // Assuming the mask does not wrap around, then the higher-order bits are
3870  // taken directly from the first operand.
3871  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
3872  Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
3873  SmallPtrSet<SDNode *, 16> ToPromote1;
3874  if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
3875  return false;
3876 
3877  ToPromote.insert(Op32.getNode());
3878  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
3879  return true;
3880  }
3881 
3882  // For OR, the higher-order bits are zero if that is true for both operands.
3883  // For SELECT_I4, the same is true (but the relevant operand numbers are
3884  // shifted by 1).
3885  if (Op32.getMachineOpcode() == PPC::OR ||
3886  Op32.getMachineOpcode() == PPC::SELECT_I4) {
3887  unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
3888  SmallPtrSet<SDNode *, 16> ToPromote1;
3889  if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
3890  return false;
3891  if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
3892  return false;
3893 
3894  ToPromote.insert(Op32.getNode());
3895  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
3896  return true;
3897  }
3898 
3899  // For ORI and ORIS, we need the higher-order bits of the first operand to be
3900  // zero, and also for the constant to be positive (so that it is not sign
3901  // extended).
3902  if (Op32.getMachineOpcode() == PPC::ORI ||
3903  Op32.getMachineOpcode() == PPC::ORIS) {
3904  SmallPtrSet<SDNode *, 16> ToPromote1;
3905  if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
3906  return false;
3907  if (!isUInt<15>(Op32.getConstantOperandVal(1)))
3908  return false;
3909 
3910  ToPromote.insert(Op32.getNode());
3911  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
3912  return true;
3913  }
3914 
3915  // The higher-order bits of AND are zero if that is true for at least one of
3916  // the operands.
3917  if (Op32.getMachineOpcode() == PPC::AND) {
3918  SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
3919  bool Op0OK =
3920  PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
3921  bool Op1OK =
3922  PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
3923  if (!Op0OK && !Op1OK)
3924  return false;
3925 
3926  ToPromote.insert(Op32.getNode());
3927 
3928  if (Op0OK)
3929  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
3930 
3931  if (Op1OK)
3932  ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
3933 
3934  return true;
3935  }
3936 
3937  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
3938  // of the first operand, or if the second operand is positive (so that it is
3939  // not sign extended).
3940  if (Op32.getMachineOpcode() == PPC::ANDIo ||
3941  Op32.getMachineOpcode() == PPC::ANDISo) {
3942  SmallPtrSet<SDNode *, 16> ToPromote1;
3943  bool Op0OK =
3944  PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
3945  bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
3946  if (!Op0OK && !Op1OK)
3947  return false;
3948 
3949  ToPromote.insert(Op32.getNode());
3950 
3951  if (Op0OK)
3952  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
3953 
3954  return true;
3955  }
3956 
3957  return false;
3958 }
3959 
3960 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
3961  if (!PPCSubTarget->isPPC64())
3962  return;
3963 
3964  // When we zero-extend from i32 to i64, we use a pattern like this:
3965  // def : Pat<(i64 (zext i32:$in)),
3966  // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
3967  // 0, 32)>;
3968  // There are several 32-bit shift/rotate instructions, however, that will
3969  // clear the higher-order bits of their output, rendering the RLDICL
3970  // unnecessary. When that happens, we remove it here, and redefine the
3971  // relevant 32-bit operation to be a 64-bit operation.
3972 
3973  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
3974  ++Position;
3975 
3976  bool MadeChange = false;
3977  while (Position != CurDAG->allnodes_begin()) {
3978  SDNode *N = --Position;
3979  // Skip dead nodes and any non-machine opcodes.
3980  if (N->use_empty() || !N->isMachineOpcode())
3981  continue;
3982 
3983  if (N->getMachineOpcode() != PPC::RLDICL)
3984  continue;
3985 
3986  if (N->getConstantOperandVal(1) != 0 ||
3987  N->getConstantOperandVal(2) != 32)
3988  continue;
3989 
3990  SDValue ISR = N->getOperand(0);
3991  if (!ISR.isMachineOpcode() ||
3993  continue;
3994 
3995  if (!ISR.hasOneUse())
3996  continue;
3997 
3998  if (ISR.getConstantOperandVal(2) != PPC::sub_32)
3999  continue;
4000 
4001  SDValue IDef = ISR.getOperand(0);
4002  if (!IDef.isMachineOpcode() ||
4004  continue;
4005 
4006  // We now know that we're looking at a canonical i32 -> i64 zext. See if we
4007  // can get rid of it.
4008 
4009  SDValue Op32 = ISR->getOperand(1);
4010  if (!Op32.isMachineOpcode())
4011  continue;
4012 
4013  // There are some 32-bit instructions that always clear the high-order 32
4014  // bits, there are also some instructions (like AND) that we can look
4015  // through.
4016  SmallPtrSet<SDNode *, 16> ToPromote;
4017  if (!PeepholePPC64ZExtGather(Op32, ToPromote))
4018  continue;
4019 
4020  // If the ToPromote set contains nodes that have uses outside of the set
4021  // (except for the original INSERT_SUBREG), then abort the transformation.
4022  bool OutsideUse = false;
4023  for (SDNode *PN : ToPromote) {
4024  for (SDNode *UN : PN->uses()) {
4025  if (!ToPromote.count(UN) && UN != ISR.getNode()) {
4026  OutsideUse = true;
4027  break;
4028  }
4029  }
4030 
4031  if (OutsideUse)
4032  break;
4033  }
4034  if (OutsideUse)
4035  continue;
4036 
4037  MadeChange = true;
4038 
4039  // We now know that this zero extension can be removed by promoting to
4040  // nodes in ToPromote to 64-bit operations, where for operations in the
4041  // frontier of the set, we need to insert INSERT_SUBREGs for their
4042  // operands.
4043  for (SDNode *PN : ToPromote) {
4044  unsigned NewOpcode;
4045  switch (PN->getMachineOpcode()) {
4046  default:
4047  llvm_unreachable("Don't know the 64-bit variant of this instruction");
4048  case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
4049  case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
4050  case PPC::SLW: NewOpcode = PPC::SLW8; break;
4051  case PPC::SRW: NewOpcode = PPC::SRW8; break;
4052  case PPC::LI: NewOpcode = PPC::LI8; break;
4053  case PPC::LIS: NewOpcode = PPC::LIS8; break;
4054  case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
4055  case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
4056  case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
4057  case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
4058  case PPC::OR: NewOpcode = PPC::OR8; break;
4059  case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
4060  case PPC::ORI: NewOpcode = PPC::ORI8; break;
4061  case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
4062  case PPC::AND: NewOpcode = PPC::AND8; break;
4063  case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break;
4064  case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break;
4065  }
4066 
4067  // Note: During the replacement process, the nodes will be in an
4068  // inconsistent state (some instructions will have operands with values
4069  // of the wrong type). Once done, however, everything should be right
4070  // again.
4071 
4073  for (const SDValue &V : PN->ops()) {
4074  if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
4075  !isa<ConstantSDNode>(V)) {
4076  SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
4077  SDNode *ReplOp =
4079  ISR.getNode()->getVTList(), ReplOpOps);
4080  Ops.push_back(SDValue(ReplOp, 0));
4081  } else {
4082  Ops.push_back(V);
4083  }
4084  }
4085 
4086  // Because all to-be-promoted nodes only have users that are other
4087  // promoted nodes (or the original INSERT_SUBREG), we can safely replace
4088  // the i32 result value type with i64.
4089 
4090  SmallVector<EVT, 2> NewVTs;
4091  SDVTList VTs = PN->getVTList();
4092  for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
4093  if (VTs.VTs[i] == MVT::i32)
4094  NewVTs.push_back(MVT::i64);
4095  else
4096  NewVTs.push_back(VTs.VTs[i]);
4097 
4098  DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
4099  DEBUG(PN->dump(CurDAG));
4100 
4101  CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
4102 
4103  DEBUG(dbgs() << "\nNew: ");
4104  DEBUG(PN->dump(CurDAG));
4105  DEBUG(dbgs() << "\n");
4106  }
4107 
4108  // Now we replace the original zero extend and its associated INSERT_SUBREG
4109  // with the value feeding the INSERT_SUBREG (which has now been promoted to
4110  // return an i64).
4111 
4112  DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
4113  DEBUG(N->dump(CurDAG));
4114  DEBUG(dbgs() << "\nNew: ");
4115  DEBUG(Op32.getNode()->dump(CurDAG));
4116  DEBUG(dbgs() << "\n");
4117 
4118  ReplaceUses(N, Op32.getNode());
4119  }
4120 
4121  if (MadeChange)
4122  CurDAG->RemoveDeadNodes();
4123 }
4124 
4125 void PPCDAGToDAGISel::PeepholePPC64() {
4126  // These optimizations are currently supported only for 64-bit SVR4.
4127  if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
4128  return;
4129 
4130  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
4131  ++Position;
4132 
4133  while (Position != CurDAG->allnodes_begin()) {
4134  SDNode *N = --Position;
4135  // Skip dead nodes and any non-machine opcodes.
4136  if (N->use_empty() || !N->isMachineOpcode())
4137  continue;
4138 
4139  unsigned FirstOp;
4140  unsigned StorageOpcode = N->getMachineOpcode();
4141 
4142  switch (StorageOpcode) {
4143  default: continue;
4144 
4145  case PPC::LBZ:
4146  case PPC::LBZ8:
4147  case PPC::LD:
4148  case PPC::LFD:
4149  case PPC::LFS:
4150  case PPC::LHA:
4151  case PPC::LHA8:
4152  case PPC::LHZ:
4153  case PPC::LHZ8:
4154  case PPC::LWA:
4155  case PPC::LWZ:
4156  case PPC::LWZ8:
4157  FirstOp = 0;
4158  break;
4159 
4160  case PPC::STB:
4161  case PPC::STB8:
4162  case PPC::STD:
4163  case PPC::STFD:
4164  case PPC::STFS:
4165  case PPC::STH:
4166  case PPC::STH8:
4167  case PPC::STW:
4168  case PPC::STW8:
4169  FirstOp = 1;
4170  break;
4171  }
4172 
4173  // If this is a load or store with a zero offset, we may be able to
4174  // fold an add-immediate into the memory operation.
4175  if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
4176  N->getConstantOperandVal(FirstOp) != 0)
4177  continue;
4178 
4179  SDValue Base = N->getOperand(FirstOp + 1);
4180  if (!Base.isMachineOpcode())
4181  continue;
4182 
4183  unsigned Flags = 0;
4184  bool ReplaceFlags = true;
4185 
4186  // When the feeding operation is an add-immediate of some sort,
4187  // determine whether we need to add relocation information to the
4188  // target flags on the immediate operand when we fold it into the
4189  // load instruction.
4190  //
4191  // For something like ADDItocL, the relocation information is
4192  // inferred from the opcode; when we process it in the AsmPrinter,
4193  // we add the necessary relocation there. A load, though, can receive
4194  // relocation from various flavors of ADDIxxx, so we need to carry
4195  // the relocation information in the target flags.
4196  switch (Base.getMachineOpcode()) {
4197  default: continue;
4198 
4199  case PPC::ADDI8:
4200  case PPC::ADDI:
4201  // In some cases (such as TLS) the relocation information
4202  // is already in place on the operand, so copying the operand
4203  // is sufficient.
4204  ReplaceFlags = false;
4205  // For these cases, the immediate may not be divisible by 4, in
4206  // which case the fold is illegal for DS-form instructions. (The
4207  // other cases provide aligned addresses and are always safe.)
4208  if ((StorageOpcode == PPC::LWA ||
4209  StorageOpcode == PPC::LD ||
4210  StorageOpcode == PPC::STD) &&
4211  (!isa<ConstantSDNode>(Base.getOperand(1)) ||
4212  Base.getConstantOperandVal(1) % 4 != 0))
4213  continue;
4214  break;
4215  case PPC::ADDIdtprelL:
4216  Flags = PPCII::MO_DTPREL_LO;
4217  break;
4218  case PPC::ADDItlsldL:
4219  Flags = PPCII::MO_TLSLD_LO;
4220  break;
4221  case PPC::ADDItocL:
4222  Flags = PPCII::MO_TOC_LO;
4223  break;
4224  }
4225 
4226  // We found an opportunity. Reverse the operands from the add
4227  // immediate and substitute them into the load or store. If
4228  // needed, update the target flags for the immediate operand to
4229  // reflect the necessary relocation information.
4230  DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
4231  DEBUG(Base->dump(CurDAG));
4232  DEBUG(dbgs() << "\nN: ");
4233  DEBUG(N->dump(CurDAG));
4234  DEBUG(dbgs() << "\n");
4235 
4236  SDValue ImmOpnd = Base.getOperand(1);
4237 
4238  // If the relocation information isn't already present on the
4239  // immediate operand, add it now.
4240  if (ReplaceFlags) {
4241  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
4242  SDLoc dl(GA);
4243  const GlobalValue *GV = GA->getGlobal();
4244  // We can't perform this optimization for data whose alignment
4245  // is insufficient for the instruction encoding.
4246  if (GV->getAlignment() < 4 &&
4247  (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
4248  StorageOpcode == PPC::LWA)) {
4249  DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
4250  continue;
4251  }
4252  ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
4253  } else if (ConstantPoolSDNode *CP =
4254  dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
4255  const Constant *C = CP->getConstVal();
4256  ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
4257  CP->getAlignment(),
4258  0, Flags);
4259  }
4260  }
4261 
4262  if (FirstOp == 1) // Store
4263  (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
4264  Base.getOperand(0), N->getOperand(3));
4265  else // Load
4266  (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
4267  N->getOperand(2));
4268 
4269  // The add-immediate may now be dead, in which case remove it.
4270  if (Base.getNode()->use_empty())
4271  CurDAG->RemoveDeadNode(Base.getNode());
4272  }
4273 }
4274 
4275 
4276 /// createPPCISelDag - This pass converts a legalized DAG into a
4277 /// PowerPC-specific DAG, ready for instruction scheduling.
4278 ///
4280  return new PPCDAGToDAGISel(TM);
4281 }
4282 
4284  const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
4285  PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID,
4286  nullptr, false, false);
4287  Registry.registerPass(*PI, true);
4288 }
4289 
4292 }
4293 
bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:276
bool use_empty() const
Return true if there are no uses of this node.
SDValue getValue(unsigned R) const
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
T findLastSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the last set bit starting from the least significant bit.
Definition: MathExtras.h:206
static unsigned SelectInt64CountDirect(int64_t Imm)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
SDValue getCopyToReg(SDValue Chain, SDLoc dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:522
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1327
void dump() const
Dump this node, for debugging.
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:554
GPRC = address of GLOBAL_OFFSET_TABLE.
bool hasOneUse() const
Return true if there is exactly one use of this node.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:301
static unsigned index2VirtReg(unsigned Index)
index2VirtReg - Convert a 0-based index to a virtual register number.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:114
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDVTList getVTList() const
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:61
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:66
unsigned getID() const
getID() - Return the register class ID number.
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:122
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:261
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
unsigned getNumOperands() const
Return the number of values used by this operation.
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
A debug info location.
Definition: DebugLoc.h:34
const SDValue & getOperand(unsigned Num) const
F(f)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
void setNodeId(int Id)
Set unique node id.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
const SDValue & getBasePtr() const
unsigned int NumVTs
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:357
bool isUnsignedIntSetCC(CondCode Code)
isUnsignedIntSetCC - Return true if this is a setcc instruction that performs an unsigned comparison ...
Definition: ISDOpcodes.h:843
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:242
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:115
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode * > &ToPromote)
MachineMemOperand - A description of a memory reference used in the backend.
bool hasCommonLinkage() const
Definition: GlobalValue.h:282
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const HexagonInstrInfo * TII
Shift and rotation operations.
Definition: ISDOpcodes.h:332
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:407
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
SDValue getTargetGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:467
Reg
All possible values of the reg field in the ModR/M byte.
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
unsigned getMachineOpcode() const
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:353
int getMaskElt(unsigned Idx) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
#define false
Definition: ConvertUTF.c:65
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:591
#define G(x, y, z)
Definition: MD5.cpp:52
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:326
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const MachineBasicBlock & front() const
static unsigned SelectInt64Count(int64_t Imm)
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
#define T
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:351
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
bool isMask_64(uint64_t Value)
isMask_64 - This function returns true if the argument is a non-empty sequence of ones starting at th...
Definition: MathExtras.h:335
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:804
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:116
EVT getMemoryVT() const
Return the type of the in-memory value.
SDValue getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
Type * getElementType() const
Definition: DerivedTypes.h:323
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:284
TargetInstrInfo - Interface to description of machine instruction set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:513
SDNode * getNode() const
get the SDNode which holds the desired result
IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
Definition: TargetOpcodes.h:52
bundle_iterator< MachineInstr, instr_iterator > iterator
static bool isIntS16Immediate(SDNode *N, short &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
MVT - Machine Value Type.
const SDValue & getOperand(unsigned i) const
This is an important base class in LLVM.
Definition: Constant.h:41
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC)
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:264
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
Definition: APInt.h:1900
INSERT_SUBREG - This instruction takes three operands: a register that has subregisters, a register providing an insert value, and a subregister index.
Definition: TargetOpcodes.h:49
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
This class provides iterator support for SDUse operands that use a specific SDNode.
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:164
SDValue getTargetConstant(uint64_t Val, SDLoc DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:436
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
unsigned getOpcode() const
void initializePPCDAGToDAGISelPass(PassRegistry &)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
iterator begin() const
Definition: SmallPtrSet.h:286
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
PassInfo class - An instance of this class exists for every pass known by the system, and can be obtained from a live Pass by calling its getPassInfo() method.
Definition: PassInfo.h:30
EVT - Extended Value Type.
Definition: ValueTypes.h:31
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:335
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
const SDValue & getOffset() const
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for ""bit permutations"), cl::Hidden)
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:484
SDValue FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, SDNode *Cst1, SDNode *Cst2)
EXTRACT_SUBREG - This instruction takes two operands: a register that has subregisters, and a subregister index.
Definition: TargetOpcodes.h:41
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:299
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition: Module.cpp:471
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:317
bool isFunctionTy() const
isFunctionTy - True if this is an instance of FunctionType.
Definition: Type.h:205
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:179
SDNode * SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Module.h This file contains the declarations for the Module class.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
bool isMachineOpcode() const
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
These values identify relocations on immediates folded into memory operations.
Definition: PPC.h:92
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after execu...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:73
bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:302
iterator_range< use_iterator > uses()
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:383
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:386
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2...
iterator end() const
Definition: SmallPtrSet.h:289
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:697
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:185
uint64_t getConstantOperandVal(unsigned i) const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:321
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:128
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:518
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:233
COPY_TO_REGCLASS - This instruction is a placeholder for a plain register-to-register copy into a spe...
Definition: TargetOpcodes.h:66
static SDNode * SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm)
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
ArrayRef< SDUse > ops() const
The CMPB instruction (takes two operands of i32 or i64).
MachineSDNode * getMachineNode(unsigned Opcode, SDLoc dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:272
static SDNode * SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm)
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSelect(SDLoc DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:739
unsigned getAlignment() const
Definition: Globals.cpp:63
const ARM::ArchExtKind Kind
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side...
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:332
static uint64_t Rot64(uint64_t Imm, unsigned R)
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
SDValue getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:365
LLVM Value Representation.
Definition: Value.h:69
SDValue getRegister(unsigned Reg, EVT VT)
#define CALL_ONCE_INITIALIZATION(function)
Definition: PassSupport.h:36
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:298
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
BasicBlockListType::iterator iterator
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:287
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
#define DEBUG(X)
Definition: Debug.h:92
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:41
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:365
Conversion operators.
Definition: ISDOpcodes.h:380
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:338
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:389
FunctionPass * createPPCISelDag(PPCTargetMachine &TM)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG, ready for instruction scheduling.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
static void initializePassOnce(PassRegistry &Registry)
void registerPass(const PassInfo &PI, bool ShouldFree=false)
registerPass - Register a pass (by means of its PassInfo) with the registry.
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
virtual const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const
getPointerRegClass - Returns a TargetRegisterClass used for pointer values.
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:309
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:203
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
BRIND - Indirect branch.
Definition: ISDOpcodes.h:538
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
void resize(size_type N)
Definition: SmallVector.h:376
This class is used to represent ISD::LOAD nodes.