LLVM 20.0.0git
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pattern matching instruction selector for PowerPC,
10// converting from a legalized dag to a PPC dag.
11//
12//===----------------------------------------------------------------------===//
13
16#include "PPC.h"
17#include "PPCISelLowering.h"
19#include "PPCSubtarget.h"
20#include "PPCTargetMachine.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/ADT/APSInt.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/Statistic.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/DebugLoc.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/InlineAsm.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/IntrinsicsPowerPC.h"
50#include "llvm/IR/Module.h"
55#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <cstdint>
63#include <iterator>
64#include <limits>
65#include <memory>
66#include <new>
67#include <tuple>
68#include <utility>
69
70using namespace llvm;
71
72#define DEBUG_TYPE "ppc-isel"
73#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
74
75STATISTIC(NumSextSetcc,
76 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
77STATISTIC(NumZextSetcc,
78 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
79STATISTIC(SignExtensionsAdded,
80 "Number of sign extensions for compare inputs added.");
81STATISTIC(ZeroExtensionsAdded,
82 "Number of zero extensions for compare inputs added.");
83STATISTIC(NumLogicOpsOnComparison,
84 "Number of logical ops on i1 values calculated in GPR.");
85STATISTIC(OmittedForNonExtendUses,
86 "Number of compares not eliminated as they have non-extending uses.");
87STATISTIC(NumP9Setb,
88 "Number of compares lowered to setb.");
89
90// FIXME: Remove this once the bug has been fixed!
91cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
92cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
93
94static cl::opt<bool>
95 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
96 cl::desc("use aggressive ppc isel for bit permutations"),
99 "ppc-bit-perm-rewriter-stress-rotates",
100 cl::desc("stress rotate selection in aggressive ppc isel for "
101 "bit permutations"),
102 cl::Hidden);
103
105 "ppc-use-branch-hint", cl::init(true),
106 cl::desc("Enable static hinting of branches on ppc"),
107 cl::Hidden);
108
110 "ppc-tls-opt", cl::init(true),
111 cl::desc("Enable tls optimization peephole"),
112 cl::Hidden);
113
117
119 "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
120 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
121 cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
122 clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
123 clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
124 clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
125 clEnumValN(ICGPR_NonExtIn, "nonextin",
126 "Only comparisons where inputs don't need [sz]ext."),
127 clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
128 clEnumValN(ICGPR_ZextI32, "zexti32",
129 "Only i32 comparisons with zext result."),
130 clEnumValN(ICGPR_ZextI64, "zexti64",
131 "Only i64 comparisons with zext result."),
132 clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
133 clEnumValN(ICGPR_SextI32, "sexti32",
134 "Only i32 comparisons with sext result."),
135 clEnumValN(ICGPR_SextI64, "sexti64",
136 "Only i64 comparisons with sext result.")));
137namespace {
138
139 //===--------------------------------------------------------------------===//
140 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
141 /// instructions for SelectionDAG operations.
142 ///
143 class PPCDAGToDAGISel : public SelectionDAGISel {
144 const PPCTargetMachine &TM;
145 const PPCSubtarget *Subtarget = nullptr;
146 const PPCTargetLowering *PPCLowering = nullptr;
147 unsigned GlobalBaseReg = 0;
148
149 public:
150 PPCDAGToDAGISel() = delete;
151
152 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
153 : SelectionDAGISel(tm, OptLevel), TM(tm) {}
154
155 bool runOnMachineFunction(MachineFunction &MF) override {
156 // Make sure we re-emit a set of the global base reg if necessary
157 GlobalBaseReg = 0;
158 Subtarget = &MF.getSubtarget<PPCSubtarget>();
159 PPCLowering = Subtarget->getTargetLowering();
160 if (Subtarget->hasROPProtect()) {
161 // Create a place on the stack for the ROP Protection Hash.
162 // The ROP Protection Hash will always be 8 bytes and aligned to 8
163 // bytes.
164 MachineFrameInfo &MFI = MF.getFrameInfo();
166 const int Result = MFI.CreateStackObject(8, Align(8), false);
168 }
170
171 return true;
172 }
173
174 void PreprocessISelDAG() override;
175 void PostprocessISelDAG() override;
176
177 /// getI16Imm - Return a target constant with the specified value, of type
178 /// i16.
179 inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
180 return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
181 }
182
183 /// getI32Imm - Return a target constant with the specified value, of type
184 /// i32.
185 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
186 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
187 }
188
189 /// getI64Imm - Return a target constant with the specified value, of type
190 /// i64.
191 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
192 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
193 }
194
195 /// getSmallIPtrImm - Return a target constant of pointer type.
196 inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) {
197 return CurDAG->getTargetConstant(
198 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
199 }
200
201 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
202 /// rotate and mask opcode and mask operation.
203 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
204 unsigned &SH, unsigned &MB, unsigned &ME);
205
206 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
207 /// base register. Return the virtual register that holds this value.
208 SDNode *getGlobalBaseReg();
209
210 void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0);
211
212 // Select - Convert the specified operand from a target-independent to a
213 // target-specific node if it hasn't already been changed.
214 void Select(SDNode *N) override;
215
216 bool tryBitfieldInsert(SDNode *N);
217 bool tryBitPermutation(SDNode *N);
218 bool tryIntCompareInGPR(SDNode *N);
219
220 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
221 // an X-Form load instruction with the offset being a relocation coming from
222 // the PPCISD::ADD_TLS.
223 bool tryTLSXFormLoad(LoadSDNode *N);
224 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
225 // an X-Form store instruction with the offset being a relocation coming from
226 // the PPCISD::ADD_TLS.
227 bool tryTLSXFormStore(StoreSDNode *N);
228 /// SelectCC - Select a comparison of the specified values with the
229 /// specified condition code, returning the CR# of the expression.
230 SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
231 const SDLoc &dl, SDValue Chain = SDValue());
232
233 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
234 /// immediate field. Note that the operand at this point is already the
235 /// result of a prior SelectAddressRegImm call.
236 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
237 if (N.getOpcode() == ISD::TargetConstant ||
238 N.getOpcode() == ISD::TargetGlobalAddress) {
239 Out = N;
240 return true;
241 }
242
243 return false;
244 }
245
246 /// SelectDSForm - Returns true if address N can be represented by the
247 /// addressing mode of DSForm instructions (a base register, plus a signed
248 /// 16-bit displacement that is a multiple of 4.
249 bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
250 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
251 Align(4)) == PPC::AM_DSForm;
252 }
253
254 /// SelectDQForm - Returns true if address N can be represented by the
255 /// addressing mode of DQForm instructions (a base register, plus a signed
256 /// 16-bit displacement that is a multiple of 16.
257 bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
258 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
259 Align(16)) == PPC::AM_DQForm;
260 }
261
262 /// SelectDForm - Returns true if address N can be represented by
263 /// the addressing mode of DForm instructions (a base register, plus a
264 /// signed 16-bit immediate.
265 bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
266 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
267 std::nullopt) == PPC::AM_DForm;
268 }
269
270 /// SelectPCRelForm - Returns true if address N can be represented by
271 /// PC-Relative addressing mode.
272 bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
273 SDValue &Base) {
274 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
275 std::nullopt) == PPC::AM_PCRel;
276 }
277
278 /// SelectPDForm - Returns true if address N can be represented by Prefixed
279 /// DForm addressing mode (a base register, plus a signed 34-bit immediate.
280 bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
281 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
282 std::nullopt) ==
284 }
285
286 /// SelectXForm - Returns true if address N can be represented by the
287 /// addressing mode of XForm instructions (an indexed [r+r] operation).
288 bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
289 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
290 std::nullopt) == PPC::AM_XForm;
291 }
292
293 /// SelectForceXForm - Given the specified address, force it to be
294 /// represented as an indexed [r+r] operation (an XForm instruction).
295 bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
296 SDValue &Base) {
297 return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
299 }
300
301 /// SelectAddrIdx - Given the specified address, check to see if it can be
302 /// represented as an indexed [r+r] operation.
303 /// This is for xform instructions whose associated displacement form is D.
304 /// The last argument (std::nullopt) means the associated D form places no
305 /// alignment requirement on its 16-bit signed displacement.
306 /// Returns false if it can be represented by [r+imm], which are preferred.
307 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
308 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
309 std::nullopt);
310 }
311
312 /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
313 /// represented as an indexed [r+r] operation.
314 /// This is for xform instructions whose associated displacement form is DS.
315 /// The last parameter \p 4 means associated DS form 16 bit signed
316 /// displacement must be a multiple of 4.
317 /// Returns false if it can be represented by [r+imm], which are preferred.
318 bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
319 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
320 Align(4));
321 }
322
323 /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
324 /// represented as an indexed [r+r] operation.
325 /// This is for xform instructions whose associated displacement form is DQ.
326 /// The last parameter \p 16 means associated DQ form 16 bit signed
327 /// displacement must be a multiple of 16.
328 /// Returns false if it can be represented by [r+imm], which are preferred.
329 bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
330 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
331 Align(16));
332 }
333
334 /// SelectAddrIdxOnly - Given the specified address, force it to be
335 /// represented as an indexed [r+r] operation.
336 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
337 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
338 }
339
340 /// SelectAddrImm - Returns true if the address N can be represented by
341 /// a base register plus a signed 16-bit displacement [r+imm].
342 /// The last argument (std::nullopt) means the D form places no alignment
343 /// requirement on its 16-bit signed displacement.
344 bool SelectAddrImm(SDValue N, SDValue &Disp,
345 SDValue &Base) {
346 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
347 std::nullopt);
348 }
349
350 /// SelectAddrImmX4 - Returns true if the address N can be represented by
351 /// a base register plus a signed 16-bit displacement that is a multiple of
352 /// 4 (last parameter). Suitable for use by STD and friends.
353 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
354 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
355 }
356
357 /// SelectAddrImmX16 - Returns true if the address N can be represented by
358 /// a base register plus a signed 16-bit displacement that is a multiple of
359 /// 16(last parameter). Suitable for use by STXV and friends.
360 bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
361 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
362 Align(16));
363 }
364
365 /// SelectAddrImmX34 - Returns true if the address N can be represented by
366 /// a base register plus a signed 34-bit displacement. Suitable for use by
367 /// PSTXVP and friends.
368 bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
369 return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
370 }
371
372 // Select an address into a single register.
373 bool SelectAddr(SDValue N, SDValue &Base) {
374 Base = N;
375 return true;
376 }
377
378 bool SelectAddrPCRel(SDValue N, SDValue &Base) {
379 return PPCLowering->SelectAddressPCRel(N, Base);
380 }
381
382 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
383 /// inline asm expressions. It is always correct to compute the value into
384 /// a register. The case of adding a (possibly relocatable) constant to a
385 /// register can be improved, but it is wrong to substitute Reg+Reg for
386 /// Reg in an asm, because the load or store opcode would have to change.
388 InlineAsm::ConstraintCode ConstraintID,
389 std::vector<SDValue> &OutOps) override {
390 switch(ConstraintID) {
391 default:
392 errs() << "ConstraintID: "
393 << InlineAsm::getMemConstraintName(ConstraintID) << "\n";
394 llvm_unreachable("Unexpected asm memory constraint");
401 // We need to make sure that this one operand does not end up in r0
402 // (because we might end up lowering this as 0(%op)).
403 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
404 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
405 SDLoc dl(Op);
406 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
407 SDValue NewOp =
408 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
409 dl, Op.getValueType(),
410 Op, RC), 0);
411
412 OutOps.push_back(NewOp);
413 return false;
414 }
415 return true;
416 }
417
418// Include the pieces autogenerated from the target description.
419#include "PPCGenDAGISel.inc"
420
421private:
422 bool trySETCC(SDNode *N);
423 bool tryFoldSWTestBRCC(SDNode *N);
424 bool trySelectLoopCountIntrinsic(SDNode *N);
425 bool tryAsSingleRLDICL(SDNode *N);
426 bool tryAsSingleRLDCL(SDNode *N);
427 bool tryAsSingleRLDICR(SDNode *N);
428 bool tryAsSingleRLWINM(SDNode *N);
429 bool tryAsSingleRLWINM8(SDNode *N);
430 bool tryAsSingleRLWIMI(SDNode *N);
431 bool tryAsPairOfRLDICL(SDNode *N);
432 bool tryAsSingleRLDIMI(SDNode *N);
433
434 void PeepholePPC64();
435 void PeepholePPC64ZExt();
436 void PeepholeCROps();
437
438 SDValue combineToCMPB(SDNode *N);
439 void foldBoolExts(SDValue &Res, SDNode *&N);
440
441 bool AllUsersSelectZero(SDNode *N);
442 void SwapAllSelectUsers(SDNode *N);
443
444 bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
445 void transferMemOperands(SDNode *N, SDNode *Result);
446 };
447
448 class PPCDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
449 public:
450 static char ID;
451 explicit PPCDAGToDAGISelLegacy(PPCTargetMachine &tm,
452 CodeGenOptLevel OptLevel)
454 ID, std::make_unique<PPCDAGToDAGISel>(tm, OptLevel)) {}
455 };
456} // end anonymous namespace
457
458char PPCDAGToDAGISelLegacy::ID = 0;
459
460INITIALIZE_PASS(PPCDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
461
462/// getGlobalBaseReg - Output the instructions required to put the
463/// base address to use for accessing globals into a register.
464///
465SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
466 if (!GlobalBaseReg) {
467 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
468 // Insert the set of GlobalBaseReg into the first MBB of the function
469 MachineBasicBlock &FirstMBB = MF->front();
471 const Module *M = MF->getFunction().getParent();
472 DebugLoc dl;
473
474 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
475 if (Subtarget->isTargetELF()) {
476 GlobalBaseReg = PPC::R30;
477 if (!Subtarget->isSecurePlt() &&
478 M->getPICLevel() == PICLevel::SmallPIC) {
479 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
480 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
481 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
482 } else {
483 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
484 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
485 Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
486 BuildMI(FirstMBB, MBBI, dl,
487 TII.get(PPC::UpdateGBR), GlobalBaseReg)
488 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
489 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
490 }
491 } else {
493 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
494 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
495 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
496 }
497 } else {
498 // We must ensure that this sequence is dominated by the prologue.
499 // FIXME: This is a bit of a big hammer since we don't get the benefits
500 // of shrink-wrapping whenever we emit this instruction. Considering
501 // this is used in any function where we emit a jump table, this may be
502 // a significant limitation. We should consider inserting this in the
503 // block where it is used and then commoning this sequence up if it
504 // appears in multiple places.
505 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
506 // MovePCtoLR8.
507 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
508 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
509 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
510 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
511 }
512 }
513 return CurDAG->getRegister(GlobalBaseReg,
514 PPCLowering->getPointerTy(CurDAG->getDataLayout()))
515 .getNode();
516}
517
518// Check if a SDValue has the toc-data attribute.
519static bool hasTocDataAttr(SDValue Val) {
520 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
521 if (!GA)
522 return false;
523
524 const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
525 if (!GV)
526 return false;
527
528 if (!GV->hasAttribute("toc-data"))
529 return false;
530 return true;
531}
532
534 const TargetMachine &TM,
535 const SDNode *Node) {
536 // If there isn't an attribute to override the module code model
537 // this will be the effective code model.
538 CodeModel::Model ModuleModel = TM.getCodeModel();
539
540 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Node->getOperand(0));
541 if (!GA)
542 return ModuleModel;
543
544 const GlobalValue *GV = GA->getGlobal();
545 if (!GV)
546 return ModuleModel;
547
548 return Subtarget.getCodeModel(TM, GV);
549}
550
551/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
552/// operand. If so Imm will receive the 32-bit value.
553static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
554 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
555 Imm = N->getAsZExtVal();
556 return true;
557 }
558 return false;
559}
560
561/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
562/// operand. If so Imm will receive the 64-bit value.
563static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
564 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
565 Imm = N->getAsZExtVal();
566 return true;
567 }
568 return false;
569}
570
571// isInt32Immediate - This method tests to see if a constant operand.
572// If so Imm will receive the 32 bit value.
573static bool isInt32Immediate(SDValue N, unsigned &Imm) {
574 return isInt32Immediate(N.getNode(), Imm);
575}
576
577/// isInt64Immediate - This method tests to see if the value is a 64-bit
578/// constant operand. If so Imm will receive the 64-bit value.
579static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
580 return isInt64Immediate(N.getNode(), Imm);
581}
582
583static unsigned getBranchHint(unsigned PCC,
584 const FunctionLoweringInfo &FuncInfo,
585 const SDValue &DestMBB) {
586 assert(isa<BasicBlockSDNode>(DestMBB));
587
588 if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
589
590 const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
591 const Instruction *BBTerm = BB->getTerminator();
592
593 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
594
595 const BasicBlock *TBB = BBTerm->getSuccessor(0);
596 const BasicBlock *FBB = BBTerm->getSuccessor(1);
597
598 auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
599 auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
600
601 // We only want to handle cases which are easy to predict at static time, e.g.
602 // C++ throw statement, that is very likely not taken, or calling never
603 // returned function, e.g. stdlib exit(). So we set Threshold to filter
604 // unwanted cases.
605 //
606 // Below is LLVM branch weight table, we only want to handle case 1, 2
607 //
608 // Case Taken:Nontaken Example
609 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
610 // 2. Invoke-terminating 1:1048575
611 // 3. Coldblock 4:64 __builtin_expect
612 // 4. Loop Branch 124:4 For loop
613 // 5. PH/ZH/FPH 20:12
614 const uint32_t Threshold = 10000;
615
616 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
617 return PPC::BR_NO_HINT;
618
619 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
620 << "::" << BB->getName() << "'\n"
621 << " -> " << TBB->getName() << ": " << TProb << "\n"
622 << " -> " << FBB->getName() << ": " << FProb << "\n");
623
624 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
625
626 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
627 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
628 if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
629 std::swap(TProb, FProb);
630
631 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
632}
633
634// isOpcWithIntImmediate - This method tests to see if the node is a specific
635// opcode and that it has a immediate integer right operand.
636// If so Imm will receive the 32 bit value.
637static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
638 return N->getOpcode() == Opc
639 && isInt32Immediate(N->getOperand(1).getNode(), Imm);
640}
641
642void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) {
643 SDLoc dl(SN);
644 int FI = cast<FrameIndexSDNode>(N)->getIndex();
645 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
646 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
647 if (SN->hasOneUse())
648 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
649 getSmallIPtrImm(Offset, dl));
650 else
651 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
652 getSmallIPtrImm(Offset, dl)));
653}
654
655bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
656 bool isShiftMask, unsigned &SH,
657 unsigned &MB, unsigned &ME) {
658 // Don't even go down this path for i64, since different logic will be
659 // necessary for rldicl/rldicr/rldimi.
660 if (N->getValueType(0) != MVT::i32)
661 return false;
662
663 unsigned Shift = 32;
664 unsigned Indeterminant = ~0; // bit mask marking indeterminant results
665 unsigned Opcode = N->getOpcode();
666 if (N->getNumOperands() != 2 ||
667 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
668 return false;
669
670 if (Opcode == ISD::SHL) {
671 // apply shift left to mask if it comes first
672 if (isShiftMask) Mask = Mask << Shift;
673 // determine which bits are made indeterminant by shift
674 Indeterminant = ~(0xFFFFFFFFu << Shift);
675 } else if (Opcode == ISD::SRL) {
676 // apply shift right to mask if it comes first
677 if (isShiftMask) Mask = Mask >> Shift;
678 // determine which bits are made indeterminant by shift
679 Indeterminant = ~(0xFFFFFFFFu >> Shift);
680 // adjust for the left rotate
681 Shift = 32 - Shift;
682 } else if (Opcode == ISD::ROTL) {
683 Indeterminant = 0;
684 } else {
685 return false;
686 }
687
688 // if the mask doesn't intersect any Indeterminant bits
689 if (Mask && !(Mask & Indeterminant)) {
690 SH = Shift & 31;
691 // make sure the mask is still a mask (wrap arounds may not be)
692 return isRunOfOnes(Mask, MB, ME);
693 }
694 return false;
695}
696
697// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
698// instruction use the thread pointer.
700 assert(
701 Base.getOpcode() == PPCISD::ADD_TLS &&
702 "Only expecting the ADD_TLS instruction to acquire the thread pointer!");
703 const PPCSubtarget &Subtarget =
705 SDValue ADDTLSOp1 = Base.getOperand(0);
706 unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
707
708 // Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
709 //
710 // Although ADD_TLS does not explicitly use the thread pointer
711 // register when LD_GOT_TPREL_L is one of its operands, the LD_GOT_TPREL_L
712 // instruction will have a relocation specifier, @got@tprel, that is used to
713 // generate a GOT entry. The linker replaces this entry with an offset
714 // for a thread local variable, which will be relative to the thread pointer.
715 if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
716 return true;
717 // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
718 // node is produced instead to represent the aforementioned situation.
719 LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSOp1);
720 if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
721 return true;
722
723 // A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand
724 // to ADD_TLS represents a call to .__get_tpointer to get the thread pointer,
725 // later returning it into R3.
726 if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
727 return true;
728
729 // The ADD_TLS node is explicitly acquiring the thread pointer (X13/R13).
730 RegisterSDNode *AddFirstOpReg =
731 dyn_cast_or_null<RegisterSDNode>(ADDTLSOp1.getNode());
732 if (AddFirstOpReg &&
733 AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
734 return true;
735
736 return false;
737}
738
739// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
740// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
741// operation, can be optimized to use an X-Form load or store, allowing the
742// ADD_TLS node to be removed completely.
744
745 // Do not do this transformation at -O0.
746 if (CurDAG->getTarget().getOptLevel() == CodeGenOptLevel::None)
747 return false;
748
749 // In order to perform this optimization inside tryTLSXForm[Load|Store],
750 // Base is expected to be an ADD_TLS node.
751 if (Base.getOpcode() != PPCISD::ADD_TLS)
752 return false;
753 for (auto *ADDTLSUse : Base.getNode()->uses()) {
754 // The optimization to convert the D-Form load/store into its X-Form
755 // counterpart should only occur if the source value offset of the load/
756 // store is 0. This also means that the offset should always be undefined.
757 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSUse)) {
758 if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
759 return false;
760 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(ADDTLSUse)) {
761 if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
762 return false;
763 } else // Don't optimize if there are ADD_TLS users that aren't load/stores.
764 return false;
765 }
766
767 if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
768 return false;
769
770 // Does the ADD_TLS node of the load/store use the thread pointer?
771 // If the thread pointer is not used as one of the operands of ADD_TLS,
772 // then this optimization is not valid.
773 return isThreadPointerAcquisitionNode(Base, CurDAG);
774}
775
776bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
777 SDValue Base = ST->getBasePtr();
778 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
779 return false;
780
781 SDLoc dl(ST);
782 EVT MemVT = ST->getMemoryVT();
783 EVT RegVT = ST->getValue().getValueType();
784
785 unsigned Opcode;
786 switch (MemVT.getSimpleVT().SimpleTy) {
787 default:
788 return false;
789 case MVT::i8: {
790 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
791 break;
792 }
793 case MVT::i16: {
794 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
795 break;
796 }
797 case MVT::i32: {
798 Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
799 break;
800 }
801 case MVT::i64: {
802 Opcode = PPC::STDXTLS;
803 break;
804 }
805 case MVT::f32: {
806 Opcode = PPC::STFSXTLS;
807 break;
808 }
809 case MVT::f64: {
810 Opcode = PPC::STFDXTLS;
811 break;
812 }
813 }
814 SDValue Chain = ST->getChain();
815 SDVTList VTs = ST->getVTList();
816 SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
817 Chain};
818 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
819 transferMemOperands(ST, MN);
820 ReplaceNode(ST, MN);
821 return true;
822}
823
824bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
825 SDValue Base = LD->getBasePtr();
826 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
827 return false;
828
829 SDLoc dl(LD);
830 EVT MemVT = LD->getMemoryVT();
831 EVT RegVT = LD->getValueType(0);
832 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
833 unsigned Opcode;
834 switch (MemVT.getSimpleVT().SimpleTy) {
835 default:
836 return false;
837 case MVT::i8: {
838 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
839 break;
840 }
841 case MVT::i16: {
842 if (RegVT == MVT::i32)
843 Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
844 else
845 Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
846 break;
847 }
848 case MVT::i32: {
849 if (RegVT == MVT::i32)
850 Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
851 else
852 Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
853 break;
854 }
855 case MVT::i64: {
856 Opcode = PPC::LDXTLS;
857 break;
858 }
859 case MVT::f32: {
860 Opcode = PPC::LFSXTLS;
861 break;
862 }
863 case MVT::f64: {
864 Opcode = PPC::LFDXTLS;
865 break;
866 }
867 }
868 SDValue Chain = LD->getChain();
869 SDVTList VTs = LD->getVTList();
870 SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
871 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
872 transferMemOperands(LD, MN);
873 ReplaceNode(LD, MN);
874 return true;
875}
876
877/// Turn an or of two masked values into the rotate left word immediate then
878/// mask insert (rlwimi) instruction.
879bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
880 SDValue Op0 = N->getOperand(0);
881 SDValue Op1 = N->getOperand(1);
882 SDLoc dl(N);
883
884 KnownBits LKnown = CurDAG->computeKnownBits(Op0);
885 KnownBits RKnown = CurDAG->computeKnownBits(Op1);
886
887 unsigned TargetMask = LKnown.Zero.getZExtValue();
888 unsigned InsertMask = RKnown.Zero.getZExtValue();
889
890 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
891 unsigned Op0Opc = Op0.getOpcode();
892 unsigned Op1Opc = Op1.getOpcode();
893 unsigned Value, SH = 0;
894 TargetMask = ~TargetMask;
895 InsertMask = ~InsertMask;
896
897 // If the LHS has a foldable shift and the RHS does not, then swap it to the
898 // RHS so that we can fold the shift into the insert.
899 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
900 if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
901 Op0.getOperand(0).getOpcode() == ISD::SRL) {
902 if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
903 Op1.getOperand(0).getOpcode() != ISD::SRL) {
904 std::swap(Op0, Op1);
905 std::swap(Op0Opc, Op1Opc);
906 std::swap(TargetMask, InsertMask);
907 }
908 }
909 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
910 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
911 Op1.getOperand(0).getOpcode() != ISD::SRL) {
912 std::swap(Op0, Op1);
913 std::swap(Op0Opc, Op1Opc);
914 std::swap(TargetMask, InsertMask);
915 }
916 }
917
918 unsigned MB, ME;
919 if (isRunOfOnes(InsertMask, MB, ME)) {
920 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
922 Op1 = Op1.getOperand(0);
923 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
924 }
925 if (Op1Opc == ISD::AND) {
926 // The AND mask might not be a constant, and we need to make sure that
927 // if we're going to fold the masking with the insert, all bits not
928 // know to be zero in the mask are known to be one.
929 KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
930 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
931
932 unsigned SHOpc = Op1.getOperand(0).getOpcode();
933 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
935 // Note that Value must be in range here (less than 32) because
936 // otherwise there would not be any bits set in InsertMask.
937 Op1 = Op1.getOperand(0).getOperand(0);
938 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
939 }
940 }
941
942 SH &= 31;
943 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
944 getI32Imm(ME, dl) };
945 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
946 return true;
947 }
948 }
949 return false;
950}
951
952static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
953 unsigned MaxTruncation = 0;
954 // Cannot use range-based for loop here as we need the actual use (i.e. we
955 // need the operand number corresponding to the use). A range-based for
956 // will unbox the use and provide an SDNode*.
957 for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
958 Use != UseEnd; ++Use) {
959 unsigned Opc =
960 Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
961 switch (Opc) {
962 default: return 0;
963 case ISD::TRUNCATE:
964 if (Use->isMachineOpcode())
965 return 0;
966 MaxTruncation =
967 std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
968 continue;
969 case ISD::STORE: {
970 if (Use->isMachineOpcode())
971 return 0;
972 StoreSDNode *STN = cast<StoreSDNode>(*Use);
973 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
974 if (MemVTSize == 64 || Use.getOperandNo() != 0)
975 return 0;
976 MaxTruncation = std::max(MaxTruncation, MemVTSize);
977 continue;
978 }
979 case PPC::STW8:
980 case PPC::STWX8:
981 case PPC::STWU8:
982 case PPC::STWUX8:
983 if (Use.getOperandNo() != 0)
984 return 0;
985 MaxTruncation = std::max(MaxTruncation, 32u);
986 continue;
987 case PPC::STH8:
988 case PPC::STHX8:
989 case PPC::STHU8:
990 case PPC::STHUX8:
991 if (Use.getOperandNo() != 0)
992 return 0;
993 MaxTruncation = std::max(MaxTruncation, 16u);
994 continue;
995 case PPC::STB8:
996 case PPC::STBX8:
997 case PPC::STBU8:
998 case PPC::STBUX8:
999 if (Use.getOperandNo() != 0)
1000 return 0;
1001 MaxTruncation = std::max(MaxTruncation, 8u);
1002 continue;
1003 }
1004 }
1005 return MaxTruncation;
1006}
1007
1008// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
1009// zeros and return the number of bits by the left of these consecutive zeros.
1010static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
1011 unsigned HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
1012 unsigned LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
1013 if ((HiTZ + LoLZ) >= Num)
1014 return (32 + HiTZ);
1015 return 0;
1016}
1017
1018// Direct materialization of 64-bit constants by enumerated patterns.
1019static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
1020 uint64_t Imm, unsigned &InstCnt) {
1021 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1022 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1023 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1024 unsigned LO = llvm::countl_one<uint64_t>(Imm);
1025 unsigned Hi32 = Hi_32(Imm);
1026 unsigned Lo32 = Lo_32(Imm);
1027 SDNode *Result = nullptr;
1028 unsigned Shift = 0;
1029
1030 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1031 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1032 };
1033
1034 // Following patterns use 1 instructions to materialize the Imm.
1035 InstCnt = 1;
1036 // 1-1) Patterns : {zeros}{15-bit valve}
1037 // {ones}{15-bit valve}
1038 if (isInt<16>(Imm)) {
1039 SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1040 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1041 }
1042 // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}
1043 // {ones}{15-bit valve}{16 zeros}
1044 if (TZ > 15 && (LZ > 32 || LO > 32))
1045 return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1046 getI32Imm((Imm >> 16) & 0xffff));
1047
1048 // Following patterns use 2 instructions to materialize the Imm.
1049 InstCnt = 2;
1050 assert(LZ < 64 && "Unexpected leading zeros here.");
1051 // Count of ones follwing the leading zeros.
1052 unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
1053 // 2-1) Patterns : {zeros}{31-bit value}
1054 // {ones}{31-bit value}
1055 if (isInt<32>(Imm)) {
1056 uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
1057 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1058 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1059 return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1060 getI32Imm(Imm & 0xffff));
1061 }
1062 // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
1063 // {zeros}{15-bit value}{zeros}
1064 // {zeros}{ones}{15-bit value}
1065 // {ones}{15-bit value}{zeros}
1066 // We can take advantage of LI's sign-extension semantics to generate leading
1067 // ones, and then use RLDIC to mask off the ones in both sides after rotation.
1068 if ((LZ + FO + TZ) > 48) {
1069 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1070 getI32Imm((Imm >> TZ) & 0xffff));
1071 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1072 getI32Imm(TZ), getI32Imm(LZ));
1073 }
1074 // 2-3) Pattern : {zeros}{15-bit value}{ones}
1075 // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,
1076 // therefore we can take advantage of LI's sign-extension semantics, and then
1077 // mask them off after rotation.
1078 //
1079 // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
1080 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1081 // +------------------------+ +------------------------+
1082 // 63 0 63 0
1083 // Imm (Imm >> (48 - LZ) & 0xffff)
1084 // +----sext-----|--16-bit--+ +clear-|-----------------+
1085 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1086 // +------------------------+ +------------------------+
1087 // 63 0 63 0
1088 // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
1089 if ((LZ + TO) > 48) {
1090 // Since the immediates with (LZ > 32) have been handled by previous
1091 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1092 // the Imm by a negative value.
1093 assert(LZ <= 32 && "Unexpected shift value.");
1094 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1095 getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1096 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1097 getI32Imm(48 - LZ), getI32Imm(LZ));
1098 }
1099 // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1100 // {ones}{15-bit value}{ones}
1101 // We can take advantage of LI's sign-extension semantics to generate leading
1102 // ones, and then use RLDICL to mask off the ones in left sides (if required)
1103 // after rotation.
1104 //
1105 // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
1106 // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
1107 // +------------------------+ +------------------------+
1108 // 63 0 63 0
1109 // Imm (Imm >> TO) & 0xffff
1110 // +----sext-----|--16-bit--+ +LZ|---------------------+
1111 // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
1112 // +------------------------+ +------------------------+
1113 // 63 0 63 0
1114 // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1115 if ((LZ + FO + TO) > 48) {
1116 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1117 getI32Imm((Imm >> TO) & 0xffff));
1118 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1119 getI32Imm(TO), getI32Imm(LZ));
1120 }
1121 // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1122 // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1123 // value, we can use LI for Lo16 without generating leading ones then add the
1124 // Hi16(in Lo32).
1125 if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1126 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1127 getI32Imm(Lo32 & 0xffff));
1128 return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
1129 getI32Imm(Lo32 >> 16));
1130 }
1131 // 2-6) Patterns : {******}{49 zeros}{******}
1132 // {******}{49 ones}{******}
1133 // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1134 // bits remain on both sides. Rotate right the Imm to construct an int<16>
1135 // value, use LI for int<16> value and then use RLDICL without mask to rotate
1136 // it back.
1137 //
1138 // 1) findContiguousZerosAtLeast(Imm, 49)
1139 // +------|--zeros-|------+ +---ones--||---15 bit--+
1140 // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
1141 // +----------------------+ +----------------------+
1142 // 63 0 63 0
1143 //
1144 // 2) findContiguousZerosAtLeast(~Imm, 49)
1145 // +------|--ones--|------+ +---ones--||---15 bit--+
1146 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1147 // +----------------------+ +----------------------+
1148 // 63 0 63 0
1149 if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
1150 (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
1151 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1152 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1153 getI32Imm(RotImm & 0xffff));
1154 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1155 getI32Imm(Shift), getI32Imm(0));
1156 }
1157 // 2-7) Patterns : High word == Low word
1158 // This may require 2 to 3 instructions, depending on whether Lo32 can be
1159 // materialized in 1 instruction.
1160 if (Hi32 == Lo32) {
1161 // Handle the first 32 bits.
1162 uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1163 uint64_t ImmLo16 = Lo32 & 0xffff;
1164 if (isInt<16>(Lo32))
1165 Result =
1166 CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
1167 else if (!ImmLo16)
1168 Result =
1169 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1170 else {
1171 InstCnt = 3;
1172 Result =
1173 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1174 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1175 SDValue(Result, 0), getI32Imm(ImmLo16));
1176 }
1177 // Use rldimi to insert the Low word into High word.
1178 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1179 getI32Imm(0)};
1180 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1181 }
1182
1183 // Following patterns use 3 instructions to materialize the Imm.
1184 InstCnt = 3;
1185 // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1186 // {zeros}{31-bit value}{zeros}
1187 // {zeros}{ones}{31-bit value}
1188 // {ones}{31-bit value}{zeros}
1189 // We can take advantage of LIS's sign-extension semantics to generate leading
1190 // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1191 // ones in both sides after rotation.
1192 if ((LZ + FO + TZ) > 32) {
1193 uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1194 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1195 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1196 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1197 getI32Imm((Imm >> TZ) & 0xffff));
1198 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1199 getI32Imm(TZ), getI32Imm(LZ));
1200 }
1201 // 3-2) Pattern : {zeros}{31-bit value}{ones}
1202 // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
1203 // value, therefore we can take advantage of LIS's sign-extension semantics,
1204 // add the remaining bits with ORI, and then mask them off after rotation.
1205 // This is similar to Pattern 2-3, please refer to the diagram there.
1206 if ((LZ + TO) > 32) {
1207 // Since the immediates with (LZ > 32) have been handled by previous
1208 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1209 // the Imm by a negative value.
1210 assert(LZ <= 32 && "Unexpected shift value.");
1211 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1212 getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1213 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1214 getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1215 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1216 getI32Imm(32 - LZ), getI32Imm(LZ));
1217 }
1218 // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1219 // {ones}{31-bit value}{ones}
1220 // We can take advantage of LIS's sign-extension semantics to generate leading
1221 // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1222 // ones in left sides (if required) after rotation.
1223 // This is similar to Pattern 2-4, please refer to the diagram there.
1224 if ((LZ + FO + TO) > 32) {
1225 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1226 getI32Imm((Imm >> (TO + 16)) & 0xffff));
1227 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1228 getI32Imm((Imm >> TO) & 0xffff));
1229 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1230 getI32Imm(TO), getI32Imm(LZ));
1231 }
1232 // 3-4) Patterns : {******}{33 zeros}{******}
1233 // {******}{33 ones}{******}
1234 // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1235 // bits remain on both sides. Rotate right the Imm to construct an int<32>
1236 // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1237 // rotate it back.
1238 // This is similar to Pattern 2-6, please refer to the diagram there.
1239 if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
1240 (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
1241 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1242 uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1243 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1244 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1245 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1246 getI32Imm(RotImm & 0xffff));
1247 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1248 getI32Imm(Shift), getI32Imm(0));
1249 }
1250
1251 InstCnt = 0;
1252 return nullptr;
1253}
1254
1255// Try to select instructions to generate a 64 bit immediate using prefix as
1256// well as non prefix instructions. The function will return the SDNode
1257// to materialize that constant or it will return nullptr if it does not
1258// find one. The variable InstCnt is set to the number of instructions that
1259// were selected.
1261 uint64_t Imm, unsigned &InstCnt) {
1262 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1263 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1264 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1265 unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1266 unsigned Hi32 = Hi_32(Imm);
1267 unsigned Lo32 = Lo_32(Imm);
1268
1269 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1270 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1271 };
1272
1273 auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1274 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1275 };
1276
1277 // Following patterns use 1 instruction to materialize Imm.
1278 InstCnt = 1;
1279
1280 // The pli instruction can materialize up to 34 bits directly.
1281 // If a constant fits within 34-bits, emit the pli instruction here directly.
1282 if (isInt<34>(Imm))
1283 return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1284 CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1285
1286 // Require at least two instructions.
1287 InstCnt = 2;
1288 SDNode *Result = nullptr;
1289 // Patterns : {zeros}{ones}{33-bit value}{zeros}
1290 // {zeros}{33-bit value}{zeros}
1291 // {zeros}{ones}{33-bit value}
1292 // {ones}{33-bit value}{zeros}
1293 // We can take advantage of PLI's sign-extension semantics to generate leading
1294 // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1295 if ((LZ + FO + TZ) > 30) {
1296 APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1297 APInt Extended = SignedInt34.sext(64);
1298 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1299 getI64Imm(Extended.getZExtValue()));
1300 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1301 getI32Imm(TZ), getI32Imm(LZ));
1302 }
1303 // Pattern : {zeros}{33-bit value}{ones}
1304 // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1305 // therefore we can take advantage of PLI's sign-extension semantics, and then
1306 // mask them off after rotation.
1307 //
1308 // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1309 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1310 // +------------------------+ +------------------------+
1311 // 63 0 63 0
1312 //
1313 // +----sext-----|--34-bit--+ +clear-|-----------------+
1314 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1315 // +------------------------+ +------------------------+
1316 // 63 0 63 0
1317 if ((LZ + TO) > 30) {
1318 APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1319 APInt Extended = SignedInt34.sext(64);
1320 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1321 getI64Imm(Extended.getZExtValue()));
1322 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1323 getI32Imm(30 - LZ), getI32Imm(LZ));
1324 }
1325 // Patterns : {zeros}{ones}{33-bit value}{ones}
1326 // {ones}{33-bit value}{ones}
1327 // Similar to LI we can take advantage of PLI's sign-extension semantics to
1328 // generate leading ones, and then use RLDICL to mask off the ones in left
1329 // sides (if required) after rotation.
1330 if ((LZ + FO + TO) > 30) {
1331 APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1332 APInt Extended = SignedInt34.sext(64);
1333 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1334 getI64Imm(Extended.getZExtValue()));
1335 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1336 getI32Imm(TO), getI32Imm(LZ));
1337 }
1338 // Patterns : {******}{31 zeros}{******}
1339 // : {******}{31 ones}{******}
1340 // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1341 // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
1342 // for the int<33> value and then use RLDICL without a mask to rotate it back.
1343 //
1344 // +------|--ones--|------+ +---ones--||---33 bit--+
1345 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1346 // +----------------------+ +----------------------+
1347 // 63 0 63 0
1348 for (unsigned Shift = 0; Shift < 63; ++Shift) {
1349 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1350 if (isInt<34>(RotImm)) {
1351 Result =
1352 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1353 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1354 SDValue(Result, 0), getI32Imm(Shift),
1355 getI32Imm(0));
1356 }
1357 }
1358
1359 // Patterns : High word == Low word
1360 // This is basically a splat of a 32 bit immediate.
1361 if (Hi32 == Lo32) {
1362 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1363 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1364 getI32Imm(0)};
1365 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1366 }
1367
1368 InstCnt = 3;
1369 // Catch-all
1370 // This pattern can form any 64 bit immediate in 3 instructions.
1371 SDNode *ResultHi =
1372 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1373 SDNode *ResultLo =
1374 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1375 SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1376 getI32Imm(0)};
1377 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1378}
1379
1380static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1381 unsigned *InstCnt = nullptr) {
1382 unsigned InstCntDirect = 0;
1383 // No more than 3 instructions are used if we can select the i64 immediate
1384 // directly.
1385 SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1386
1387 const PPCSubtarget &Subtarget =
1389
1390 // If we have prefixed instructions and there is a chance we can
1391 // materialize the constant with fewer prefixed instructions than
1392 // non-prefixed, try that.
1393 if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1394 unsigned InstCntDirectP = 0;
1395 SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1396 // Use the prefix case in either of two cases:
1397 // 1) We have no result from the non-prefix case to use.
1398 // 2) The non-prefix case uses more instructions than the prefix case.
1399 // If the prefix and non-prefix cases use the same number of instructions
1400 // we will prefer the non-prefix case.
1401 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1402 if (InstCnt)
1403 *InstCnt = InstCntDirectP;
1404 return ResultP;
1405 }
1406 }
1407
1408 if (Result) {
1409 if (InstCnt)
1410 *InstCnt = InstCntDirect;
1411 return Result;
1412 }
1413 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1414 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1415 };
1416
1417 uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff;
1418 uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff;
1419
1420 // Try to use 4 instructions to materialize the immediate which is "almost" a
1421 // splat of a 32 bit immediate.
1422 if (Hi16OfLo32 && Lo16OfLo32) {
1423 uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff;
1424 uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff;
1425 bool IsSelected = false;
1426
1427 auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
1428 SDNode *Result =
1429 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
1430 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1431 SDValue(Result, 0), getI32Imm(Lo16));
1432 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1433 getI32Imm(0)};
1434 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1435 };
1436
1437 if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1438 IsSelected = true;
1439 Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1440 // Modify Hi16OfHi32.
1441 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48),
1442 getI32Imm(0)};
1443 Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1444 } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1445 IsSelected = true;
1446 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1447 // Modify Lo16OfLo32.
1448 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1449 getI32Imm(16), getI32Imm(31)};
1450 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1451 } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1452 IsSelected = true;
1453 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1454 // Modify Hi16OfLo32.
1455 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1456 getI32Imm(0), getI32Imm(15)};
1457 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1458 }
1459 if (IsSelected == true) {
1460 if (InstCnt)
1461 *InstCnt = 4;
1462 return Result;
1463 }
1464 }
1465
1466 // Handle the upper 32 bit value.
1467 Result =
1468 selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1469 // Add in the last bits as required.
1470 if (Hi16OfLo32) {
1471 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1472 SDValue(Result, 0), getI32Imm(Hi16OfLo32));
1473 ++InstCntDirect;
1474 }
1475 if (Lo16OfLo32) {
1476 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1477 getI32Imm(Lo16OfLo32));
1478 ++InstCntDirect;
1479 }
1480 if (InstCnt)
1481 *InstCnt = InstCntDirect;
1482 return Result;
1483}
1484
1485// Select a 64-bit constant.
1487 SDLoc dl(N);
1488
1489 // Get 64 bit value.
1490 int64_t Imm = N->getAsZExtVal();
1491 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1492 uint64_t SextImm = SignExtend64(Imm, MinSize);
1493 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1494 if (isInt<16>(SextImm))
1495 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1496 }
1497 return selectI64Imm(CurDAG, dl, Imm);
1498}
1499
1500namespace {
1501
1502class BitPermutationSelector {
1503 struct ValueBit {
1504 SDValue V;
1505
1506 // The bit number in the value, using a convention where bit 0 is the
1507 // lowest-order bit.
1508 unsigned Idx;
1509
1510 // ConstZero means a bit we need to mask off.
1511 // Variable is a bit comes from an input variable.
1512 // VariableKnownToBeZero is also a bit comes from an input variable,
1513 // but it is known to be already zero. So we do not need to mask them.
1514 enum Kind {
1515 ConstZero,
1516 Variable,
1517 VariableKnownToBeZero
1518 } K;
1519
1520 ValueBit(SDValue V, unsigned I, Kind K = Variable)
1521 : V(V), Idx(I), K(K) {}
1522 ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1523
1524 bool isZero() const {
1525 return K == ConstZero || K == VariableKnownToBeZero;
1526 }
1527
1528 bool hasValue() const {
1529 return K == Variable || K == VariableKnownToBeZero;
1530 }
1531
1532 SDValue getValue() const {
1533 assert(hasValue() && "Cannot get the value of a constant bit");
1534 return V;
1535 }
1536
1537 unsigned getValueBitIndex() const {
1538 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1539 return Idx;
1540 }
1541 };
1542
1543 // A bit group has the same underlying value and the same rotate factor.
1544 struct BitGroup {
1545 SDValue V;
1546 unsigned RLAmt;
1547 unsigned StartIdx, EndIdx;
1548
1549 // This rotation amount assumes that the lower 32 bits of the quantity are
1550 // replicated in the high 32 bits by the rotation operator (which is done
1551 // by rlwinm and friends in 64-bit mode).
1552 bool Repl32;
1553 // Did converting to Repl32 == true change the rotation factor? If it did,
1554 // it decreased it by 32.
1555 bool Repl32CR;
1556 // Was this group coalesced after setting Repl32 to true?
1557 bool Repl32Coalesced;
1558
1559 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1560 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1561 Repl32Coalesced(false) {
1562 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1563 << " [" << S << ", " << E << "]\n");
1564 }
1565 };
1566
1567 // Information on each (Value, RLAmt) pair (like the number of groups
1568 // associated with each) used to choose the lowering method.
1569 struct ValueRotInfo {
1570 SDValue V;
1571 unsigned RLAmt = std::numeric_limits<unsigned>::max();
1572 unsigned NumGroups = 0;
1573 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1574 bool Repl32 = false;
1575
1576 ValueRotInfo() = default;
1577
1578 // For sorting (in reverse order) by NumGroups, and then by
1579 // FirstGroupStartIdx.
1580 bool operator < (const ValueRotInfo &Other) const {
1581 // We need to sort so that the non-Repl32 come first because, when we're
1582 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1583 // masking operation.
1584 if (Repl32 < Other.Repl32)
1585 return true;
1586 else if (Repl32 > Other.Repl32)
1587 return false;
1588 else if (NumGroups > Other.NumGroups)
1589 return true;
1590 else if (NumGroups < Other.NumGroups)
1591 return false;
1592 else if (RLAmt == 0 && Other.RLAmt != 0)
1593 return true;
1594 else if (RLAmt != 0 && Other.RLAmt == 0)
1595 return false;
1596 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1597 return true;
1598 return false;
1599 }
1600 };
1601
1602 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1603 using ValueBitsMemoizer =
1605 ValueBitsMemoizer Memoizer;
1606
1607 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1608 // The bool is true if something interesting was deduced, otherwise if we're
1609 // providing only a generic representation of V (or something else likewise
1610 // uninteresting for instruction selection) through the SmallVector.
1611 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1612 unsigned NumBits) {
1613 auto &ValueEntry = Memoizer[V];
1614 if (ValueEntry)
1615 return std::make_pair(ValueEntry->first, &ValueEntry->second);
1616 ValueEntry.reset(new ValueBitsMemoizedValue());
1617 bool &Interesting = ValueEntry->first;
1618 SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1619 Bits.resize(NumBits);
1620
1621 switch (V.getOpcode()) {
1622 default: break;
1623 case ISD::ROTL:
1624 if (isa<ConstantSDNode>(V.getOperand(1))) {
1625 assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1626 unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);
1627
1628 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1629
1630 for (unsigned i = 0; i < NumBits; ++i)
1631 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1632
1633 return std::make_pair(Interesting = true, &Bits);
1634 }
1635 break;
1636 case ISD::SHL:
1637 case PPCISD::SHL:
1638 if (isa<ConstantSDNode>(V.getOperand(1))) {
1639 // sld takes 7 bits, slw takes 6.
1640 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1641
1642 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1643
1644 if (ShiftAmt >= NumBits) {
1645 for (unsigned i = 0; i < NumBits; ++i)
1646 Bits[i] = ValueBit(ValueBit::ConstZero);
1647 } else {
1648 for (unsigned i = ShiftAmt; i < NumBits; ++i)
1649 Bits[i] = LHSBits[i - ShiftAmt];
1650 for (unsigned i = 0; i < ShiftAmt; ++i)
1651 Bits[i] = ValueBit(ValueBit::ConstZero);
1652 }
1653
1654 return std::make_pair(Interesting = true, &Bits);
1655 }
1656 break;
1657 case ISD::SRL:
1658 case PPCISD::SRL:
1659 if (isa<ConstantSDNode>(V.getOperand(1))) {
1660 // srd takes lowest 7 bits, srw takes 6.
1661 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1662
1663 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1664
1665 if (ShiftAmt >= NumBits) {
1666 for (unsigned i = 0; i < NumBits; ++i)
1667 Bits[i] = ValueBit(ValueBit::ConstZero);
1668 } else {
1669 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1670 Bits[i] = LHSBits[i + ShiftAmt];
1671 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1672 Bits[i] = ValueBit(ValueBit::ConstZero);
1673 }
1674
1675 return std::make_pair(Interesting = true, &Bits);
1676 }
1677 break;
1678 case ISD::AND:
1679 if (isa<ConstantSDNode>(V.getOperand(1))) {
1680 uint64_t Mask = V.getConstantOperandVal(1);
1681
1682 const SmallVector<ValueBit, 64> *LHSBits;
1683 // Mark this as interesting, only if the LHS was also interesting. This
1684 // prevents the overall procedure from matching a single immediate 'and'
1685 // (which is non-optimal because such an and might be folded with other
1686 // things if we don't select it here).
1687 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1688
1689 for (unsigned i = 0; i < NumBits; ++i)
1690 if (((Mask >> i) & 1) == 1)
1691 Bits[i] = (*LHSBits)[i];
1692 else {
1693 // AND instruction masks this bit. If the input is already zero,
1694 // we have nothing to do here. Otherwise, make the bit ConstZero.
1695 if ((*LHSBits)[i].isZero())
1696 Bits[i] = (*LHSBits)[i];
1697 else
1698 Bits[i] = ValueBit(ValueBit::ConstZero);
1699 }
1700
1701 return std::make_pair(Interesting, &Bits);
1702 }
1703 break;
1704 case ISD::OR: {
1705 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1706 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1707
1708 bool AllDisjoint = true;
1709 SDValue LastVal = SDValue();
1710 unsigned LastIdx = 0;
1711 for (unsigned i = 0; i < NumBits; ++i) {
1712 if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1713 // If both inputs are known to be zero and one is ConstZero and
1714 // another is VariableKnownToBeZero, we can select whichever
1715 // we like. To minimize the number of bit groups, we select
1716 // VariableKnownToBeZero if this bit is the next bit of the same
1717 // input variable from the previous bit. Otherwise, we select
1718 // ConstZero.
1719 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1720 LHSBits[i].getValueBitIndex() == LastIdx + 1)
1721 Bits[i] = LHSBits[i];
1722 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1723 RHSBits[i].getValueBitIndex() == LastIdx + 1)
1724 Bits[i] = RHSBits[i];
1725 else
1726 Bits[i] = ValueBit(ValueBit::ConstZero);
1727 }
1728 else if (LHSBits[i].isZero())
1729 Bits[i] = RHSBits[i];
1730 else if (RHSBits[i].isZero())
1731 Bits[i] = LHSBits[i];
1732 else {
1733 AllDisjoint = false;
1734 break;
1735 }
1736 // We remember the value and bit index of this bit.
1737 if (Bits[i].hasValue()) {
1738 LastVal = Bits[i].getValue();
1739 LastIdx = Bits[i].getValueBitIndex();
1740 }
1741 else {
1742 if (LastVal) LastVal = SDValue();
1743 LastIdx = 0;
1744 }
1745 }
1746
1747 if (!AllDisjoint)
1748 break;
1749
1750 return std::make_pair(Interesting = true, &Bits);
1751 }
1752 case ISD::ZERO_EXTEND: {
1753 // We support only the case with zero extension from i32 to i64 so far.
1754 if (V.getValueType() != MVT::i64 ||
1755 V.getOperand(0).getValueType() != MVT::i32)
1756 break;
1757
1758 const SmallVector<ValueBit, 64> *LHSBits;
1759 const unsigned NumOperandBits = 32;
1760 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1761 NumOperandBits);
1762
1763 for (unsigned i = 0; i < NumOperandBits; ++i)
1764 Bits[i] = (*LHSBits)[i];
1765
1766 for (unsigned i = NumOperandBits; i < NumBits; ++i)
1767 Bits[i] = ValueBit(ValueBit::ConstZero);
1768
1769 return std::make_pair(Interesting, &Bits);
1770 }
1771 case ISD::TRUNCATE: {
1772 EVT FromType = V.getOperand(0).getValueType();
1773 EVT ToType = V.getValueType();
1774 // We support only the case with truncate from i64 to i32.
1775 if (FromType != MVT::i64 || ToType != MVT::i32)
1776 break;
1777 const unsigned NumAllBits = FromType.getSizeInBits();
1779 std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1780 NumAllBits);
1781 const unsigned NumValidBits = ToType.getSizeInBits();
1782
1783 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1784 // So, we cannot include this truncate.
1785 bool UseUpper32bit = false;
1786 for (unsigned i = 0; i < NumValidBits; ++i)
1787 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1788 UseUpper32bit = true;
1789 break;
1790 }
1791 if (UseUpper32bit)
1792 break;
1793
1794 for (unsigned i = 0; i < NumValidBits; ++i)
1795 Bits[i] = (*InBits)[i];
1796
1797 return std::make_pair(Interesting, &Bits);
1798 }
1799 case ISD::AssertZext: {
1800 // For AssertZext, we look through the operand and
1801 // mark the bits known to be zero.
1802 const SmallVector<ValueBit, 64> *LHSBits;
1803 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1804 NumBits);
1805
1806 EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1807 const unsigned NumValidBits = FromType.getSizeInBits();
1808 for (unsigned i = 0; i < NumValidBits; ++i)
1809 Bits[i] = (*LHSBits)[i];
1810
1811 // These bits are known to be zero but the AssertZext may be from a value
1812 // that already has some constant zero bits (i.e. from a masking and).
1813 for (unsigned i = NumValidBits; i < NumBits; ++i)
1814 Bits[i] = (*LHSBits)[i].hasValue()
1815 ? ValueBit((*LHSBits)[i].getValue(),
1816 (*LHSBits)[i].getValueBitIndex(),
1817 ValueBit::VariableKnownToBeZero)
1818 : ValueBit(ValueBit::ConstZero);
1819
1820 return std::make_pair(Interesting, &Bits);
1821 }
1822 case ISD::LOAD:
1823 LoadSDNode *LD = cast<LoadSDNode>(V);
1824 if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1825 EVT VT = LD->getMemoryVT();
1826 const unsigned NumValidBits = VT.getSizeInBits();
1827
1828 for (unsigned i = 0; i < NumValidBits; ++i)
1829 Bits[i] = ValueBit(V, i);
1830
1831 // These bits are known to be zero.
1832 for (unsigned i = NumValidBits; i < NumBits; ++i)
1833 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1834
1835 // Zero-extending load itself cannot be optimized. So, it is not
1836 // interesting by itself though it gives useful information.
1837 return std::make_pair(Interesting = false, &Bits);
1838 }
1839 break;
1840 }
1841
1842 for (unsigned i = 0; i < NumBits; ++i)
1843 Bits[i] = ValueBit(V, i);
1844
1845 return std::make_pair(Interesting = false, &Bits);
1846 }
1847
1848 // For each value (except the constant ones), compute the left-rotate amount
1849 // to get it from its original to final position.
1850 void computeRotationAmounts() {
1851 NeedMask = false;
1852 RLAmt.resize(Bits.size());
1853 for (unsigned i = 0; i < Bits.size(); ++i)
1854 if (Bits[i].hasValue()) {
1855 unsigned VBI = Bits[i].getValueBitIndex();
1856 if (i >= VBI)
1857 RLAmt[i] = i - VBI;
1858 else
1859 RLAmt[i] = Bits.size() - (VBI - i);
1860 } else if (Bits[i].isZero()) {
1861 NeedMask = true;
1862 RLAmt[i] = UINT32_MAX;
1863 } else {
1864 llvm_unreachable("Unknown value bit type");
1865 }
1866 }
1867
1868 // Collect groups of consecutive bits with the same underlying value and
1869 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1870 // they break up groups.
1871 void collectBitGroups(bool LateMask) {
1872 BitGroups.clear();
1873
1874 unsigned LastRLAmt = RLAmt[0];
1875 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1876 unsigned LastGroupStartIdx = 0;
1877 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1878 for (unsigned i = 1; i < Bits.size(); ++i) {
1879 unsigned ThisRLAmt = RLAmt[i];
1880 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1881 if (LateMask && !ThisValue) {
1882 ThisValue = LastValue;
1883 ThisRLAmt = LastRLAmt;
1884 // If we're doing late masking, then the first bit group always starts
1885 // at zero (even if the first bits were zero).
1886 if (BitGroups.empty())
1887 LastGroupStartIdx = 0;
1888 }
1889
1890 // If this bit is known to be zero and the current group is a bit group
1891 // of zeros, we do not need to terminate the current bit group even the
1892 // Value or RLAmt does not match here. Instead, we terminate this group
1893 // when the first non-zero bit appears later.
1894 if (IsGroupOfZeros && Bits[i].isZero())
1895 continue;
1896
1897 // If this bit has the same underlying value and the same rotate factor as
1898 // the last one, then they're part of the same group.
1899 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1900 // We cannot continue the current group if this bits is not known to
1901 // be zero in a bit group of zeros.
1902 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1903 continue;
1904
1905 if (LastValue.getNode())
1906 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1907 i-1));
1908 LastRLAmt = ThisRLAmt;
1909 LastValue = ThisValue;
1910 LastGroupStartIdx = i;
1911 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1912 }
1913 if (LastValue.getNode())
1914 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1915 Bits.size()-1));
1916
1917 if (BitGroups.empty())
1918 return;
1919
1920 // We might be able to combine the first and last groups.
1921 if (BitGroups.size() > 1) {
1922 // If the first and last groups are the same, then remove the first group
1923 // in favor of the last group, making the ending index of the last group
1924 // equal to the ending index of the to-be-removed first group.
1925 if (BitGroups[0].StartIdx == 0 &&
1926 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1927 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1928 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1929 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1930 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1931 BitGroups.erase(BitGroups.begin());
1932 }
1933 }
1934 }
1935
1936 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1937 // associated with each. If the number of groups are same, we prefer a group
1938 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1939 // instruction. If there is a degeneracy, pick the one that occurs
1940 // first (in the final value).
1941 void collectValueRotInfo() {
1942 ValueRots.clear();
1943
1944 for (auto &BG : BitGroups) {
1945 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1946 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1947 VRI.V = BG.V;
1948 VRI.RLAmt = BG.RLAmt;
1949 VRI.Repl32 = BG.Repl32;
1950 VRI.NumGroups += 1;
1951 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1952 }
1953
1954 // Now that we've collected the various ValueRotInfo instances, we need to
1955 // sort them.
1956 ValueRotsVec.clear();
1957 for (auto &I : ValueRots) {
1958 ValueRotsVec.push_back(I.second);
1959 }
1960 llvm::sort(ValueRotsVec);
1961 }
1962
1963 // In 64-bit mode, rlwinm and friends have a rotation operator that
1964 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1965 // indices of these instructions can only be in the lower 32 bits, so they
1966 // can only represent some 64-bit bit groups. However, when they can be used,
1967 // the 32-bit replication can be used to represent, as a single bit group,
1968 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1969 // groups when possible. Returns true if any of the bit groups were
1970 // converted.
1971 void assignRepl32BitGroups() {
1972 // If we have bits like this:
1973 //
1974 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1975 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1976 // Groups: | RLAmt = 8 | RLAmt = 40 |
1977 //
1978 // But, making use of a 32-bit operation that replicates the low-order 32
1979 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1980 // of 8.
1981
1982 auto IsAllLow32 = [this](BitGroup & BG) {
1983 if (BG.StartIdx <= BG.EndIdx) {
1984 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1985 if (!Bits[i].hasValue())
1986 continue;
1987 if (Bits[i].getValueBitIndex() >= 32)
1988 return false;
1989 }
1990 } else {
1991 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1992 if (!Bits[i].hasValue())
1993 continue;
1994 if (Bits[i].getValueBitIndex() >= 32)
1995 return false;
1996 }
1997 for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1998 if (!Bits[i].hasValue())
1999 continue;
2000 if (Bits[i].getValueBitIndex() >= 32)
2001 return false;
2002 }
2003 }
2004
2005 return true;
2006 };
2007
2008 for (auto &BG : BitGroups) {
2009 // If this bit group has RLAmt of 0 and will not be merged with
2010 // another bit group, we don't benefit from Repl32. We don't mark
2011 // such group to give more freedom for later instruction selection.
2012 if (BG.RLAmt == 0) {
2013 auto PotentiallyMerged = [this](BitGroup & BG) {
2014 for (auto &BG2 : BitGroups)
2015 if (&BG != &BG2 && BG.V == BG2.V &&
2016 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
2017 return true;
2018 return false;
2019 };
2020 if (!PotentiallyMerged(BG))
2021 continue;
2022 }
2023 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
2024 if (IsAllLow32(BG)) {
2025 if (BG.RLAmt >= 32) {
2026 BG.RLAmt -= 32;
2027 BG.Repl32CR = true;
2028 }
2029
2030 BG.Repl32 = true;
2031
2032 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
2033 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
2034 << BG.StartIdx << ", " << BG.EndIdx << "]\n");
2035 }
2036 }
2037 }
2038
2039 // Now walk through the bit groups, consolidating where possible.
2040 for (auto I = BitGroups.begin(); I != BitGroups.end();) {
2041 // We might want to remove this bit group by merging it with the previous
2042 // group (which might be the ending group).
2043 auto IP = (I == BitGroups.begin()) ?
2044 std::prev(BitGroups.end()) : std::prev(I);
2045 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
2046 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
2047
2048 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
2049 << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
2050 << I->StartIdx << ", " << I->EndIdx
2051 << "] with group with range [" << IP->StartIdx << ", "
2052 << IP->EndIdx << "]\n");
2053
2054 IP->EndIdx = I->EndIdx;
2055 IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
2056 IP->Repl32Coalesced = true;
2057 I = BitGroups.erase(I);
2058 continue;
2059 } else {
2060 // There is a special case worth handling: If there is a single group
2061 // covering the entire upper 32 bits, and it can be merged with both
2062 // the next and previous groups (which might be the same group), then
2063 // do so. If it is the same group (so there will be only one group in
2064 // total), then we need to reverse the order of the range so that it
2065 // covers the entire 64 bits.
2066 if (I->StartIdx == 32 && I->EndIdx == 63) {
2067 assert(std::next(I) == BitGroups.end() &&
2068 "bit group ends at index 63 but there is another?");
2069 auto IN = BitGroups.begin();
2070
2071 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
2072 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
2073 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
2074 IsAllLow32(*I)) {
2075
2076 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
2077 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
2078 << ", " << I->EndIdx
2079 << "] with 32-bit replicated groups with ranges ["
2080 << IP->StartIdx << ", " << IP->EndIdx << "] and ["
2081 << IN->StartIdx << ", " << IN->EndIdx << "]\n");
2082
2083 if (IP == IN) {
2084 // There is only one other group; change it to cover the whole
2085 // range (backward, so that it can still be Repl32 but cover the
2086 // whole 64-bit range).
2087 IP->StartIdx = 31;
2088 IP->EndIdx = 30;
2089 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
2090 IP->Repl32Coalesced = true;
2091 I = BitGroups.erase(I);
2092 } else {
2093 // There are two separate groups, one before this group and one
2094 // after us (at the beginning). We're going to remove this group,
2095 // but also the group at the very beginning.
2096 IP->EndIdx = IN->EndIdx;
2097 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
2098 IP->Repl32Coalesced = true;
2099 I = BitGroups.erase(I);
2100 BitGroups.erase(BitGroups.begin());
2101 }
2102
2103 // This must be the last group in the vector (and we might have
2104 // just invalidated the iterator above), so break here.
2105 break;
2106 }
2107 }
2108 }
2109
2110 ++I;
2111 }
2112 }
2113
2114 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
2115 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
2116 }
2117
2118 uint64_t getZerosMask() {
2119 uint64_t Mask = 0;
2120 for (unsigned i = 0; i < Bits.size(); ++i) {
2121 if (Bits[i].hasValue())
2122 continue;
2123 Mask |= (UINT64_C(1) << i);
2124 }
2125
2126 return ~Mask;
2127 }
2128
2129 // This method extends an input value to 64 bit if input is 32-bit integer.
2130 // While selecting instructions in BitPermutationSelector in 64-bit mode,
2131 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
2132 // In such case, we extend it to 64 bit to be consistent with other values.
2133 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
2134 if (V.getValueSizeInBits() == 64)
2135 return V;
2136
2137 assert(V.getValueSizeInBits() == 32);
2138 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2139 SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2140 MVT::i64), 0);
2141 SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2142 MVT::i64, ImDef, V,
2143 SubRegIdx), 0);
2144 return ExtVal;
2145 }
2146
2147 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
2148 if (V.getValueSizeInBits() == 32)
2149 return V;
2150
2151 assert(V.getValueSizeInBits() == 64);
2152 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2153 SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
2154 MVT::i32, V, SubRegIdx), 0);
2155 return SubVal;
2156 }
2157
2158 // Depending on the number of groups for a particular value, it might be
2159 // better to rotate, mask explicitly (using andi/andis), and then or the
2160 // result. Select this part of the result first.
2161 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2163 return;
2164
2165 for (ValueRotInfo &VRI : ValueRotsVec) {
2166 unsigned Mask = 0;
2167 for (unsigned i = 0; i < Bits.size(); ++i) {
2168 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2169 continue;
2170 if (RLAmt[i] != VRI.RLAmt)
2171 continue;
2172 Mask |= (1u << i);
2173 }
2174
2175 // Compute the masks for andi/andis that would be necessary.
2176 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2177 assert((ANDIMask != 0 || ANDISMask != 0) &&
2178 "No set bits in mask for value bit groups");
2179 bool NeedsRotate = VRI.RLAmt != 0;
2180
2181 // We're trying to minimize the number of instructions. If we have one
2182 // group, using one of andi/andis can break even. If we have three
2183 // groups, we can use both andi and andis and break even (to use both
2184 // andi and andis we also need to or the results together). We need four
2185 // groups if we also need to rotate. To use andi/andis we need to do more
2186 // than break even because rotate-and-mask instructions tend to be easier
2187 // to schedule.
2188
2189 // FIXME: We've biased here against using andi/andis, which is right for
2190 // POWER cores, but not optimal everywhere. For example, on the A2,
2191 // andi/andis have single-cycle latency whereas the rotate-and-mask
2192 // instructions take two cycles, and it would be better to bias toward
2193 // andi/andis in break-even cases.
2194
2195 unsigned NumAndInsts = (unsigned) NeedsRotate +
2196 (unsigned) (ANDIMask != 0) +
2197 (unsigned) (ANDISMask != 0) +
2198 (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2199 (unsigned) (bool) Res;
2200
2201 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2202 << " RL: " << VRI.RLAmt << ":"
2203 << "\n\t\t\tisel using masking: " << NumAndInsts
2204 << " using rotates: " << VRI.NumGroups << "\n");
2205
2206 if (NumAndInsts >= VRI.NumGroups)
2207 continue;
2208
2209 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2210
2211 if (InstCnt) *InstCnt += NumAndInsts;
2212
2213 SDValue VRot;
2214 if (VRI.RLAmt) {
2215 SDValue Ops[] =
2216 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2217 getI32Imm(0, dl), getI32Imm(31, dl) };
2218 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2219 Ops), 0);
2220 } else {
2221 VRot = TruncateToInt32(VRI.V, dl);
2222 }
2223
2224 SDValue ANDIVal, ANDISVal;
2225 if (ANDIMask != 0)
2226 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2227 VRot, getI32Imm(ANDIMask, dl)),
2228 0);
2229 if (ANDISMask != 0)
2230 ANDISVal =
2231 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2232 getI32Imm(ANDISMask, dl)),
2233 0);
2234
2235 SDValue TotalVal;
2236 if (!ANDIVal)
2237 TotalVal = ANDISVal;
2238 else if (!ANDISVal)
2239 TotalVal = ANDIVal;
2240 else
2241 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2242 ANDIVal, ANDISVal), 0);
2243
2244 if (!Res)
2245 Res = TotalVal;
2246 else
2247 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2248 Res, TotalVal), 0);
2249
2250 // Now, remove all groups with this underlying value and rotation
2251 // factor.
2252 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2253 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2254 });
2255 }
2256 }
2257
2258 // Instruction selection for the 32-bit case.
2259 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
2260 SDLoc dl(N);
2261 SDValue Res;
2262
2263 if (InstCnt) *InstCnt = 0;
2264
2265 // Take care of cases that should use andi/andis first.
2266 SelectAndParts32(dl, Res, InstCnt);
2267
2268 // If we've not yet selected a 'starting' instruction, and we have no zeros
2269 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2270 // number of groups), and start with this rotated value.
2271 if ((!NeedMask || LateMask) && !Res) {
2272 ValueRotInfo &VRI = ValueRotsVec[0];
2273 if (VRI.RLAmt) {
2274 if (InstCnt) *InstCnt += 1;
2275 SDValue Ops[] =
2276 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2277 getI32Imm(0, dl), getI32Imm(31, dl) };
2278 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2279 0);
2280 } else {
2281 Res = TruncateToInt32(VRI.V, dl);
2282 }
2283
2284 // Now, remove all groups with this underlying value and rotation factor.
2285 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2286 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2287 });
2288 }
2289
2290 if (InstCnt) *InstCnt += BitGroups.size();
2291
2292 // Insert the other groups (one at a time).
2293 for (auto &BG : BitGroups) {
2294 if (!Res) {
2295 SDValue Ops[] =
2296 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2297 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2298 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2299 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2300 } else {
2301 SDValue Ops[] =
2302 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2303 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2304 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2305 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
2306 }
2307 }
2308
2309 if (LateMask) {
2310 unsigned Mask = (unsigned) getZerosMask();
2311
2312 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2313 assert((ANDIMask != 0 || ANDISMask != 0) &&
2314 "No set bits in zeros mask?");
2315
2316 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2317 (unsigned) (ANDISMask != 0) +
2318 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2319
2320 SDValue ANDIVal, ANDISVal;
2321 if (ANDIMask != 0)
2322 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2323 Res, getI32Imm(ANDIMask, dl)),
2324 0);
2325 if (ANDISMask != 0)
2326 ANDISVal =
2327 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2328 getI32Imm(ANDISMask, dl)),
2329 0);
2330
2331 if (!ANDIVal)
2332 Res = ANDISVal;
2333 else if (!ANDISVal)
2334 Res = ANDIVal;
2335 else
2336 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2337 ANDIVal, ANDISVal), 0);
2338 }
2339
2340 return Res.getNode();
2341 }
2342
// Returns how many instructions (1 or 2) a 64-bit rotate-and-mask, or
// rotate-and-insert when IsIns is set, needs for the given rotation and
// mask range [MaskStart, MaskEnd].
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // A replicated-32 group always counts as one instruction.
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstMaskStart = 63 - MaskEnd;
  const unsigned InstMaskEnd = 63 - MaskStart;

  // Non-insert forms: the mask reaches one end of the register.
  const bool MaskTouchesAnEnd =
      !IsIns && (InstMaskEnd == 63 || InstMaskStart == 0);
  // Either form: the mask end is tied to the rotation amount.
  const bool MaskEndMatchesRot = InstMaskEnd == 63 - RLAmt;

  return (MaskTouchesAnEnd || MaskEndMatchesRot) ? 1 : 2;
}
2360
2361 // For 64-bit values, not all combinations of rotates and masks are
2362 // available. Produce one if it is available.
2363 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2364 bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2365 unsigned *InstCnt = nullptr) {
2366 // In the notation used by the instructions, 'start' and 'end' are reversed
2367 // because bits are counted from high to low order.
2368 unsigned InstMaskStart = 64 - MaskEnd - 1,
2369 InstMaskEnd = 64 - MaskStart - 1;
2370
2371 if (InstCnt) *InstCnt += 1;
2372
2373 if (Repl32) {
2374 // This rotation amount assumes that the lower 32 bits of the quantity
2375 // are replicated in the high 32 bits by the rotation operator (which is
2376 // done by rlwinm and friends).
2377 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2378 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2379 SDValue Ops[] =
2380 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2381 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2382 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2383 Ops), 0);
2384 }
2385
2386 if (InstMaskEnd == 63) {
2387 SDValue Ops[] =
2388 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2389 getI32Imm(InstMaskStart, dl) };
2390 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
2391 }
2392
2393 if (InstMaskStart == 0) {
2394 SDValue Ops[] =
2395 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2396 getI32Imm(InstMaskEnd, dl) };
2397 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
2398 }
2399
2400 if (InstMaskEnd == 63 - RLAmt) {
2401 SDValue Ops[] =
2402 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2403 getI32Imm(InstMaskStart, dl) };
2404 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
2405 }
2406
2407 // We cannot do this with a single instruction, so we'll use two. The
2408 // problem is that we're not free to choose both a rotation amount and mask
2409 // start and end independently. We can choose an arbitrary mask start and
2410 // end, but then the rotation amount is fixed. Rotation, however, can be
2411 // inverted, and so by applying an "inverse" rotation first, we can get the
2412 // desired result.
2413 if (InstCnt) *InstCnt += 1;
2414
2415 // The rotation mask for the second instruction must be MaskStart.
2416 unsigned RLAmt2 = MaskStart;
2417 // The first instruction must rotate V so that the overall rotation amount
2418 // is RLAmt.
2419 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2420 if (RLAmt1)
2421 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2422 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2423 }
2424
2425 // For 64-bit values, not all combinations of rotates and masks are
2426 // available. Produce a rotate-mask-and-insert if one is available.
2427 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2428 unsigned RLAmt, bool Repl32, unsigned MaskStart,
2429 unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2430 // In the notation used by the instructions, 'start' and 'end' are reversed
2431 // because bits are counted from high to low order.
2432 unsigned InstMaskStart = 64 - MaskEnd - 1,
2433 InstMaskEnd = 64 - MaskStart - 1;
2434
2435 if (InstCnt) *InstCnt += 1;
2436
2437 if (Repl32) {
2438 // This rotation amount assumes that the lower 32 bits of the quantity
2439 // are replicated in the high 32 bits by the rotation operator (which is
2440 // done by rlwinm and friends).
2441 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2442 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2443 SDValue Ops[] =
2444 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2445 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2446 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2447 Ops), 0);
2448 }
2449
2450 if (InstMaskEnd == 63 - RLAmt) {
2451 SDValue Ops[] =
2452 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2453 getI32Imm(InstMaskStart, dl) };
2454 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2455 }
2456
2457 // We cannot do this with a single instruction, so we'll use two. The
2458 // problem is that we're not free to choose both a rotation amount and mask
2459 // start and end independently. We can choose an arbitrary mask start and
2460 // end, but then the rotation amount is fixed. Rotation, however, can be
2461 // inverted, and so by applying an "inverse" rotation first, we can get the
2462 // desired result.
2463 if (InstCnt) *InstCnt += 1;
2464
2465 // The rotation mask for the second instruction must be MaskStart.
2466 unsigned RLAmt2 = MaskStart;
2467 // The first instruction must rotate V so that the overall rotation amount
2468 // is RLAmt.
2469 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2470 if (RLAmt1)
2471 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2472 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2473 }
2474
2475 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2477 return;
2478
2479 // The idea here is the same as in the 32-bit version, but with additional
2480 // complications from the fact that Repl32 might be true. Because we
2481 // aggressively convert bit groups to Repl32 form (which, for small
2482 // rotation factors, involves no other change), and then coalesce, it might
2483 // be the case that a single 64-bit masking operation could handle both
2484 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2485 // form allowed coalescing, then we must use a 32-bit rotaton in order to
2486 // completely capture the new combined bit group.
2487
2488 for (ValueRotInfo &VRI : ValueRotsVec) {
2489 uint64_t Mask = 0;
2490
2491 // We need to add to the mask all bits from the associated bit groups.
2492 // If Repl32 is false, we need to add bits from bit groups that have
2493 // Repl32 true, but are trivially convertable to Repl32 false. Such a
2494 // group is trivially convertable if it overlaps only with the lower 32
2495 // bits, and the group has not been coalesced.
2496 auto MatchingBG = [VRI](const BitGroup &BG) {
2497 if (VRI.V != BG.V)
2498 return false;
2499
2500 unsigned EffRLAmt = BG.RLAmt;
2501 if (!VRI.Repl32 && BG.Repl32) {
2502 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2503 !BG.Repl32Coalesced) {
2504 if (BG.Repl32CR)
2505 EffRLAmt += 32;
2506 } else {
2507 return false;
2508 }
2509 } else if (VRI.Repl32 != BG.Repl32) {
2510 return false;
2511 }
2512
2513 return VRI.RLAmt == EffRLAmt;
2514 };
2515
2516 for (auto &BG : BitGroups) {
2517 if (!MatchingBG(BG))
2518 continue;
2519
2520 if (BG.StartIdx <= BG.EndIdx) {
2521 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2522 Mask |= (UINT64_C(1) << i);
2523 } else {
2524 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2525 Mask |= (UINT64_C(1) << i);
2526 for (unsigned i = 0; i <= BG.EndIdx; ++i)
2527 Mask |= (UINT64_C(1) << i);
2528 }
2529 }
2530
2531 // We can use the 32-bit andi/andis technique if the mask does not
2532 // require any higher-order bits. This can save an instruction compared
2533 // to always using the general 64-bit technique.
2534 bool Use32BitInsts = isUInt<32>(Mask);
2535 // Compute the masks for andi/andis that would be necessary.
2536 unsigned ANDIMask = (Mask & UINT16_MAX),
2537 ANDISMask = (Mask >> 16) & UINT16_MAX;
2538
2539 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2540
2541 unsigned NumAndInsts = (unsigned) NeedsRotate +
2542 (unsigned) (bool) Res;
2543 unsigned NumOfSelectInsts = 0;
2544 selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2545 assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2546 if (Use32BitInsts)
2547 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2548 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2549 else
2550 NumAndInsts += NumOfSelectInsts + /* and */ 1;
2551
2552 unsigned NumRLInsts = 0;
2553 bool FirstBG = true;
2554 bool MoreBG = false;
2555 for (auto &BG : BitGroups) {
2556 if (!MatchingBG(BG)) {
2557 MoreBG = true;
2558 continue;
2559 }
2560 NumRLInsts +=
2561 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2562 !FirstBG);
2563 FirstBG = false;
2564 }
2565
2566 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2567 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2568 << "\n\t\t\tisel using masking: " << NumAndInsts
2569 << " using rotates: " << NumRLInsts << "\n");
2570
2571 // When we'd use andi/andis, we bias toward using the rotates (andi only
2572 // has a record form, and is cracked on POWER cores). However, when using
2573 // general 64-bit constant formation, bias toward the constant form,
2574 // because that exposes more opportunities for CSE.
2575 if (NumAndInsts > NumRLInsts)
2576 continue;
2577 // When merging multiple bit groups, instruction or is used.
2578 // But when rotate is used, rldimi can inert the rotated value into any
2579 // register, so instruction or can be avoided.
2580 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2581 continue;
2582
2583 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2584
2585 if (InstCnt) *InstCnt += NumAndInsts;
2586
2587 SDValue VRot;
2588 // We actually need to generate a rotation if we have a non-zero rotation
2589 // factor or, in the Repl32 case, if we care about any of the
2590 // higher-order replicated bits. In the latter case, we generate a mask
2591 // backward so that it actually includes the entire 64 bits.
2592 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2593 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2594 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2595 else
2596 VRot = VRI.V;
2597
2598 SDValue TotalVal;
2599 if (Use32BitInsts) {
2600 assert((ANDIMask != 0 || ANDISMask != 0) &&
2601 "No set bits in mask when using 32-bit ands for 64-bit value");
2602
2603 SDValue ANDIVal, ANDISVal;
2604 if (ANDIMask != 0)
2605 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2606 ExtendToInt64(VRot, dl),
2607 getI32Imm(ANDIMask, dl)),
2608 0);
2609 if (ANDISMask != 0)
2610 ANDISVal =
2611 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2612 ExtendToInt64(VRot, dl),
2613 getI32Imm(ANDISMask, dl)),
2614 0);
2615
2616 if (!ANDIVal)
2617 TotalVal = ANDISVal;
2618 else if (!ANDISVal)
2619 TotalVal = ANDIVal;
2620 else
2621 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2622 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2623 } else {
2624 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2625 TotalVal =
2626 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2627 ExtendToInt64(VRot, dl), TotalVal),
2628 0);
2629 }
2630
2631 if (!Res)
2632 Res = TotalVal;
2633 else
2634 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2635 ExtendToInt64(Res, dl), TotalVal),
2636 0);
2637
2638 // Now, remove all groups with this underlying value and rotation
2639 // factor.
2640 eraseMatchingBitGroups(MatchingBG);
2641 }
2642 }
2643
2644 // Instruction selection for the 64-bit case.
2645 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2646 SDLoc dl(N);
2647 SDValue Res;
2648
2649 if (InstCnt) *InstCnt = 0;
2650
2651 // Take care of cases that should use andi/andis first.
2652 SelectAndParts64(dl, Res, InstCnt);
2653
2654 // If we've not yet selected a 'starting' instruction, and we have no zeros
2655 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2656 // number of groups), and start with this rotated value.
2657 if ((!NeedMask || LateMask) && !Res) {
2658 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2659 // groups will come first, and so the VRI representing the largest number
2660 // of groups might not be first (it might be the first Repl32 groups).
2661 unsigned MaxGroupsIdx = 0;
2662 if (!ValueRotsVec[0].Repl32) {
2663 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2664 if (ValueRotsVec[i].Repl32) {
2665 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2666 MaxGroupsIdx = i;
2667 break;
2668 }
2669 }
2670
2671 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2672 bool NeedsRotate = false;
2673 if (VRI.RLAmt) {
2674 NeedsRotate = true;
2675 } else if (VRI.Repl32) {
2676 for (auto &BG : BitGroups) {
2677 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2678 BG.Repl32 != VRI.Repl32)
2679 continue;
2680
2681 // We don't need a rotate if the bit group is confined to the lower
2682 // 32 bits.
2683 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2684 continue;
2685
2686 NeedsRotate = true;
2687 break;
2688 }
2689 }
2690
2691 if (NeedsRotate)
2692 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2693 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2694 InstCnt);
2695 else
2696 Res = VRI.V;
2697
2698 // Now, remove all groups with this underlying value and rotation factor.
2699 if (Res)
2700 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2701 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2702 BG.Repl32 == VRI.Repl32;
2703 });
2704 }
2705
2706 // Because 64-bit rotates are more flexible than inserts, we might have a
2707 // preference regarding which one we do first (to save one instruction).
2708 if (!Res)
2709 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2710 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2711 false) <
2712 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2713 true)) {
2714 if (I != BitGroups.begin()) {
2715 BitGroup BG = *I;
2716 BitGroups.erase(I);
2717 BitGroups.insert(BitGroups.begin(), BG);
2718 }
2719
2720 break;
2721 }
2722 }
2723
2724 // Insert the other groups (one at a time).
2725 for (auto &BG : BitGroups) {
2726 if (!Res)
2727 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2728 BG.EndIdx, InstCnt);
2729 else
2730 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2731 BG.StartIdx, BG.EndIdx, InstCnt);
2732 }
2733
2734 if (LateMask) {
2735 uint64_t Mask = getZerosMask();
2736
2737 // We can use the 32-bit andi/andis technique if the mask does not
2738 // require any higher-order bits. This can save an instruction compared
2739 // to always using the general 64-bit technique.
2740 bool Use32BitInsts = isUInt<32>(Mask);
2741 // Compute the masks for andi/andis that would be necessary.
2742 unsigned ANDIMask = (Mask & UINT16_MAX),
2743 ANDISMask = (Mask >> 16) & UINT16_MAX;
2744
2745 if (Use32BitInsts) {
2746 assert((ANDIMask != 0 || ANDISMask != 0) &&
2747 "No set bits in mask when using 32-bit ands for 64-bit value");
2748
2749 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2750 (unsigned) (ANDISMask != 0) +
2751 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2752
2753 SDValue ANDIVal, ANDISVal;
2754 if (ANDIMask != 0)
2755 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2756 ExtendToInt64(Res, dl),
2757 getI32Imm(ANDIMask, dl)),
2758 0);
2759 if (ANDISMask != 0)
2760 ANDISVal =
2761 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2762 ExtendToInt64(Res, dl),
2763 getI32Imm(ANDISMask, dl)),
2764 0);
2765
2766 if (!ANDIVal)
2767 Res = ANDISVal;
2768 else if (!ANDISVal)
2769 Res = ANDIVal;
2770 else
2771 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2772 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2773 } else {
2774 unsigned NumOfSelectInsts = 0;
2775 SDValue MaskVal =
2776 SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
2777 Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2778 ExtendToInt64(Res, dl), MaskVal),
2779 0);
2780 if (InstCnt)
2781 *InstCnt += NumOfSelectInsts + /* and */ 1;
2782 }
2783 }
2784
2785 return Res.getNode();
2786 }
2787
2788 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2789 // Fill in BitGroups.
2790 collectBitGroups(LateMask);
2791 if (BitGroups.empty())
2792 return nullptr;
2793
2794 // For 64-bit values, figure out when we can use 32-bit instructions.
2795 if (Bits.size() == 64)
2796 assignRepl32BitGroups();
2797
2798 // Fill in ValueRotsVec.
2799 collectValueRotInfo();
2800
2801 if (Bits.size() == 32) {
2802 return Select32(N, LateMask, InstCnt);
2803 } else {
2804 assert(Bits.size() == 64 && "Not 64 bits here?");
2805 return Select64(N, LateMask, InstCnt);
2806 }
2807
2808 return nullptr;
2809 }
2810
2811 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2812 erase_if(BitGroups, F);
2813 }
2814
2816
2817 bool NeedMask = false;
2819
2820 SmallVector<BitGroup, 16> BitGroups;
2821
2822 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2823 SmallVector<ValueRotInfo, 16> ValueRotsVec;
2824
2825 SelectionDAG *CurDAG = nullptr;
2826
2827public:
2828 BitPermutationSelector(SelectionDAG *DAG)
2829 : CurDAG(DAG) {}
2830
2831 // Here we try to match complex bit permutations into a set of
2832 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2833 // known to produce optimal code for common cases (like i32 byte swapping).
2834 SDNode *Select(SDNode *N) {
2835 Memoizer.clear();
2836 auto Result =
2837 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2838 if (!Result.first)
2839 return nullptr;
2840 Bits = std::move(*Result.second);
2841
2842 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2843 " selection for: ");
2844 LLVM_DEBUG(N->dump(CurDAG));
2845
2846 // Fill it RLAmt and set NeedMask.
2847 computeRotationAmounts();
2848
2849 if (!NeedMask)
2850 return Select(N, false);
2851
2852 // We currently have two techniques for handling results with zeros: early
2853 // masking (the default) and late masking. Late masking is sometimes more
2854 // efficient, but because the structure of the bit groups is different, it
2855 // is hard to tell without generating both and comparing the results. With
2856 // late masking, we ignore zeros in the resulting value when inserting each
2857 // set of bit groups, and then mask in the zeros at the end. With early
2858 // masking, we only insert the non-zero parts of the result at every step.
2859
2860 unsigned InstCnt = 0, InstCntLateMask = 0;
2861 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2862 SDNode *RN = Select(N, false, &InstCnt);
2863 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2864
2865 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2866 SDNode *RNLM = Select(N, true, &InstCntLateMask);
2867 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2868 << " instructions\n");
2869
2870 if (InstCnt <= InstCntLateMask) {
2871 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2872 return RN;
2873 }
2874
2875 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2876 return RNLM;
2877 }
2878};
2879
2880class IntegerCompareEliminator {
2881 SelectionDAG *CurDAG;
2882 PPCDAGToDAGISel *S;
2883 // Conversion type for interpreting results of a 32-bit instruction as
2884 // a 64-bit value or vice versa.
2885 enum ExtOrTruncConversion { Ext, Trunc };
2886
2887 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2888 // in a GPR.
2889 // ZExtOrig - use the original condition code, zero-extend value
2890 // ZExtInvert - invert the condition code, zero-extend value
2891 // SExtOrig - use the original condition code, sign-extend value
2892 // SExtInvert - invert the condition code, sign-extend value
2893 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2894
2895 // Comparisons against zero to emit GPR code sequences for. Each of these
2896 // sequences may need to be emitted for two or more equivalent patterns.
2897 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2898 // matters as well as the extension type: sext (-1/0), zext (1/0).
2899 // GEZExt - (zext (LHS >= 0))
2900 // GESExt - (sext (LHS >= 0))
2901 // LEZExt - (zext (LHS <= 0))
2902 // LESExt - (sext (LHS <= 0))
2903 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2904
2905 SDNode *tryEXTEND(SDNode *N);
2906 SDNode *tryLogicOpOfCompares(SDNode *N);
2907 SDValue computeLogicOpInGPR(SDValue LogicOp);
2908 SDValue signExtendInputIfNeeded(SDValue Input);
2909 SDValue zeroExtendInputIfNeeded(SDValue Input);
2910 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2911 SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2912 ZeroCompare CmpTy);
2913 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2914 int64_t RHSValue, SDLoc dl);
2915 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2916 int64_t RHSValue, SDLoc dl);
2917 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2918 int64_t RHSValue, SDLoc dl);
2919 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2920 int64_t RHSValue, SDLoc dl);
2921 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2922
2923public:
2924 IntegerCompareEliminator(SelectionDAG *DAG,
2925 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2927 .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2928 "Only expecting to use this on 64 bit targets.");
2929 }
2930 SDNode *Select(SDNode *N) {
2931 if (CmpInGPR == ICGPR_None)
2932 return nullptr;
2933 switch (N->getOpcode()) {
2934 default: break;
2935 case ISD::ZERO_EXTEND:
2938 return nullptr;
2939 [[fallthrough]];
2940 case ISD::SIGN_EXTEND:
2943 return nullptr;
2944 return tryEXTEND(N);
2945 case ISD::AND:
2946 case ISD::OR:
2947 case ISD::XOR:
2948 return tryLogicOpOfCompares(N);
2949 }
2950 return nullptr;
2951 }
2952};
2953
2954// The obvious case for wanting to keep the value in a GPR. Namely, the
2955// result of the comparison is actually needed in a GPR.
2956SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2957 assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2958 N->getOpcode() == ISD::SIGN_EXTEND) &&
2959 "Expecting a zero/sign extend node!");
2960 SDValue WideRes;
2961 // If we are zero-extending the result of a logical operation on i1
2962 // values, we can keep the values in GPRs.
2963 if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) &&
2964 N->getOperand(0).getValueType() == MVT::i1 &&
2965 N->getOpcode() == ISD::ZERO_EXTEND)
2966 WideRes = computeLogicOpInGPR(N->getOperand(0));
2967 else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2968 return nullptr;
2969 else
2970 WideRes =
2971 getSETCCInGPR(N->getOperand(0),
2972 N->getOpcode() == ISD::SIGN_EXTEND ?
2973 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2974
2975 if (!WideRes)
2976 return nullptr;
2977
2978 SDLoc dl(N);
2979 bool Input32Bit = WideRes.getValueType() == MVT::i32;
2980 bool Output32Bit = N->getValueType(0) == MVT::i32;
2981
2982 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2983 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2984
2985 SDValue ConvOp = WideRes;
2986 if (Input32Bit != Output32Bit)
2987 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2988 ExtOrTruncConversion::Trunc);
2989 return ConvOp.getNode();
2990}
2991
2992// Attempt to perform logical operations on the results of comparisons while
2993// keeping the values in GPRs. Without doing so, these would end up being
2994// lowered to CR-logical operations which suffer from significant latency and
2995// low ILP.
2996SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2997 if (N->getValueType(0) != MVT::i1)
2998 return nullptr;
2999 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
3000 "Expected a logic operation on setcc results.");
3001 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
3002 if (!LoweredLogical)
3003 return nullptr;
3004
3005 SDLoc dl(N);
3006 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
3007 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3008 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
3009 SDValue LHS = LoweredLogical.getOperand(0);
3010 SDValue RHS = LoweredLogical.getOperand(1);
3011 SDValue WideOp;
3012 SDValue OpToConvToRecForm;
3013
3014 // Look through any 32-bit to 64-bit implicit extend nodes to find the
3015 // opcode that is input to the XORI.
3016 if (IsBitwiseNegate &&
3017 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
3018 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
3019 else if (IsBitwiseNegate)
3020 // If the input to the XORI isn't an extension, that's what we're after.
3021 OpToConvToRecForm = LoweredLogical.getOperand(0);
3022 else
3023 // If this is not an XORI, it is a reg-reg logical op and we can convert
3024 // it to record-form.
3025 OpToConvToRecForm = LoweredLogical;
3026
3027 // Get the record-form version of the node we're looking to use to get the
3028 // CR result from.
3029 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
3030 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
3031
3032 // Convert the right node to record-form. This is either the logical we're
3033 // looking at or it is the input node to the negation (if we're looking at
3034 // a bitwise negation).
3035 if (NewOpc != -1 && IsBitwiseNegate) {
3036 // The input to the XORI has a record-form. Use it.
3037 assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
3038 "Expected a PPC::XORI8 only for bitwise negation.");
3039 // Emit the record-form instruction.
3040 std::vector<SDValue> Ops;
3041 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
3042 Ops.push_back(OpToConvToRecForm.getOperand(i));
3043
3044 WideOp =
3045 SDValue(CurDAG->getMachineNode(NewOpc, dl,
3046 OpToConvToRecForm.getValueType(),
3047 MVT::Glue, Ops), 0);
3048 } else {
3049 assert((NewOpc != -1 || !IsBitwiseNegate) &&
3050 "No record form available for AND8/OR8/XOR8?");
3051 WideOp =
3052 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
3053 dl, MVT::i64, MVT::Glue, LHS, RHS),
3054 0);
3055 }
3056
3057 // Select this node to a single bit from CR0 set by the record-form node
3058 // just created. For bitwise negation, use the EQ bit which is the equivalent
3059 // of negating the result (i.e. it is a bit set when the result of the
3060 // operation is zero).
3061 SDValue SRIdxVal =
3062 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
3063 SDValue CRBit =
3064 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
3065 MVT::i1, CR0Reg, SRIdxVal,
3066 WideOp.getValue(1)), 0);
3067 return CRBit.getNode();
3068}
3069
3070// Lower a logical operation on i1 values into a GPR sequence if possible.
3071// The result can be kept in a GPR if requested.
3072// Three types of inputs can be handled:
3073// - SETCC
3074// - TRUNCATE
3075// - Logical operation (AND/OR/XOR)
3076// There is also a special case that is handled (namely a complement operation
3077// achieved with xor %a, -1).
3078SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
3080 "Can only handle logic operations here.");
3081 assert(LogicOp.getValueType() == MVT::i1 &&
3082 "Can only handle logic operations on i1 values here.");
3083 SDLoc dl(LogicOp);
3084 SDValue LHS, RHS;
3085
3086 // Special case: xor %a, -1
3087 bool IsBitwiseNegation = isBitwiseNot(LogicOp);
3088
3089 // Produces a GPR sequence for each operand of the binary logic operation.
3090 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
3091 // the value in a GPR and for logic operations, it will recursively produce
3092 // a GPR sequence for the operation.
3093 auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
3094 unsigned OperandOpcode = Operand.getOpcode();
3095 if (OperandOpcode == ISD::SETCC)
3096 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
3097 else if (OperandOpcode == ISD::TRUNCATE) {
3098 SDValue InputOp = Operand.getOperand(0);
3099 EVT InVT = InputOp.getValueType();
3100 return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
3101 PPC::RLDICL, dl, InVT, InputOp,
3102 S->getI64Imm(0, dl),
3103 S->getI64Imm(63, dl)), 0);
3104 } else if (ISD::isBitwiseLogicOp(OperandOpcode))
3105 return computeLogicOpInGPR(Operand);
3106 return SDValue();
3107 };
3108 LHS = getLogicOperand(LogicOp.getOperand(0));
3109 RHS = getLogicOperand(LogicOp.getOperand(1));
3110
3111 // If a GPR sequence can't be produced for the LHS we can't proceed.
3112 // Not producing a GPR sequence for the RHS is only a problem if this isn't
3113 // a bitwise negation operation.
3114 if (!LHS || (!RHS && !IsBitwiseNegation))
3115 return SDValue();
3116
3117 NumLogicOpsOnComparison++;
3118
3119 // We will use the inputs as 64-bit values.
3120 if (LHS.getValueType() == MVT::i32)
3121 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
3122 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
3123 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
3124
3125 unsigned NewOpc;
3126 switch (LogicOp.getOpcode()) {
3127 default: llvm_unreachable("Unknown logic operation.");
3128 case ISD::AND: NewOpc = PPC::AND8; break;
3129 case ISD::OR: NewOpc = PPC::OR8; break;
3130 case ISD::XOR: NewOpc = PPC::XOR8; break;
3131 }
3132
3133 if (IsBitwiseNegation) {
3134 RHS = S->getI64Imm(1, dl);
3135 NewOpc = PPC::XORI8;
3136 }
3137
3138 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
3139
3140}
3141
3142/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
3143/// Otherwise just reinterpret it as a 64-bit value.
3144/// Useful when emitting comparison code for 32-bit values without using
3145/// the compare instruction (which only considers the lower 32-bits).
3146SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
3147 assert(Input.getValueType() == MVT::i32 &&
3148 "Can only sign-extend 32-bit values here.");
3149 unsigned Opc = Input.getOpcode();
3150
3151 // The value was sign extended and then truncated to 32-bits. No need to
3152 // sign extend it again.
3153 if (Opc == ISD::TRUNCATE &&
3154 (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
3155 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
3156 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3157
3158 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3159 // The input is a sign-extending load. All ppc sign-extending loads
3160 // sign-extend to the full 64-bits.
3161 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3162 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3163
3164 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3165 // We don't sign-extend constants.
3166 if (InputConst)
3167 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3168
3169 SDLoc dl(Input);
3170 SignExtensionsAdded++;
3171 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3172 MVT::i64, Input), 0);
3173}
3174
3175/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3176/// Otherwise just reinterpret it as a 64-bit value.
3177/// Useful when emitting comparison code for 32-bit values without using
3178/// the compare instruction (which only considers the lower 32-bits).
3179SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3180 assert(Input.getValueType() == MVT::i32 &&
3181 "Can only zero-extend 32-bit values here.");
3182 unsigned Opc = Input.getOpcode();
3183
3184 // The only condition under which we can omit the actual extend instruction:
3185 // - The value is a positive constant
3186 // - The value comes from a load that isn't a sign-extending load
3187 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3188 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3189 (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3190 Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3191 if (IsTruncateOfZExt)
3192 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3193
3194 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3195 if (InputConst && InputConst->getSExtValue() >= 0)
3196 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3197
3198 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3199 // The input is a load that doesn't sign-extend (it will be zero-extended).
3200 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3201 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3202
3203 // None of the above, need to zero-extend.
3204 SDLoc dl(Input);
3205 ZeroExtensionsAdded++;
3206 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3207 S->getI64Imm(0, dl),
3208 S->getI64Imm(32, dl)), 0);
3209}
3210
3211// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3212// course not actual zero/sign extensions that will generate machine code,
3213// they're just a way to reinterpret a 32 bit value in a register as a
3214// 64 bit value and vice-versa.
3215SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3216 ExtOrTruncConversion Conv) {
3217 SDLoc dl(NatWidthRes);
3218
3219 // For reinterpreting 32-bit values as 64 bit values, we generate
3220 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3221 if (Conv == ExtOrTruncConversion::Ext) {
3222 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3223 SDValue SubRegIdx =
3224 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3225 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3226 ImDef, NatWidthRes, SubRegIdx), 0);
3227 }
3228
3229 assert(Conv == ExtOrTruncConversion::Trunc &&
3230 "Unknown convertion between 32 and 64 bit values.");
3231 // For reinterpreting 64-bit values as 32-bit values, we just need to
3232 // EXTRACT_SUBREG (i.e. extract the low word).
3233 SDValue SubRegIdx =
3234 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3235 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3236 NatWidthRes, SubRegIdx), 0);
3237}
3238
3239// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3240// Handle both zero-extensions and sign-extensions.
3241SDValue
3242IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3243 ZeroCompare CmpTy) {
3244 EVT InVT = LHS.getValueType();
3245 bool Is32Bit = InVT == MVT::i32;
3246 SDValue ToExtend;
3247
3248 // Produce the value that needs to be either zero or sign extended.
3249 switch (CmpTy) {
3250 case ZeroCompare::GEZExt:
3251 case ZeroCompare::GESExt:
3252 ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
3253 dl, InVT, LHS, LHS), 0);
3254 break;
3255 case ZeroCompare::LEZExt:
3256 case ZeroCompare::LESExt: {
3257 if (Is32Bit) {
3258 // Upper 32 bits cannot be undefined for this sequence.
3259 LHS = signExtendInputIfNeeded(LHS);
3260 SDValue Neg =
3261 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3262 ToExtend =
3263 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3264 Neg, S->getI64Imm(1, dl),
3265 S->getI64Imm(63, dl)), 0);
3266 } else {
3267 SDValue Addi =
3268 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3269 S->getI64Imm(~0ULL, dl)), 0);
3270 ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
3271 Addi, LHS), 0);
3272 }
3273 break;
3274 }
3275 }
3276
3277 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
3278 if (!Is32Bit &&
3279 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3280 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3281 ToExtend, S->getI64Imm(1, dl),
3282 S->getI64Imm(63, dl)), 0);
3283 if (!Is32Bit &&
3284 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3285 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
3286 S->getI64Imm(63, dl)), 0);
3287
3288 assert(Is32Bit && "Should have handled the 32-bit sequences above.");
3289 // For 32-bit sequences, the extensions differ between GE/LE cases.
3290 switch (CmpTy) {
3291 case ZeroCompare::GEZExt: {
3292 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3293 S->getI32Imm(31, dl) };
3294 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3295 ShiftOps), 0);
3296 }
3297 case ZeroCompare::GESExt:
3298 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
3299 S->getI32Imm(31, dl)), 0);
3300 case ZeroCompare::LEZExt:
3301 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
3302 S->getI32Imm(1, dl)), 0);
3303 case ZeroCompare::LESExt:
3304 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
3305 S->getI32Imm(-1, dl)), 0);
3306 }
3307
3308 // The above case covers all the enumerators so it can't have a default clause
3309 // to avoid compiler warnings.
3310 llvm_unreachable("Unknown zero-comparison type.");
3311}
3312
3313/// Produces a zero-extended result of comparing two 32-bit values according to
3314/// the passed condition code.
///
/// \p RHSValue is only compared against 0, 1 and -1 in this function, to pick
/// the shorter special-case sequences. \p dl is passed to every
/// getMachineNode call below.
/// \returns the final machine node of the emitted sequence (typed i32 or i64
/// depending on the path taken), or an empty SDValue when the condition code
/// has no case below or when a path that tests CmpInGPR == ICGPR_NonExtIn
/// declines to emit its sequence.
3315SDValue
3316IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3318 int64_t RHSValue, SDLoc dl) {
     // NOTE(review): the declaration of CC (switched on below) and the
     // condition guarding the following early return are not visible in this
     // listing -- confirm both against the complete source.
3321 return SDValue();
     // Special-case flags. They are recomputed from the RHS node whenever LHS
     // and RHS are swapped further down.
3322 bool IsRHSZero = RHSValue == 0;
3323 bool IsRHSOne = RHSValue == 1;
3324 bool IsRHSNegOne = RHSValue == -1LL;
3325 switch (CC) {
3326 default: return SDValue();
3327 case ISD::SETEQ: {
3328 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3329 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
     // The XOR is skipped when RHS is zero; LHS is counted directly.
3330 SDValue Xor = IsRHSZero ? LHS :
3331 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3332 SDValue Clz =
3333 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
     // RLWINM with operands (27, 5, 31) is the "lshr ..., 5" step of the
     // patterns above.
3334 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3335 S->getI32Imm(31, dl) };
3336 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3337 ShiftOps), 0);
3338 }
3339 case ISD::SETNE: {
3340 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3341 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
     // Same nodes as SETEQ, followed by an XORI with 1.
3342 SDValue Xor = IsRHSZero ? LHS :
3343 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3344 SDValue Clz =
3345 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3346 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3347 S->getI32Imm(31, dl) };
3348 SDValue Shift =
3349 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3350 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3351 S->getI32Imm(1, dl)), 0);
3352 }
3353 case ISD::SETGE: {
3354 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3355 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3356 if(IsRHSZero)
3357 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3358
3359 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3360 // by swapping inputs and falling through.
3361 std::swap(LHS, RHS);
     // RHS now holds the former LHS, so the zero test is redone on that node.
3362 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3363 IsRHSZero = RHSConst && RHSConst->isZero();
3364 [[fallthrough]];
3365 }
3366 case ISD::SETLE: {
3367 if (CmpInGPR == ICGPR_NonExtIn)
3368 return SDValue();
3369 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3370 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3371 if(IsRHSZero) {
     // NOTE(review): this test repeats the one at the top of the case, which
     // has already returned for ICGPR_NonExtIn, so it cannot fire here.
3372 if (CmpInGPR == ICGPR_NonExtIn)
3373 return SDValue();
3374 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3375 }
3376
3377 // The upper 32-bits of the register can't be undefined for this sequence.
3378 LHS = signExtendInputIfNeeded(LHS);
3379 RHS = signExtendInputIfNeeded(RHS);
     // From here on the nodes are typed i64: SUBF8, then RLDICL (1, 63) for
     // the "lshr ..., 63" step, then XORI8 with 1.
3380 SDValue Sub =
3381 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3382 SDValue Shift =
3383 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3384 S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3385 0);
3386 return
3387 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3388 MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3389 }
3390 case ISD::SETGT: {
3391 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3392 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3393 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3394 // Handle SETGT -1 (which is equivalent to SETGE 0).
3395 if (IsRHSNegOne)
3396 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3397
3398 if (IsRHSZero) {
3399 if (CmpInGPR == ICGPR_NonExtIn)
3400 return SDValue();
3401 // The upper 32-bits of the register can't be undefined for this sequence.
3402 LHS = signExtendInputIfNeeded(LHS);
     // NOTE(review): the extended RHS is not used by the NEG8/RLDICL nodes
     // built in this branch.
3403 RHS = signExtendInputIfNeeded(RHS);
3404 SDValue Neg =
3405 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3406 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3407 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3408 }
3409 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3410 // (%b < %a) by swapping inputs and falling through.
3411 std::swap(LHS, RHS);
     // Refresh both flags that the SETLT case below inspects.
3412 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3413 IsRHSZero = RHSConst && RHSConst->isZero();
3414 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3415 [[fallthrough]];
3416 }
3417 case ISD::SETLT: {
3418 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3419 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3420 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3421 // Handle SETLT 1 (which is equivalent to SETLE 0).
3422 if (IsRHSOne) {
3423 if (CmpInGPR == ICGPR_NonExtIn)
3424 return SDValue();
3425 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3426 }
3427
     // Comparison against zero: a single i32 RLWINM (1, 31, 31), emitted
     // without consulting CmpInGPR and without extending the input.
3428 if (IsRHSZero) {
3429 SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3430 S->getI32Imm(31, dl) };
3431 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3432 ShiftOps), 0);
3433 }
3434
3435 if (CmpInGPR == ICGPR_NonExtIn)
3436 return SDValue();
3437 // The upper 32-bits of the register can't be undefined for this sequence.
3438 LHS = signExtendInputIfNeeded(LHS);
3439 RHS = signExtendInputIfNeeded(RHS);
     // Operands are passed as (RHS, LHS) here, the reverse of the SETLE case.
3440 SDValue SUBFNode =
3441 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3442 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3443 SUBFNode, S->getI64Imm(1, dl),
3444 S->getI64Imm(63, dl)), 0);
3445 }
3446 case ISD::SETUGE:
3447 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %a, %b), 63), 1)
3448 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %b, %a), 63), 1)
     // The two lines above use the same "sub" operand order as the SETLE
     // comment, whose SUBF8/RLDICL/XORI8 nodes are built identically.
3449 std::swap(LHS, RHS);
3450 [[fallthrough]];
3451 case ISD::SETULE: {
3452 if (CmpInGPR == ICGPR_NonExtIn)
3453 return SDValue();
3454 // The upper 32-bits of the register can't be undefined for this sequence.
3455 LHS = zeroExtendInputIfNeeded(LHS);
3456 RHS = zeroExtendInputIfNeeded(RHS);
     // NOTE(review): the declaration initialised by the next expression (used
     // below as Subtract) is not visible in this listing.
3458 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3459 SDValue SrdiNode =
3460 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3461 Subtract, S->getI64Imm(1, dl),
3462 S->getI64Imm(63, dl)), 0);
3463 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3464 S->getI32Imm(1, dl)), 0);
3465 }
3466 case ISD::SETUGT:
3467 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3468 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3469 std::swap(LHS, RHS);
3470 [[fallthrough]];
3471 case ISD::SETULT: {
3472 if (CmpInGPR == ICGPR_NonExtIn)
3473 return SDValue();
3474 // The upper 32-bits of the register can't be undefined for this sequence.
3475 LHS = zeroExtendInputIfNeeded(LHS);
3476 RHS = zeroExtendInputIfNeeded(RHS);
     // NOTE(review): the declaration initialised by the next expression (used
     // below as Subtract) is not visible in this listing.
3478 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3479 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3480 Subtract, S->getI64Imm(1, dl),
3481 S->getI64Imm(63, dl)), 0);
3482 }
3483 }
3484}
3485
3486/// Produces a sign-extended result of comparing two 32-bit values according to
3487/// the passed condition code.
///
/// \p RHSValue is only compared against 0, 1 and -1 in this function, to pick
/// the shorter special-case sequences. \p dl is passed to every
/// getMachineNode call below.
/// \returns the final machine node of the emitted sequence (typed i32 or i64
/// depending on the path taken), or an empty SDValue when the condition code
/// has no case below or when a path that tests CmpInGPR == ICGPR_NonExtIn
/// declines to emit its sequence.
3488SDValue
3489IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3491 int64_t RHSValue, SDLoc dl) {
     // NOTE(review): the declaration of CC (switched on below) and the
     // condition guarding the following early return are not visible in this
     // listing -- confirm both against the complete source.
3494 return SDValue();
     // Special-case flags. They are recomputed from the RHS node whenever LHS
     // and RHS are swapped further down.
3495 bool IsRHSZero = RHSValue == 0;
3496 bool IsRHSOne = RHSValue == 1;
3497 bool IsRHSNegOne = RHSValue == -1LL;
3498
3499 switch (CC) {
3500 default: return SDValue();
3501 case ISD::SETEQ: {
3502 // (sext (setcc %a, %b, seteq)) ->
3503 // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3504 // (sext (setcc %a, 0, seteq)) ->
3505 // (ashr (shl (ctlz %a), 58), 63)
     // NOTE(review): the nodes built below are RLWINM (27, 5, 31) followed by
     // NEG -- the same shift operands the SETNE case describes as "shift right
     // by 5 bits" -- rather than the shl/ashr pair written in the pattern
     // text above. The names SHLOps and Slwi notwithstanding.
3506 SDValue CountInput = IsRHSZero ? LHS :
3507 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3508 SDValue Cntlzw =
3509 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3510 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3511 S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3512 SDValue Slwi =
3513 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3514 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3515 }
3516 case ISD::SETNE: {
3517 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3518 // flip the bit, finally take 2's complement.
3519 // (sext (setcc %a, %b, setne)) ->
3520 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3521 // Same as above, but the first xor is not needed.
3522 // (sext (setcc %a, 0, setne)) ->
3523 // (neg (xor (lshr (ctlz %a), 5), 1))
3524 SDValue Xor = IsRHSZero ? LHS :
3525 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3526 SDValue Clz =
3527 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3528 SDValue ShiftOps[] =
3529 { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3530 SDValue Shift =
3531 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3532 SDValue Xori =
3533 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3534 S->getI32Imm(1, dl)), 0);
3535 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3536 }
3537 case ISD::SETGE: {
3538 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3539 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3540 if (IsRHSZero)
3541 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3542
3543 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3544 // by swapping inputs and falling through.
3545 std::swap(LHS, RHS);
     // RHS now holds the former LHS, so the zero test is redone on that node.
3546 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3547 IsRHSZero = RHSConst && RHSConst->isZero();
3548 [[fallthrough]];
3549 }
3550 case ISD::SETLE: {
3551 if (CmpInGPR == ICGPR_NonExtIn)
3552 return SDValue();
3553 // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
3554 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3555 if (IsRHSZero)
3556 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3557
3558 // The upper 32-bits of the register can't be undefined for this sequence.
3559 LHS = signExtendInputIfNeeded(LHS);
3560 RHS = signExtendInputIfNeeded(RHS);
     // NOTE(review): this SUBF8 requests an additional MVT::Glue result, but
     // only result #0 is consumed below.
3561 SDValue SUBFNode =
3562 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3563 LHS, RHS), 0);
3564 SDValue Srdi =
3565 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3566 SUBFNode, S->getI64Imm(1, dl),
3567 S->getI64Imm(63, dl)), 0);
3568 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3569 S->getI32Imm(-1, dl)), 0);
3570 }
3571 case ISD::SETGT: {
3572 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3573 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3574 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3575 if (IsRHSNegOne)
3576 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3577 if (IsRHSZero) {
3578 if (CmpInGPR == ICGPR_NonExtIn)
3579 return SDValue();
3580 // The upper 32-bits of the register can't be undefined for this sequence.
3581 LHS = signExtendInputIfNeeded(LHS);
     // NOTE(review): the extended RHS is not used by the NEG8/SRADI nodes
     // built in this branch.
3582 RHS = signExtendInputIfNeeded(RHS);
3583 SDValue Neg =
3584 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3585 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3586 S->getI64Imm(63, dl)), 0);
3587 }
3588 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3589 // (%b < %a) by swapping inputs and falling through.
3590 std::swap(LHS, RHS);
     // Refresh both flags that the SETLT case below inspects.
3591 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3592 IsRHSZero = RHSConst && RHSConst->isZero();
3593 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3594 [[fallthrough]];
3595 }
3596 case ISD::SETLT: {
3597 // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3598 // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3599 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
3600 if (IsRHSOne) {
3601 if (CmpInGPR == ICGPR_NonExtIn)
3602 return SDValue();
3603 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3604 }
     // Comparison against zero: a single i32 SRAWI by 31, emitted without
     // consulting CmpInGPR and without extending the input.
3605 if (IsRHSZero)
3606 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3607 S->getI32Imm(31, dl)), 0);
3608
3609 if (CmpInGPR == ICGPR_NonExtIn)
3610 return SDValue();
3611 // The upper 32-bits of the register can't be undefined for this sequence.
3612 LHS = signExtendInputIfNeeded(LHS);
3613 RHS = signExtendInputIfNeeded(RHS);
     // Operands are passed as (RHS, LHS) here, the reverse of the SETLE case.
3614 SDValue SUBFNode =
3615 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3616 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3617 SUBFNode, S->getI64Imm(63, dl)), 0);
3618 }
3619 case ISD::SETUGE:
3620 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3621 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3622 std::swap(LHS, RHS);
3623 [[fallthrough]];
3624 case ISD::SETULE: {
3625 if (CmpInGPR == ICGPR_NonExtIn)
3626 return SDValue();
3627 // The upper 32-bits of the register can't be undefined for this sequence.
3628 LHS = zeroExtendInputIfNeeded(LHS);
3629 RHS = zeroExtendInputIfNeeded(RHS);
     // NOTE(review): the declaration initialised by the next expression (used
     // below as Subtract) is not visible in this listing.
3631 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3632 SDValue Shift =
3633 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3634 S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3635 0);
3636 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3637 S->getI32Imm(-1, dl)), 0);
3638 }
3639 case ISD::SETUGT:
3640 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3641 // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3642 std::swap(LHS, RHS);
3643 [[fallthrough]];
3644 case ISD::SETULT: {
3645 if (CmpInGPR == ICGPR_NonExtIn)
3646 return SDValue();
3647 // The upper 32-bits of the register can't be undefined for this sequence.
3648 LHS = zeroExtendInputIfNeeded(LHS);
3649 RHS = zeroExtendInputIfNeeded(RHS);
     // NOTE(review): the declaration initialised by the next expression (used
     // below as Subtract) is not visible in this listing.
3651 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3652 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3653 Subtract, S->getI64Imm(63, dl)), 0);
3654 }
3655 }
3656}
3657
3658/// Produces a zero-extended result of comparing two 64-bit values according to
3659/// the passed condition code.
///
/// \p RHSValue is only compared against 0, 1 and -1 in this function, to pick
/// the shorter special-case sequences. \p dl is passed to every
/// getMachineNode call below. All nodes built here are typed i64.
/// \returns the final machine node of the emitted sequence, or an empty
/// SDValue when the condition code has no case below.
3660SDValue
3661IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3663 int64_t RHSValue, SDLoc dl) {
     // NOTE(review): the declaration of CC (switched on below) and the
     // condition guarding the following early return are not visible in this
     // listing -- confirm both against the complete source.
3666 return SDValue();
     // Special-case flags. They are recomputed from the RHS node whenever LHS
     // and RHS are swapped further down.
3667 bool IsRHSZero = RHSValue == 0;
3668 bool IsRHSOne = RHSValue == 1;
3669 bool IsRHSNegOne = RHSValue == -1LL;
3670 switch (CC) {
3671 default: return SDValue();
3672 case ISD::SETEQ: {
3673 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3674 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
     // The XOR8 is skipped when RHS is zero; LHS is counted directly.
3675 SDValue Xor = IsRHSZero ? LHS :
3676 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3677 SDValue Clz =
3678 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
     // RLDICL with operands (58, 63) is the "lshr ..., 6" step of the patterns
     // above.
3679 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3680 S->getI64Imm(58, dl),
3681 S->getI64Imm(63, dl)), 0);
3682 }
3683 case ISD::SETNE: {
3684 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3685 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3686 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3687 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3688 SDValue Xor = IsRHSZero ? LHS :
3689 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
     // ADDIC8 is created with an extra MVT::Glue result; AC.getValue(1) below
     // hands that result to SUBFE8 (the "addc.CA" of the patterns above).
3690 SDValue AC =
3691 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3692 Xor, S->getI32Imm(~0U, dl)), 0);
3693 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3694 Xor, AC.getValue(1)), 0);
3695 }
3696 case ISD::SETGE: {
3697 // {subc.reg, subc.CA} = (subcarry %a, %b)
3698 // (zext (setcc %a, %b, setge)) ->
3699 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3700 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3701 if (IsRHSZero)
3702 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
     // Otherwise treat (%a >= %b) as (%b <= %a): swap, redo the zero test on
     // the new RHS node and share the SETLE code.
3703 std::swap(LHS, RHS);
3704 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3705 IsRHSZero = RHSConst && RHSConst->isZero();
3706 [[fallthrough]];
3707 }
3708 case ISD::SETLE: {
3709 // {subc.reg, subc.CA} = (subcarry %b, %a)
3710 // (zext (setcc %a, %b, setle)) ->
3711 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3712 // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
3713 if (IsRHSZero)
3714 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
     // Despite its name, ShiftL is the RLDICL (1, 63) node, i.e. the
     // "lshr %a, 63" operand of the pattern; ShiftR is the SRADI by 63.
3715 SDValue ShiftL =
3716 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3717 S->getI64Imm(1, dl),
3718 S->getI64Imm(63, dl)), 0);
3719 SDValue ShiftR =
3720 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3721 S->getI64Imm(63, dl)), 0);
     // Only result #1 (the MVT::Glue value) of SUBFC8 is kept; it becomes the
     // last operand of ADDE8.
3722 SDValue SubtractCarry =
3723 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3724 LHS, RHS), 1);
3725 return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3726 ShiftR, ShiftL, SubtractCarry), 0);
3727 }
3728 case ISD::SETGT: {
3729 // {subc.reg, subc.CA} = (subcarry %b, %a)
3730 // (zext (setcc %a, %b, setgt)) ->
3731 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3732 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
     // A comparison "> -1" is handled with the GEZExt compound sequence.
3733 if (IsRHSNegOne)
3734 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3735 if (IsRHSZero) {
3736 SDValue Addi =
3737 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3738 S->getI64Imm(~0ULL, dl)), 0);
3739 SDValue Nor =
3740 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3741 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3742 S->getI64Imm(1, dl),
3743 S->getI64Imm(63, dl)), 0);
3744 }
     // Otherwise treat (%a > %b) as (%b < %a): swap, refresh the flags the
     // SETLT case inspects and share its code.
3745 std::swap(LHS, RHS);
3746 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3747 IsRHSZero = RHSConst && RHSConst->isZero();
3748 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3749 [[fallthrough]];
3750 }
3751 case ISD::SETLT: {
3752 // {subc.reg, subc.CA} = (subcarry %a, %b)
3753 // (zext (setcc %a, %b, setlt)) ->
3754 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3755 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
     // A comparison "< 1" is handled with the LEZExt compound sequence.
3756 if (IsRHSOne)
3757 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3758 if (IsRHSZero)
3759 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3760 S->getI64Imm(1, dl),
3761 S->getI64Imm(63, dl)), 0);
3762 SDValue SRADINode =
3763 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3764 LHS, S->getI64Imm(63, dl)), 0);
3765 SDValue SRDINode =
3766 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3767 RHS, S->getI64Imm(1, dl),
3768 S->getI64Imm(63, dl)), 0);
     // Only result #1 (the MVT::Glue value) of SUBFC8 is kept; it becomes the
     // last operand of ADDE8.
3769 SDValue SUBFC8Carry =
3770 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3771 RHS, LHS), 1);
3772 SDValue ADDE8Node =
3773 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3774 SRDINode, SRADINode, SUBFC8Carry), 0);
3775 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3776 ADDE8Node, S->getI64Imm(1, dl)), 0);
3777 }
3778 case ISD::SETUGE:
3779 // {subc.reg, subc.CA} = (subcarry %a, %b)
3780 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3781 std::swap(LHS, RHS);
3782 [[fallthrough]];
3783 case ISD::SETULE: {
3784 // {subc.reg, subc.CA} = (subcarry %b, %a)
3785 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3786 SDValue SUBFC8Carry =
3787 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3788 LHS, RHS), 1);
3789 SDValue SUBFE8Node =
3790 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3791 LHS, LHS, SUBFC8Carry), 0);
3792 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3793 SUBFE8Node, S->getI64Imm(1, dl)), 0);
3794 }
3795 case ISD::SETUGT:
3796 // {subc.reg, subc.CA} = (subcarry %b, %a)
3797 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3798 std::swap(LHS, RHS);
3799 [[fallthrough]];
3800 case ISD::SETULT: {
3801 // {subc.reg, subc.CA} = (subcarry %a, %b)
3802 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3803 SDValue SubtractCarry =
3804 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3805 RHS, LHS), 1);
3806 SDValue ExtSub =
3807 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3808 LHS, LHS, SubtractCarry), 0);
3809 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3810 ExtSub), 0);
3811 }
3812 }
3813}
3814
3815/// Produces a sign-extended result of comparing two 64-bit values according to
3816/// the passed condition code.
///
/// \p RHSValue is only compared against 0, 1 and -1 in this function, to pick
/// the shorter special-case sequences. \p dl is passed to every
/// getMachineNode call below. All nodes built here are typed i64.
/// \returns the final machine node of the emitted sequence, or an empty
/// SDValue when the condition code has no case below.
3817SDValue
3818IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3820 int64_t RHSValue, SDLoc dl) {
     // NOTE(review): the declaration of CC (switched on below) and the
     // condition guarding the following early return are not visible in this
     // listing -- confirm both against the complete source.
3823 return SDValue();
     // Special-case flags. They are recomputed from the RHS node whenever LHS
     // and RHS are swapped further down.
3824 bool IsRHSZero = RHSValue == 0;
3825 bool IsRHSOne = RHSValue == 1;
3826 bool IsRHSNegOne = RHSValue == -1LL;
3827 switch (CC) {
3828 default: return SDValue();
3829 case ISD::SETEQ: {
3830 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3831 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3832 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3833 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
     // The XOR8 is skipped when RHS is zero; LHS feeds ADDIC8 directly.
3834 SDValue AddInput = IsRHSZero ? LHS :
3835 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
     // ADDIC8 is created with an extra MVT::Glue result; Addic.getValue(1)
     // below hands that result to SUBFE8 (the "addc.CA" of the patterns).
3836 SDValue Addic =
3837 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3838 AddInput, S->getI32Imm(~0U, dl)), 0);
3839 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3840 Addic, Addic.getValue(1)), 0);
3841 }
3842 case ISD::SETNE: {
3843 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3844 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3845 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3846 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3847 SDValue Xor = IsRHSZero ? LHS :
3848 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
     // Same shape as SETEQ, with SUBFIC8 (immediate 0) in place of ADDIC8.
3849 SDValue SC =
3850 SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3851 Xor, S->getI32Imm(0, dl)), 0);
3852 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3853 SC, SC.getValue(1)), 0);
3854 }
3855 case ISD::SETGE: {
3856 // {subc.reg, subc.CA} = (subcarry %a, %b)
3857 // (sext (setcc %a, %b, setge)) ->
3858 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3859 // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3860 if (IsRHSZero)
3861 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
     // Otherwise treat (%a >= %b) as (%b <= %a): swap, redo the zero test on
     // the new RHS node and share the SETLE code.
3862 std::swap(LHS, RHS);
3863 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3864 IsRHSZero = RHSConst && RHSConst->isZero();
3865 [[fallthrough]];
3866 }
3867 case ISD::SETLE: {
3868 // {subc.reg, subc.CA} = (subcarry %b, %a)
3869 // (sext (setcc %a, %b, setle)) ->
3870 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3871 // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3872 if (IsRHSZero)
3873 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
     // Despite its name, ShiftL is the RLDICL (1, 63) node, i.e. the
     // "lshr %a, 63" operand of the pattern; ShiftR is the SRADI by 63.
3874 SDValue ShiftR =
3875 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3876 S->getI64Imm(63, dl)), 0);
3877 SDValue ShiftL =
3878 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3879 S->getI64Imm(1, dl),
3880 S->getI64Imm(63, dl)), 0);
     // Only result #1 (the MVT::Glue value) of SUBFC8 is kept; it becomes the
     // last operand of ADDE8.
3881 SDValue SubtractCarry =
3882 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3883 LHS, RHS), 1);
3884 SDValue Adde =
3885 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3886 ShiftR, ShiftL, SubtractCarry), 0);
3887 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3888 }
3889 case ISD::SETGT: {
3890 // {subc.reg, subc.CA} = (subcarry %b, %a)
3891 // (sext (setcc %a, %b, setgt)) ->
3892 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3893 // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
     // A comparison "> -1" is handled with the GESExt compound sequence.
3894 if (IsRHSNegOne)
3895 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3896 if (IsRHSZero) {
3897 SDValue Add =
3898 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3899 S->getI64Imm(-1, dl)), 0);
3900 SDValue Nor =
3901 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3902 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3903 S->getI64Imm(63, dl)), 0);
3904 }
     // Otherwise treat (%a > %b) as (%b < %a): swap, refresh the flags the
     // SETLT case inspects and share its code.
3905 std::swap(LHS, RHS);
3906 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3907 IsRHSZero = RHSConst && RHSConst->isZero();
3908 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3909 [[fallthrough]];
3910 }
3911 case ISD::SETLT: {
3912 // {subc.reg, subc.CA} = (subcarry %a, %b)
3913 // (sext (setcc %a, %b, setlt)) ->
3914 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3915 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
     // A comparison "< 1" is handled with the LESExt compound sequence.
3916 if (IsRHSOne)
3917 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3918 if (IsRHSZero) {
3919 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3920 S->getI64Imm(63, dl)), 0);
3921 }
3922 SDValue SRADINode =
3923 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3924 LHS, S->getI64Imm(63, dl)), 0);
3925 SDValue SRDINode =
3926 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3927 RHS, S->getI64Imm(1, dl),
3928 S->getI64Imm(63, dl)), 0);
     // Only result #1 (the MVT::Glue value) of SUBFC8 is kept; it becomes the
     // last operand of ADDE8.
3929 SDValue SUBFC8Carry =
3930 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3931 RHS, LHS), 1);
3932 SDValue ADDE8Node =
3933 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3934 SRDINode, SRADINode, SUBFC8Carry), 0);
3935 SDValue XORI8Node =
3936 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3937 ADDE8Node, S->getI64Imm(1, dl)), 0);
3938 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3939 XORI8Node), 0);
3940 }
3941 case ISD::SETUGE:
3942 // {subc.reg, subc.CA} = (subcarry %a, %b)
3943 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3944 std::swap(LHS, RHS);
3945 [[fallthrough]];
3946 case ISD::SETULE: {
3947 // {subc.reg, subc.CA} = (subcarry %b, %a)
3948 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3949 SDValue SubtractCarry =
3950 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3951 LHS, RHS), 1);
3952 SDValue ExtSub =
3953 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3954 LHS, SubtractCarry), 0);
     // NOR8 of ExtSub with itself is the "~" of the pattern above.
3955 return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3956 ExtSub, ExtSub), 0);
3957 }
3958 case ISD::SETUGT:
3959 // {subc.reg, subc.CA} = (subcarry %b, %a)
3960 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3961 std::swap(LHS, RHS);
3962 [[fallthrough]];
3963 case ISD::SETULT: {
3964 // {subc.reg, subc.CA} = (subcarry %a, %b)
3965 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3966 SDValue SubCarry =
3967 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3968 RHS, LHS), 1);
3969 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3970 LHS, LHS, SubCarry), 0);
3971 }
3972 }
3973}
3974
3975/// Do all uses of this SDValue need the result in a GPR?
3976/// This is meant to be used on values that have type i1 since
3977/// it is somewhat meaningless to ask if values of other types
3978/// should be kept in GPR's.
3979static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3980 assert(Compare.getOpcode() == ISD::SETCC &&
3981 "An ISD::SETCC node required here.");
3982
3983 // For values that have a single use, the caller should obviously already have
3984 // checked if that use is an extending use. We check the other uses here.
3985 if (Compare.hasOneUse())
3986 return true;
3987 // We want the value in a GPR if it is being extended, used for a select, or
3988 // used in logical operations.
3989 for (auto *CompareUse : Compare.getNode()->uses())
3990 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3991 CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3992 CompareUse->getOpcode() != ISD::SELECT &&
3993 !ISD::isBitwiseLogicOp(CompareUse->getOpcode())) {
3994 OmittedForNonExtendUses++;
3995 return false;
3996 }
3997 return true;
3998}
3999
4000/// Returns an equivalent of a SETCC node but with the result the same width as
4001/// the inputs. This can also be used for SELECT_CC if either the true or false
4002/// values is a power of two while the other is zero.
4003SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
4004 SetccInGPROpts ConvOpts) {
4005 assert((Compare.getOpcode() == ISD::SETCC ||
4006 Compare.getOpcode() == ISD::SELECT_CC) &&
4007 "An ISD::SETCC node required here.");
4008
4009 // Don't convert this comparison to a GPR sequence because there are uses
4010 // of the i1 result (i.e. uses that require the result in the CR).
4011 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
4012 return SDValue();
4013
4014 SDValue LHS = Compare.getOperand(0);
4015 SDValue RHS = Compare.getOperand(1);
4016
4017 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
4018 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
4020 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
4021 EVT InputVT = LHS.getValueType();
4022 if (InputVT != MVT::i32 && InputVT != MVT::i64)
4023 return SDValue();
4024
4025 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
4026 ConvOpts == SetccInGPROpts::SExtInvert)
4027 CC = ISD::getSetCCInverse(CC, InputVT);
4028
4029 bool Inputs32Bit = InputVT == MVT::i32;
4030
4031 SDLoc dl(Compare);
4032 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
4033 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
4034 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
4035 ConvOpts == SetccInGPROpts::SExtInvert;
4036
4037 if (IsSext && Inputs32Bit)
4038 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4039 else if (Inputs32Bit)
4040 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4041 else if (IsSext)
4042 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4043 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4044}
4045
4046} // end anonymous namespace
4047
4048bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
4049 if (N->getValueType(0) != MVT::i32 &&
4050 N->getValueType(0) != MVT::i64)
4051 return false;
4052
4053 // This optimization will emit code that assumes 64-bit registers
4054 // so we don't want to run it in 32-bit mode. Also don't run it
4055 // on functions that are not to be optimized.
4056 if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64())
4057 return false;
4058
4059 // For POWER10, it is more profitable to use the set boolean extension
4060 // instructions rather than the integer compare elimination codegen.
4061 // Users can override this via the command line option, `--ppc-gpr-icmps`.
4062 if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
4063 return false;
4064
4065 switch (N->getOpcode()) {
4066 default: break;
4067 case ISD::ZERO_EXTEND:
4068 case ISD::SIGN_EXTEND:
4069 case ISD::AND:
4070 case ISD::OR:
4071 case ISD::XOR: {
4072 IntegerCompareEliminator ICmpElim(CurDAG, this);
4073 if (SDNode *New = ICmpElim.Select(N)) {
4074 ReplaceNode(N, New);
4075 return true;
4076 }
4077 }
4078 }
4079 return false;
4080}
4081
4082bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
4083 if (N->getValueType(0) != MVT::i32 &&
4084 N->getValueType(0) != MVT::i64)
4085 return false;
4086
4087 if (!UseBitPermRewriter)
4088 return false;
4089
4090 switch (N->getOpcode()) {
4091 default: break;
4092 case ISD::SRL:
4093 // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
4094 // uses the BRH instruction.
4095 if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
4096 N->getOperand(0).getOpcode() == ISD::BSWAP) {
4097 auto &OpRight = N->getOperand(1);
4098 ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
4099 if (SRLConst && SRLConst->getSExtValue() == 16)
4100 return false;
4101 }
4102 [[fallthrough]];
4103 case ISD::ROTL:
4104 case ISD::SHL:
4105 case ISD::AND:
4106 case ISD::OR: {
4107 BitPermutationSelector BPS(CurDAG);
4108 if (SDNode *New = BPS.Select(N)) {
4109 ReplaceNode(N, New);
4110 return true;
4111 }
4112 return false;
4113 }
4114 }
4115
4116 return false;
4117}
4118
4119/// SelectCC - Select a comparison of the specified values with the specified
4120/// condition code, returning the CR# of the expression.
4121SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4122 const SDLoc &dl, SDValue Chain) {
4123 // Always select the LHS.
4124 unsigned Opc;
4125
4126 if (LHS.getValueType() == MVT::i32) {
4127 unsigned Imm;
4128 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4129 if (isInt32Immediate(RHS, Imm)) {
4130 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4131 if (isUInt<16>(Imm))
4132 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4133 getI32Imm(Imm & 0xFFFF, dl)),
4134 0);
4135 // If this is a 16-bit signed immediate, fold it.
4136 if (isInt<16>((int)Imm))
4137 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4138 getI32Imm(Imm & 0xFFFF, dl)),
4139 0);
4140
4141 // For non-equality comparisons, the default code would materialize the
4142 // constant, then compare against it, like this:
4143 // lis r2, 4660
4144 // ori r2, r2, 22136
4145 // cmpw cr0, r3, r2
4146 // Since we are just comparing for equality, we can emit this instead:
4147 // xoris r0,r3,0x1234
4148 // cmplwi cr0,r0,0x5678
4149 // beq cr0,L6
4150 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
4151 getI32Imm(Imm >> 16, dl)), 0);
4152 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
4153 getI32Imm(Imm & 0xFFFF, dl)), 0);
4154 }
4155 Opc = PPC::CMPLW;
4156 } else if (ISD::isUnsignedIntSetCC(CC)) {
4157 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
4158 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4159 getI32Imm(Imm & 0xFFFF, dl)), 0);
4160 Opc = PPC::CMPLW;
4161 } else {
4162 int16_t SImm;
4163 if (isIntS16Immediate(RHS, SImm))
4164 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4165 getI32Imm((int)SImm & 0xFFFF,
4166 dl)),
4167 0);
4168 Opc = PPC::CMPW;
4169 }
4170 } else if (LHS.getValueType() == MVT::i64) {
4171 uint64_t Imm;
4172 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4173 if (isInt64Immediate(RHS.getNode(), Imm)) {
4174 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4175 if (isUInt<16>(Imm))
4176 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4177 getI32Imm(Imm & 0xFFFF, dl)),
4178 0);
4179 // If this is a 16-bit signed immediate, fold it.
4180 if (isInt<16>(Imm))
4181 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4182 getI32Imm(Imm & 0xFFFF, dl)),
4183 0);
4184
4185 // For non-equality comparisons, the default code would materialize the
4186 // constant, then compare against it, like this:
4187 // lis r2, 4660
4188 // ori r2, r2, 22136
4189 // cmpd cr0, r3, r2
4190 // Since we are just comparing for equality, we can emit this instead:
4191 // xoris r0,r3,0x1234
4192 // cmpldi cr0,r0,0x5678
4193 // beq cr0,L6
4194 if (isUInt<32>(Imm)) {
4195 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4196 getI64Imm(Imm >> 16, dl)), 0);
4197 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4198 getI64Imm(Imm & 0xFFFF, dl)),
4199 0);
4200 }
4201 }
4202 Opc = PPC::CMPLD;
4203 } else if (ISD::isUnsignedIntSetCC(CC)) {
4204 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4205 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4206 getI64Imm(Imm & 0xFFFF, dl)), 0);
4207 Opc = PPC::CMPLD;
4208 } else {
4209 int16_t SImm;
4210 if (isIntS16Immediate(RHS, SImm))
4211 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4212 getI64Imm(SImm & 0xFFFF, dl)),
4213 0);
4214 Opc = PPC::CMPD;
4215 }
4216 } else if (LHS.getValueType() == MVT::f32) {
4217 if (Subtarget->hasSPE()) {
4218 switch (CC) {
4219 default:
4220 case ISD::SETEQ:
4221 case ISD::SETNE:
4222 Opc = PPC::EFSCMPEQ;
4223 break;
4224 case ISD::SETLT:
4225 case ISD::SETGE:
4226 case ISD::SETOLT:
4227 case ISD::SETOGE:
4228 case ISD::SETULT:
4229 case ISD::SETUGE:
4230 Opc = PPC::EFSCMPLT;
4231 break;
4232 case ISD::SETGT:
4233 case ISD::SETLE:
4234 case ISD::SETOGT:
4235 case ISD::SETOLE:
4236 case ISD::SETUGT:
4237 case ISD::SETULE:
4238 Opc = PPC::EFSCMPGT;
4239 break;
4240 }
4241 } else
4242 Opc = PPC::FCMPUS;
4243 } else if (LHS.getValueType() == MVT::f64) {
4244 if (Subtarget->hasSPE()) {
4245 switch (CC) {
4246 default:
4247 case ISD::SETEQ:
4248 case ISD::SETNE:
4249 Opc = PPC::EFDCMPEQ;
4250 break;
4251 case ISD::SETLT:
4252 case ISD::SETGE:
4253 case ISD::SETOLT:
4254 case ISD::SETOGE:
4255 case ISD::SETULT:
4256 case ISD::SETUGE:
4257 Opc = PPC::EFDCMPLT;
4258 break;
4259 case ISD::SETGT:
4260 case ISD::SETLE:
4261 case ISD::SETOGT:
4262 case ISD::SETOLE:
4263 case ISD::SETUGT:
4264 case ISD::SETULE:
4265 Opc = PPC::EFDCMPGT;
4266 break;
4267 }
4268 } else
4269 Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4270 } else {
4271 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4272 assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4273 Opc = PPC::XSCMPUQP;
4274 }
4275 if (Chain)
4276 return SDValue(
4277 CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4278 0);
4279 else
4280 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4281}
4282
4284 const PPCSubtarget *Subtarget) {
4285 // For SPE instructions, the result is in GT bit of the CR
4286 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4287
4288 switch (CC) {
4289 case ISD::SETUEQ:
4290 case ISD::SETONE:
4291 case ISD::SETOLE:
4292 case ISD::SETOGE:
4293 llvm_unreachable("Should be lowered by legalize!");
4294 default: llvm_unreachable("Unknown condition!");
4295 case ISD::SETOEQ:
4296 case ISD::SETEQ:
4297 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4298 case ISD::SETUNE:
4299 case ISD::SETNE:
4300 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4301 case ISD::SETOLT:
4302 case ISD::SETLT:
4303 return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4304 case ISD::SETULE:
4305 case ISD::SETLE:
4306 return PPC::PRED_LE;
4307 case ISD::SETOGT:
4308 case ISD::SETGT:
4309 return PPC::PRED_GT;
4310 case ISD::SETUGE:
4311 case ISD::SETGE:
4312 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4313 case ISD::SETO: return PPC::PRED_NU;
4314 case ISD::SETUO: return PPC::PRED_UN;
4315 // These two are invalid for floating point. Assume we have int.
4316 case ISD::SETULT: return PPC::PRED_LT;
4317 case ISD::SETUGT: return PPC::PRED_GT;
4318 }
4319}
4320
4321/// getCRIdxForSetCC - Return the index of the condition register field
4322/// associated with the SetCC condition, and whether or not the field is
4323/// treated as inverted. That is, lt = 0; ge = 0 inverted.
4324static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4325 Invert = false;
4326 switch (CC) {
4327 default: llvm_unreachable("Unknown condition!");
4328 case ISD::SETOLT:
4329 case ISD::SETLT: return 0; // Bit #0 = SETOLT
4330 case ISD::SETOGT:
4331 case ISD::SETGT: return 1; // Bit #1 = SETOGT
4332 case ISD::SETOEQ:
4333 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4334 case ISD::SETUO: return 3; // Bit #3 = SETUO
4335 case ISD::SETUGE:
4336 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4337 case ISD::SETULE:
4338 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4339 case ISD::SETUNE:
4340 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4341 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4342 case ISD::SETUEQ:
4343 case ISD::SETOGE:
4344 case ISD::SETOLE:
4345 case ISD::SETONE:
4346 llvm_unreachable("Invalid branch code: should be expanded by legalize");
4347 // These are invalid for floating point. Assume integer.
4348 case ISD::SETULT: return 0;
4349 case ISD::SETUGT: return 1;
4350 }
4351}
4352
4353// getVCmpInst: return the vector compare instruction for the specified
4354// vector type and condition code. Since this is for altivec specific code,
4355// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4356// and v4f32).
4357static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4358 bool HasVSX, bool &Swap, bool &Negate) {
4359 Swap = false;
4360 Negate = false;
4361
4362 if (VecVT.isFloatingPoint()) {
4363 /* Handle some cases by swapping input operands. */
4364 switch (CC) {
4365 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4366 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4367 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4368 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4369 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4370 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4371 default: break;
4372 }
4373 /* Handle some cases by negating the result. */
4374 switch (CC) {
4375 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4376 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4377 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4378 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4379 default: break;
4380 }
4381 /* We have instructions implementing the remaining cases. */
4382 switch (CC) {
4383 case ISD::SETEQ:
4384 case ISD::SETOEQ:
4385 if (VecVT == MVT::v4f32)
4386 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4387 else if (VecVT == MVT::v2f64)
4388 return PPC::XVCMPEQDP;
4389 break;
4390 case ISD::SETGT:
4391 case ISD::SETOGT:
4392 if (VecVT == MVT::v4f32)
4393 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4394 else if (VecVT == MVT::v2f64)
4395 return PPC::XVCMPGTDP;
4396 break;
4397 case ISD::SETGE:
4398 case ISD::SETOGE:
4399 if (VecVT == MVT::v4f32)
4400 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4401 else if (VecVT == MVT::v2f64)
4402 return PPC::XVCMPGEDP;
4403 break;
4404 default:
4405 break;
4406 }
4407 llvm_unreachable("Invalid floating-point vector compare condition");
4408 } else {
4409 /* Handle some cases by swapping input operands. */
4410 switch (CC) {
4411 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4412 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4413 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4414 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4415 default: break;
4416 }
4417 /* Handle some cases by negating the result. */
4418 switch (CC) {
4419 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4420 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4421 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4422 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4423 default: break;
4424 }
4425 /* We have instructions implementing the remaining cases. */
4426 switch (CC) {
4427 case ISD::SETEQ:
4428 case ISD::SETUEQ:
4429 if (VecVT == MVT::v16i8)
4430 return PPC::VCMPEQUB;
4431 else if (VecVT == MVT::v8i16)
4432 return PPC::VCMPEQUH;
4433 else if (VecVT == MVT::v4i32)
4434 return PPC::VCMPEQUW;
4435 else if (VecVT == MVT::v2i64)
4436 return PPC::VCMPEQUD;
4437 else if (VecVT == MVT::v1i128)
4438 return PPC::VCMPEQUQ;
4439 break;
4440 case ISD::SETGT:
4441 if (VecVT == MVT::v16i8)
4442 return PPC::VCMPGTSB;
4443 else if (VecVT == MVT::v8i16)
4444 return PPC::VCMPGTSH;
4445 else if (VecVT == MVT::v4i32)
4446 return PPC::VCMPGTSW;
4447 else if (VecVT == MVT::v2i64)
4448 return PPC::VCMPGTSD;
4449 else if (VecVT == MVT::v1i128)
4450 return PPC::VCMPGTSQ;
4451 break;
4452 case ISD::SETUGT:
4453 if (VecVT == MVT::v16i8)
4454 return PPC::VCMPGTUB;
4455 else if (VecVT == MVT::v8i16)
4456 return PPC::VCMPGTUH;
4457 else if (VecVT == MVT::v4i32)
4458 return PPC::VCMPGTUW;
4459 else if (VecVT == MVT::v2i64)
4460 return PPC::VCMPGTUD;
4461 else if (VecVT == MVT::v1i128)
4462 return PPC::VCMPGTUQ;
4463 break;
4464 default:
4465 break;
4466 }
4467 llvm_unreachable("Invalid integer vector compare condition");
4468 }
4469}
4470
4471bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
4472 SDLoc dl(N);
4473 unsigned Imm;
4474 bool IsStrict = N->isStrictFPOpcode();
4476 cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
4477 EVT PtrVT =
4479 bool isPPC64 = (PtrVT == MVT::i64);
4480 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
4481
4482 SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
4483 SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
4484
4485 if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
4486 // We can codegen setcc op, imm very efficiently compared to a brcond.
4487 // Check for those cases here.
4488 // setcc op, 0
4489 if (Imm == 0) {
4490 SDValue Op = LHS;
4491 switch (CC) {
4492 default: break;
4493 case ISD::SETEQ: {
4494 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
4495 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
4496 getI32Imm(31, dl) };
4497 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4498 return true;
4499 }
4500 case ISD::SETNE: {
4501 if (isPPC64) break;
4502 SDValue AD =
4503 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4504 Op, getI32Imm(~0U, dl)), 0);
4505 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
4506 return true;
4507 }
4508 case ISD::SETLT: {
4509 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4510 getI32Imm(31, dl) };
4511 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4512 return true;
4513 }
4514 case ISD::SETGT: {
4515 SDValue T =
4516 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
4517 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
4518 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
4519 getI32Imm(31, dl) };
4520 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4521 return true;
4522 }
4523 }
4524 } else if (Imm == ~0U) { // setcc op, -1
4525 SDValue Op = LHS;
4526 switch (CC) {
4527 default: break;
4528 case ISD::SETEQ:
4529 if (isPPC64) break;
4530 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4531 Op, getI32Imm(1, dl)), 0);
4532 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
4533 SDValue(CurDAG->getMachineNode(PPC::LI, dl,
4534 MVT::i32,
4535 getI32Imm(0, dl)),
4536 0), Op.getValue(1));
4537 return true;
4538 case ISD::SETNE: {
4539 if (isPPC64) break;
4540 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
4541 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4542 Op, getI32Imm(~0U, dl));
4543 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
4544 SDValue(AD, 1));
4545 return true;
4546 }
4547 case ISD::SETLT: {
4548 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
4549 getI32Imm(1, dl)), 0);
4550 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
4551 Op), 0);
4552 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4553 getI32Imm(31, dl) };
4554 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4555 return true;
4556 }
4557 case ISD::SETGT: {
4558 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4559 getI32Imm(31, dl) };
4560 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4561 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
4562 return true;
4563 }
4564 }
4565 }
4566 }
4567
4568 // Altivec Vector compare instructions do not set any CR register by default and
4569 // vector compare operations return the same type as the operands.
4570 if (!IsStrict && LHS.getValueType().isVector()) {
4571 if (Subtarget->hasSPE())
4572 return false;
4573
4574 EVT VecVT = LHS.getValueType();
4575 bool Swap, Negate;
4576 unsigned int VCmpInst =
4577 getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
4578 if (Swap)
4579 std::swap(LHS, RHS);
4580
4581 EVT ResVT = VecVT.changeVectorElementTypeToInteger();
4582 if (Negate) {
4583 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
4584 CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4585 ResVT, VCmp, VCmp);
4586 return true;
4587 }
4588
4589 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
4590 return true;
4591 }
4592
4593 if (Subtarget->useCRBits())
4594 return false;
4595
4596 bool Inv;
4597 unsigned Idx = getCRIdxForSetCC(CC, Inv);
4598 SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
4599 if (IsStrict)
4600 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
4601 SDValue IntCR;
4602
4603 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4604 // The correct compare instruction is already set by SelectCC()
4605 if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4606 Idx = 1;
4607 }
4608
4609 // Force the ccreg into CR7.
4610 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
4611
4612 SDValue InGlue; // Null incoming flag value.
4613 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
4614 InGlue).getValue(1);
4615
4616 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
4617 CCReg), 0);
4618
4619 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
4620 getI32Imm(31, dl), getI32Imm(31, dl) };
4621 if (!Inv) {
4622 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4623 return true;
4624 }
4625
4626 // Get the specified bit.
4627 SDValue Tmp =
4628 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4629 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
4630 return true;
4631}
4632
4633/// Does this node represent a load/store node whose address can be represented
4634/// with a register plus an immediate that's a multiple of \p Val:
4635bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4636 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4637 StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4638 MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
4639 SDValue AddrOp;
4640 if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4641 AddrOp = N->getOperand(1);
4642 else if (STN)
4643 AddrOp = STN->getOperand(2);
4644
4645 // If the address points a frame object or a frame object with an offset,
4646 // we need to check the object alignment.
4647 short Imm = 0;
4648 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4649 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4650 AddrOp)) {
4651 // If op0 is a frame index that is under aligned, we can't do it either,
4652 // because it is translated to r31 or r1 + slot + offset. We won't know the
4653 // slot number until the stack frame is finalized.
4654 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4655 unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4656 if ((SlotAlign % Val) != 0)
4657 return false;
4658
4659 // If we have an offset, we need further check on the offset.
4660 if (AddrOp.getOpcode() != ISD::ADD)
4661 return true;
4662 }
4663
4664 if (AddrOp.getOpcode() == ISD::ADD)
4665 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4666
4667 // If the address comes from the outside, the offset will be zero.
4668 return AddrOp.getOpcode() == ISD::CopyFromReg;
4669}
4670
4671void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4672 // Transfer memoperands.
4673 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4674 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4675}
4676
4678 bool &NeedSwapOps, bool &IsUnCmp) {
4679
4680 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4681
4682 SDValue LHS = N->getOperand(0);
4683 SDValue RHS = N->getOperand(1);
4684 SDValue TrueRes = N->getOperand(2);
4685 SDValue FalseRes = N->getOperand(3);
4686 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4687 if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4688 N->getSimpleValueType(0) != MVT::i32))
4689 return false;
4690
4691 // We are looking for any of:
4692 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4693 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4694 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4695 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4696 int64_t TrueResVal = TrueConst->getSExtValue();
4697 if ((TrueResVal < -1 || TrueResVal > 1) ||
4698 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4699 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4700 (TrueResVal == 0 &&
4701 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4702 return false;
4703
4704 SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4705 ? FalseRes
4706 : FalseRes.getOperand(0);
4707 bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4708 if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4709 SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4710 return false;
4711
4712 // Without this setb optimization, the outer SELECT_CC will be manually
4713 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4714 // transforms pseudo instruction to isel instruction. When there are more than
4715 // one use for result like zext/sext, with current optimization we only see
4716 // isel is replaced by setb but can't see any significant gain. Since
4717 // setb has longer latency than original isel, we should avoid this. Another
4718 // point is that setb requires comparison always kept, it can break the
4719 // opportunity to get the comparison away if we have in future.
4720 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4721 return false;
4722
4723 SDValue InnerLHS = SetOrSelCC.getOperand(0);
4724 SDValue InnerRHS = SetOrSelCC.getOperand(1);
4725 ISD::CondCode InnerCC =
4726 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4727 // If the inner comparison is a select_cc, make sure the true/false values are
4728 // 1/-1 and canonicalize it if needed.
4729 if (InnerIsSel) {
4730 ConstantSDNode *SelCCTrueConst =
4731 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4732 ConstantSDNode *SelCCFalseConst =
4733 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4734 if (!SelCCTrueConst || !SelCCFalseConst)
4735 return false;
4736 int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4737 int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4738 // The values must be -1/1 (requiring a swap) or 1/-1.
4739 if (SelCCTVal == -1 && SelCCFVal == 1) {
4740 std::swap(InnerLHS, InnerRHS);
4741 } else if (SelCCTVal != 1 || SelCCFVal != -1)
4742 return false;
4743 }
4744
4745 // Canonicalize unsigned case
4746 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4747 IsUnCmp = true;
4748 InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4749 }
4750
4751 bool InnerSwapped = false;
4752 if (LHS == InnerRHS && RHS == InnerLHS)
4753 InnerSwapped = true;
4754 else if (LHS != InnerLHS || RHS != InnerRHS)
4755 return false;
4756
4757 switch (CC) {
4758 // (select_cc lhs, rhs, 0, \
4759 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4760 case ISD::SETEQ:
4761 if (!InnerIsSel)
4762 return false;
4763 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4764 return false;
4765 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4766 break;
4767
4768 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4769 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4770 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4771 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4772 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4773 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4774 case ISD::SETULT:
4775 if (!IsUnCmp && InnerCC != ISD::SETNE)
4776 return false;
4777 IsUnCmp = true;
4778 [[fallthrough]];
4779 case ISD::SETLT:
4780 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4781 (InnerCC == ISD::SETLT && InnerSwapped))
4782 NeedSwapOps = (TrueResVal == 1);
4783 else
4784 return false;
4785 break;
4786
4787 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4788 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4789 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4790 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4791 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4792 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4793 case ISD::SETUGT:
4794 if (!IsUnCmp && InnerCC != ISD::SETNE)
4795 return false;
4796 IsUnCmp = true;
4797 [[fallthrough]];
4798 case ISD::SETGT:
4799 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4800 (InnerCC == ISD::SETGT && InnerSwapped))
4801 NeedSwapOps = (TrueResVal == -1);
4802 else
4803 return false;
4804 break;
4805
4806 default:
4807 return false;
4808 }
4809
4810 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4811 LLVM_DEBUG(N->dump());
4812
4813 return true;
4814}
4815
4816// Return true if it's a software square-root/divide operand.
4817static bool isSWTestOp(SDValue N) {
4818 if (N.getOpcode() == PPCISD::FTSQRT)
4819 return true;
4820 if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
4821 N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4822 return false;
4823 switch (N.getConstantOperandVal(0)) {
4824 case Intrinsic::ppc_vsx_xvtdivdp:
4825 case Intrinsic::ppc_vsx_xvtdivsp:
4826 case Intrinsic::ppc_vsx_xvtsqrtdp:
4827 case Intrinsic::ppc_vsx_xvtsqrtsp:
4828 return true;
4829 }
4830 return false;
4831}
4832
4833bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4834 assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4835 // We are looking for following patterns, where `truncate to i1` actually has
4836 // the same semantic with `and 1`.
4837 // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4838 // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4839 // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4840 // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4841 // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4842 // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4843 // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4844 // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4845 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4846 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4847 return false;
4848
4849 SDValue CmpRHS = N->getOperand(3);
4850 if (!isNullConstant(CmpRHS))
4851 return false;
4852
4853 SDValue CmpLHS = N->getOperand(2);
4854 if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
4855 return false;
4856
4857 unsigned PCC = 0;
4858 bool IsCCNE = CC == ISD::SETNE;
4859 if (CmpLHS.getOpcode() == ISD::AND &&
4860 isa<ConstantSDNode>(CmpLHS.getOperand(1)))
4861 switch (CmpLHS.getConstantOperandVal(1)) {
4862 case 1:
4863 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4864 break;
4865 case 2:
4866 PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4867 break;
4868 case 4:
4869 PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4870 break;
4871 case 8:
4872 PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4873 break;
4874 default:
4875 return false;
4876 }
4877 else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4878 CmpLHS.getValueType() == MVT::i1)
4879 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4880
4881 if (PCC) {
4882 SDLoc dl(N);
4883 SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
4884 N->getOperand(0)};
4885 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4886 return true;
4887 }
4888 return false;
4889}
4890
4891bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4892 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
4893 // value, for example when crbits is disabled. If so, select the
4894 // loop_decrement intrinsics now.
4895 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4896 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
4897
4898 if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) ||
4899 isNullConstant(LHS.getOperand(1)))
4900 return false;
4901
4902 if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4903 LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement)
4904 return false;
4905
4906 if (!isa<ConstantSDNode>(RHS))
4907 return false;
4908
4909 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
4910 "Counter decrement comparison is not EQ or NE");
4911
4912 SDValue OldDecrement = LHS.getOperand(0);
4913 assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
4914
4915 SDLoc DecrementLoc(OldDecrement);
4916 SDValue ChainInput = OldDecrement.getOperand(0);
4917 SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc)
4918 : getI32Imm(1, DecrementLoc)};
4919 unsigned DecrementOpcode =
4920 Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4921 SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
4922 MVT::i1, DecrementOps);
4923
4924 unsigned Val = RHS->getAsZExtVal();
4925 bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val);
4926 unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4927
4928 ReplaceUses(LHS.getValue(0), LHS.getOperand(1));
4929 CurDAG->RemoveDeadNode(LHS.getNode());
4930
4931 // Mark the old loop_decrement intrinsic as dead.
4932 ReplaceUses(OldDecrement.getValue(1), ChainInput);
4933 CurDAG->RemoveDeadNode(OldDecrement.getNode());
4934
4935 SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
4936 ChainInput, N->getOperand(0));
4937
4938 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0),
4939 N->getOperand(4), Chain);
4940 return true;
4941}
4942
4943bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4944 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4945 unsigned Imm;
4946 if (!isInt32Immediate(N->getOperand(1), Imm))
4947 return false;
4948
4949 SDLoc dl(N);
4950 SDValue Val = N->getOperand(0);
4951 unsigned SH, MB, ME;
4952 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4953 // with a mask, emit rlwinm
4954 if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
4955 Val = Val.getOperand(0);
4956 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4957 getI32Imm(ME, dl)};
4958 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4959 return true;
4960 }
4961
4962 // If this is just a masked value where the input is not handled, and
4963 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4964 if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4965 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4966 getI32Imm(ME, dl)};
4967 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4968 return true;
4969 }
4970
4971 // AND X, 0 -> 0, not "rlwinm 32".
4972 if (Imm == 0) {
4973 ReplaceUses(SDValue(N, 0), N->getOperand(1));
4974 return true;
4975 }
4976
4977 return false;
4978}
4979
4980bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
4981 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4983 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4984 return false;
4985
4986 unsigned MB, ME;
4987 if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
4988 // MB ME
4989 // +----------------------+
4990 // |xxxxxxxxxxx00011111000|
4991 // +----------------------+
4992 // 0 32 64
4993 // We can only do it if the MB is larger than 32 and MB <= ME
4994 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
4995 // we didn't rotate it.
4996 SDLoc dl(N);
4997 SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
4998 getI64Imm(ME - 32, dl)};
4999 CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
5000 return true;
5001 }
5002
5003 return false;
5004}
5005
5006bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
5007 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5009 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
5010 return false;
5011
5012 // Do nothing if it is 16-bit imm as the pattern in the .td file handle
5013 // it well with "andi.".
5014 if (isUInt<16>(Imm64))
5015 return false;
5016
5017 SDLoc Loc(N);
5018 SDValue Val = N->getOperand(0);
5019
5020 // Optimized with two rldicl's as follows:
5021 // Add missing bits on left to the mask and check that the mask is a
5022 // wrapped run of ones, i.e.
5023 // Change pattern |0001111100000011111111|
5024 // to |1111111100000011111111|.
5025 unsigned NumOfLeadingZeros = llvm::countl_zero(Imm64);
5026 if (NumOfLeadingZeros != 0)
5027 Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
5028
5029 unsigned MB, ME;
5030 if (!isRunOfOnes64(Imm64, MB, ME))
5031 return false;
5032
5033 // ME MB MB-ME+63
5034 // +----------------------+ +----------------------+
5035 // |1111111100000011111111| -> |0000001111111111111111|
5036 // +----------------------+ +----------------------+
5037 // 0 63 0 63
5038 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
5039 unsigned OnesOnLeft = ME + 1;
5040 unsigned ZerosInBetween = (MB - ME + 63) & 63;
5041 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
5042 // on the left the bits that are already zeros in the mask.
5043 Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
5044 getI64Imm(OnesOnLeft, Loc),
5045 getI64Imm(ZerosInBetween, Loc)),
5046 0);
5047 // MB-ME+63 ME MB
5048 // +----------------------+ +----------------------+
5049 // |0000001111111111111111| -> |0001111100000011111111|
5050 // +----------------------+ +----------------------+
5051 // 0 63 0 63
5052 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
5053 // left the number of ones we previously added.
5054 SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
5055 getI64Imm(NumOfLeadingZeros, Loc)};
5056 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5057 return true;
5058}
5059
5060bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
5061 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5062 unsigned Imm;
5063 if (!isInt32Immediate(N->getOperand(1), Imm))
5064 return false;
5065
5066 SDValue Val = N->getOperand(0);
5067 unsigned Imm2;
5068 // ISD::OR doesn't get all the bitfield insertion fun.
5069 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
5070 // bitfield insert.
5071 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
5072 return false;
5073
5074 // The idea here is to check whether this is equivalent to:
5075 // (c1 & m) | (x & ~m)
5076 // where m is a run-of-ones mask. The logic here is that, for each bit in
5077 // c1 and c2:
5078 // - if both are 1, then the output will be 1.
5079 // - if both are 0, then the output will be 0.
5080 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
5081 // come from x.
5082 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
5083 // be 0.
5084 // If that last condition is never the case, then we can form m from the
5085 // bits that are the same between c1 and c2.
5086 unsigned MB, ME;
5087 if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
5088 SDLoc dl(N);
5089 SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
5090 getI32Imm(MB, dl), getI32Imm(ME, dl)};
5091 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
5092 return true;
5093 }
5094
5095 return false;
5096}
5097
5098bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5099 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5100
5102 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5103 return false;
5104
5105 SDValue Val = N->getOperand(0);
5106
5107 if (Val.getOpcode() != ISD::ROTL)
5108 return false;
5109
5110 // Looking to try to avoid a situation like this one:
5111 // %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5112 // %and1 = and i64 %2, 9223372036854775807
5113 // In this function we are looking to try to match RLDCL. However, the above
5114 // DAG would better match RLDICL instead which is not what we are looking
5115 // for here.
5116 SDValue RotateAmt = Val.getOperand(1);
5117 if (RotateAmt.getOpcode() == ISD::Constant)
5118 return false;
5119
5120 unsigned MB = 64 - llvm::countr_one(Imm64);
5121 SDLoc dl(N);
5122 SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
5123 CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
5124 return true;
5125}
5126
5127bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
5128 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5130 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5131 return false;
5132
5133 // If this is a 64-bit zero-extension mask, emit rldicl.
5134 unsigned MB = 64 - llvm::countr_one(Imm64);
5135 unsigned SH = 0;
5136 unsigned Imm;
5137 SDValue Val = N->getOperand(0);
5138 SDLoc dl(N);
5139
5140 if (Val.getOpcode() == ISD::ANY_EXTEND) {
5141 auto Op0 = Val.getOperand(0);
5142 if (Op0.getOpcode() == ISD::SRL &&
5143 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
5144
5145 auto ResultType = Val.getNode()->getValueType(0);
5146 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
5147 SDValue IDVal(ImDef, 0);
5148
5149 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
5150 IDVal, Op0.getOperand(0),
5151 getI32Imm(1, dl)),
5152 0);
5153 SH = 64 - Imm;
5154 }
5155 }
5156
5157 // If the operand is a logical right shift, we can fold it into this
5158 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
5159 // for n <= mb. The right shift is really a left rotate followed by a
5160 // mask, and this mask is a more-restrictive sub-mask of the mask implied
5161 // by the shift.
5162 if (Val.getOpcode() == ISD::SRL &&
5163 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
5164 assert(Imm < 64 && "Illegal shift amount");
5165 Val = Val.getOperand(0);
5166 SH = 64 - Imm;
5167 }
5168
5169 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
5170 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5171 return true;
5172}
5173
5174bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
5175 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5177 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5178 !isMask_64(~Imm64))
5179 return false;
5180
5181 // If this is a negated 64-bit zero-extension mask,
5182 // i.e. the immediate is a sequence of ones from most significant side
5183 // and all zero for reminder, we should use rldicr.
5184 unsigned MB = 63 - llvm::countr_one(~Imm64);
5185 unsigned SH = 0;
5186 SDLoc dl(N);
5187 SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
5188 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5189 return true;
5190}
5191
5192bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
5193 assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
5195 unsigned MB, ME;
5196 SDValue N0 = N->getOperand(0);
5197
5198 // We won't get fewer instructions if the imm is 32-bit integer.
5199 // rldimi requires the imm to have consecutive ones with both sides zero.
5200 // Also, make sure the first Op has only one use, otherwise this may increase
5201 // register pressure since rldimi is destructive.
5202 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5203 isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
5204 return false;
5205
5206 unsigned SH = 63 - ME;
5207 SDLoc Dl(N);
5208 // Use select64Imm for making LI instr instead of directly putting Imm64
5209 SDValue Ops[] = {
5210 N->getOperand(0),
5211 SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
5212 getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
5213 CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
5214 return true;
5215}
5216
5217// Select - Convert the specified operand from a target-independent to a
5218// target-specific node if it hasn't already been changed.
5219void PPCDAGToDAGISel::Select(SDNode *N) {
5220 SDLoc dl(N);
5221 if (N->isMachineOpcode()) {
5222 N->setNodeId(-1);
5223 return; // Already selected.
5224 }
5225
5226 // In case any misguided DAG-level optimizations form an ADD with a
5227 // TargetConstant operand, crash here instead of miscompiling (by selecting
5228 // an r+r add instead of some kind of r+i add).
5229 if (N->getOpcode() == ISD::ADD &&
5230 N->getOperand(1).getOpcode() == ISD::TargetConstant)
5231 llvm_unreachable("Invalid ADD with TargetConstant operand");
5232
5233 // Try matching complex bit permutations before doing anything else.
5234 if (tryBitPermutation(N))
5235 return;
5236
5237 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
5238 if (tryIntCompareInGPR(N))
5239 return;
5240
5241 switch (N->getOpcode()) {
5242 default: break;
5243
5244 case ISD::Constant:
5245 if (N->getValueType(0) == MVT::i64) {
5246 ReplaceNode(N, selectI64Imm(CurDAG, N));
5247 return;
5248 }
5249 break;
5250
5251 case ISD::INTRINSIC_VOID: {
5252 auto IntrinsicID = N->getConstantOperandVal(1);
5253 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
5254 IntrinsicID != Intrinsic::ppc_trapd &&
5255 IntrinsicID != Intrinsic::ppc_trap)
5256 break;
5257 unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||
5258 IntrinsicID == Intrinsic::ppc_trapd)
5259 ? PPC::TDI
5260 : PPC::TWI;
5261 SmallVector<SDValue, 4> OpsWithMD;
5262 unsigned MDIndex;
5263 if (IntrinsicID == Intrinsic::ppc_tdw ||
5264 IntrinsicID == Intrinsic::ppc_tw) {
5265 SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
5266 int16_t SImmOperand2;
5267 int16_t SImmOperand3;
5268 int16_t SImmOperand4;
5269 bool isOperand2IntS16Immediate =
5270 isIntS16Immediate(N->getOperand(2), SImmOperand2);
5271 bool isOperand3IntS16Immediate =
5272 isIntS16Immediate(N->getOperand(3), SImmOperand3);
5273 // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5274 // reg or imm + imm. The imm + imm form will be optimized to either an
5275 // unconditional trap or a nop in a later pass.
5276 if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5277 Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5278 else if (isOperand3IntS16Immediate)
5279 // The 2nd and 3rd operands are reg + imm.
5280 Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
5281 else {
5282 // The 2nd and 3rd operands are imm + reg.
5283 bool isOperand4IntS16Immediate =
5284 isIntS16Immediate(N->getOperand(4), SImmOperand4);
5285 (void)isOperand4IntS16Immediate;
5286 assert(isOperand4IntS16Immediate &&
5287 "The 4th operand is not an Immediate");
5288 // We need to flip the condition immediate TO.
5289 int16_t TO = int(SImmOperand4) & 0x1F;
5290 // We swap the first and second bit of TO if they are not same.
5291 if ((TO & 0x1) != ((TO & 0x2) >> 1))
5292 TO = (TO & 0x1) ? TO + 1 : TO - 1;
5293 // We swap the fourth and fifth bit of TO if they are not same.
5294 if ((TO & 0x8) != ((TO & 0x10) >> 1))
5295 TO = (TO & 0x8) ? TO + 8 : TO - 8;
5296 Ops[0] = getI32Imm(TO, dl);
5297 Ops[1] = N->getOperand(3);
5298 Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
5299 }
5300 OpsWithMD = {Ops[0], Ops[1], Ops[2]};
5301 MDIndex = 5;
5302 } else {
5303 OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)};
5304 MDIndex = 3;
5305 }
5306
5307 if (N->getNumOperands() > MDIndex) {
5308 SDValue MDV = N->getOperand(MDIndex);
5309 const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
5310 assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!");
5311 assert((isa<MDString>(MD->getOperand(0)) &&
5312 cast<MDString>(MD->getOperand(0))->getString() ==
5313 "ppc-trap-reason") &&
5314 "Unsupported annotation data type!");
5315 for (unsigned i = 1; i < MD->getNumOperands(); i++) {
5316 assert(isa<MDString>(MD->getOperand(i)) &&
5317 "Invalid data type for annotation ppc-trap-reason!");
5318 OpsWithMD.push_back(
5319 getI32Imm(std::stoi(cast<MDString>(
5320 MD->getOperand(i))->getString().str()), dl));
5321 }
5322 }
5323 OpsWithMD.push_back(N->getOperand(0)); // chain
5324 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);
5325 return;
5326 }
5327
5329 // We emit the PPC::FSELS instruction here because of type conflicts with
5330 // the comparison operand. The FSELS instruction is defined to use an 8-byte
5331 // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5332 // value for the comparison. When selecting through a .td file, a type
5333 // error is raised. Must check this first so we never break on the
5334 // !Subtarget->isISA3_1() check.
5335 auto IntID = N->getConstantOperandVal(0);
5336 if (IntID == Intrinsic::ppc_fsels) {
5337 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
5338 CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
5339 return;
5340 }
5341
5342 if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
5343 auto Pred = N->getConstantOperandVal(1);
5344 unsigned Opcode =
5345 IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5346 unsigned SubReg = 0;
5347 unsigned ShiftVal = 0;
5348 bool Reverse = false;
5349 switch (Pred) {
5350 case 0:
5351 SubReg = PPC::sub_eq;
5352 ShiftVal = 1;
5353 break;
5354 case 1:
5355 SubReg = PPC::sub_eq;
5356 ShiftVal = 1;
5357 Reverse = true;
5358 break;
5359 case 2:
5360 SubReg = PPC::sub_lt;
5361 ShiftVal = 3;
5362 break;
5363 case 3:
5364 SubReg = PPC::sub_lt;
5365 ShiftVal = 3;
5366 Reverse = true;
5367 break;
5368 case 4:
5369 SubReg = PPC::sub_gt;
5370 ShiftVal = 2;
5371 break;
5372 case 5:
5373 SubReg = PPC::sub_gt;
5374 ShiftVal = 2;
5375 Reverse = true;
5376 break;
5377 case 6:
5378 SubReg = PPC::sub_un;
5379 break;
5380 case 7:
5381 SubReg = PPC::sub_un;
5382 Reverse = true;
5383 break;
5384 }
5385
5386 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5387 SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
5388 CurDAG->getTargetConstant(0, dl, MVT::i32)};
5389 SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
5390 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5391 // On Power10, we can use SETBC[R]. On prior architectures, we have to use
5392 // MFOCRF and shift/negate the value.
5393 if (Subtarget->isISA3_1()) {
5394 SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
5395 SDValue CRBit = SDValue(
5396 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5397 CR6Reg, SubRegIdx, BCDOp.getValue(1)),
5398 0);
5399 CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
5400 CRBit);
5401 } else {
5402 SDValue Move =
5403 SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
5404 BCDOp.getValue(1)),
5405 0);
5406 SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
5407 getI32Imm(31, dl), getI32Imm(31, dl)};
5408 if (!Reverse)
5409 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5410 else {
5411 SDValue Shift = SDValue(
5412 CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
5413 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
5414 }
5415 }
5416 return;
5417 }
5418
5419 if (!Subtarget->isISA3_1())
5420 break;
5421 unsigned Opcode = 0;
5422 switch (IntID) {
5423 default:
5424 break;
5425 case Intrinsic::ppc_altivec_vstribr_p:
5426 Opcode = PPC::VSTRIBR_rec;
5427 break;
5428 case Intrinsic::ppc_altivec_vstribl_p:
5429 Opcode = PPC::VSTRIBL_rec;
5430 break;
5431 case Intrinsic::ppc_altivec_vstrihr_p:
5432 Opcode = PPC::VSTRIHR_rec;
5433 break;
5434 case Intrinsic::ppc_altivec_vstrihl_p:
5435 Opcode = PPC::VSTRIHL_rec;
5436 break;
5437 }
5438 if (!Opcode)
5439 break;
5440
5441 // Generate the appropriate vector string isolate intrinsic to match.
5442 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5443 SDValue VecStrOp =
5444 SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
5445 // Vector string isolate instructions update the EQ bit of CR6.
5446 // Generate a SETBC instruction to extract the bit and place it in a GPR.
5447 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
5448 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5449 SDValue CRBit = SDValue(
5450 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5451 CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
5452 0);
5453 CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
5454 return;
5455 }
5456
5457 case ISD::SETCC:
5458 case ISD::STRICT_FSETCC:
5460 if (trySETCC(N))
5461 return;
5462 break;
5463 // These nodes will be transformed into GETtlsADDR32 node, which
5464 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5467 const Module *Mod = MF->getFunction().getParent();
5468 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5469 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
5470 Mod->getPICLevel() == PICLevel::SmallPIC)
5471 break;
5472 // Attach global base pointer on GETtlsADDR32 node in order to
5473 // generate secure plt code for TLS symbols.
5474 getGlobalBaseReg();
5475 } break;
5476 case PPCISD::CALL: {
5477 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5478 !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
5479 !Subtarget->isTargetELF())
5480 break;
5481
5482 SDValue Op = N->getOperand(1);
5483
5484 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5485 if (GA->getTargetFlags() == PPCII::MO_PLT)
5486 getGlobalBaseReg();
5487 }
5488 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
5489 if (ES->getTargetFlags() == PPCII::MO_PLT)
5490 getGlobalBaseReg();
5491 }
5492 }
5493 break;
5494
5496 ReplaceNode(N, getGlobalBaseReg());
5497 return;
5498
5499 case ISD::FrameIndex:
5500 selectFrameIndex(N, N);
5501 return;
5502
5503 case PPCISD::MFOCRF: {
5504 SDValue InGlue = N->getOperand(1);
5505 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
5506 N->getOperand(0), InGlue));
5507 return;
5508 }
5509
5511 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
5512 MVT::Other, N->getOperand(0)));
5513 return;
5514
5515 case PPCISD::SRA_ADDZE: {
5516 SDValue N0 = N->getOperand(0);
5517 SDValue ShiftAmt =
5518 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
5519 getConstantIntValue(), dl,
5520 N->getValueType(0));
5521 if (N->getValueType(0) == MVT::i64) {
5522 SDNode *Op =
5523 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
5524 N0, ShiftAmt);
5525 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
5526 SDValue(Op, 1));
5527 return;
5528 } else {
5529 assert(N->getValueType(0) == MVT::i32 &&
5530 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5531 SDNode *Op =
5532 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
5533 N0, ShiftAmt);
5534 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
5535 SDValue(Op, 1));
5536 return;
5537 }
5538 }
5539
5540 case ISD::STORE: {
5541 // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
5542 // X-form stores.
5543 StoreSDNode *ST = cast<StoreSDNode>(N);
5544 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
5545 ST->getAddressingMode() != ISD::PRE_INC)
5546 if (tryTLSXFormStore(ST))
5547 return;
5548 break;
5549 }
5550 case ISD::LOAD: {
5551 // Handle preincrement loads.
5552 LoadSDNode *LD = cast<LoadSDNode>(N);
5553 EVT LoadedVT = LD->getMemoryVT();
5554
5555 // Normal loads are handled by code generated from the .td file.
5556 if (LD->getAddressingMode() != ISD::PRE_INC) {
5557 // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
5558 // X-form loads.
5559 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()))
5560 if (tryTLSXFormLoad(LD))
5561 return;
5562 break;
5563 }
5564
5565 SDValue Offset = LD->getOffset();
5566 if (Offset.getOpcode() == ISD::TargetConstant ||
5567 Offset.getOpcode() == ISD::TargetGlobalAddress) {
5568
5569 unsigned Opcode;
5570 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5571 if (LD->getValueType(0) != MVT::i64) {
5572 // Handle PPC32 integer and normal FP loads.
5573 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5574 switch (LoadedVT.getSimpleVT().SimpleTy) {
5575 default: llvm_unreachable("Invalid PPC load type!");
5576 case MVT::f64: Opcode = PPC::LFDU; break;
5577 case MVT::f32: Opcode = PPC::LFSU; break;
5578 case MVT::i32: Opcode = PPC::LWZU; break;
5579 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5580 case MVT::i1:
5581 case MVT::i8: Opcode = PPC::LBZU; break;
5582 }
5583 } else {
5584 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5585 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5586 switch (LoadedVT.getSimpleVT().SimpleTy) {
5587 default: llvm_unreachable("Invalid PPC load type!");
5588 case MVT::i64: Opcode = PPC::LDU; break;
5589 case MVT::i32: Opcode = PPC::LWZU8; break;
5590 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5591 case MVT::i1:
5592 case MVT::i8: Opcode = PPC::LBZU8; break;
5593 }
5594 }
5595
5596 SDValue Chain = LD->getChain();
5597 SDValue Base = LD->getBasePtr();
5598 SDValue Ops[] = { Offset, Base, Chain };
5599 SDNode *MN = CurDAG->getMachineNode(
5600 Opcode, dl, LD->getValueType(0),
5601 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5602 transferMemOperands(N, MN);
5603 ReplaceNode(N, MN);
5604 return;
5605 } else {
5606 unsigned Opcode;
5607 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5608 if (LD->getValueType(0) != MVT::i64) {
5609 // Handle PPC32 integer and normal FP loads.
5610 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5611 switch (LoadedVT.getSimpleVT().SimpleTy) {
5612 default: llvm_unreachable("Invalid PPC load type!");
5613 case MVT::f64: Opcode = PPC::LFDUX; break;
5614 case MVT::f32: Opcode = PPC::LFSUX; break;
5615 case MVT::i32: Opcode = PPC::LWZUX; break;
5616 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5617 case MVT::i1:
5618 case MVT::i8: Opcode = PPC::LBZUX; break;
5619 }
5620 } else {
5621 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5622 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
5623 "Invalid sext update load");
5624 switch (LoadedVT.getSimpleVT().SimpleTy) {
5625 default: llvm_unreachable("Invalid PPC load type!");
5626 case MVT::i64: Opcode = PPC::LDUX; break;
5627 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5628 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5629 case MVT::i1:
5630 case MVT::i8: Opcode = PPC::LBZUX8; break;
5631 }
5632 }
5633
5634 SDValue Chain = LD->getChain();
5635 SDValue Base = LD->getBasePtr();
5636 SDValue Ops[] = { Base, Offset, Chain };
5637 SDNode *MN = CurDAG->getMachineNode(
5638 Opcode, dl, LD->getValueType(0),
5639 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5640 transferMemOperands(N, MN);
5641 ReplaceNode(N, MN);
5642 return;
5643 }
5644 }
5645
5646 case ISD::AND:
5647 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5648 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
5649 tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
5650 tryAsPairOfRLDICL(N))
5651 return;
5652
5653 // Other cases are autogenerated.
5654 break;
5655 case ISD::OR: {
5656 if (N->getValueType(0) == MVT::i32)
5657 if (tryBitfieldInsert(N))
5658 return;
5659
5660 int16_t Imm;
5661 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5662 isIntS16Immediate(N->getOperand(1), Imm)) {
5663 KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
5664
5665 // If this is equivalent to an add, then we can fold it with the
5666 // FrameIndex calculation.
5667 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
5668 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5669 return;
5670 }
5671 }
5672
5673 // If this is 'or' against an imm with consecutive ones and both sides zero,
5674 // try to emit rldimi
5675 if (tryAsSingleRLDIMI(N))
5676 return;
5677
5678 // OR with a 32-bit immediate can be handled by ori + oris
5679 // without creating an immediate in a GPR.
5680 uint64_t Imm64 = 0;
5681 bool IsPPC64 = Subtarget->isPPC64();
5682 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5683 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5684 // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
5685 uint64_t ImmHi = Imm64 >> 16;
5686 uint64_t ImmLo = Imm64 & 0xFFFF;
5687 if (ImmHi != 0 && ImmLo != 0) {
5688 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
5689 N->getOperand(0),
5690 getI16Imm(ImmLo, dl));
5691 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5692 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
5693 return;
5694 }
5695 }
5696
5697 // Other cases are autogenerated.
5698 break;
5699 }
5700 case ISD::XOR: {
5701 // XOR with a 32-bit immediate can be handled by xori + xoris
5702 // without creating an immediate in a GPR.
5703 uint64_t Imm64 = 0;
5704 bool IsPPC64 = Subtarget->isPPC64();
5705 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5706 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5707 // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
5708 uint64_t ImmHi = Imm64 >> 16;
5709 uint64_t ImmLo = Imm64 & 0xFFFF;
5710 if (ImmHi != 0 && ImmLo != 0) {
5711 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
5712 N->getOperand(0),
5713 getI16Imm(ImmLo, dl));
5714 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5715 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
5716 return;
5717 }
5718 }
5719
5720 break;
5721 }
5722 case ISD::ADD: {
5723 int16_t Imm;
5724 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5725 isIntS16Immediate(N->getOperand(1), Imm)) {
5726 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5727 return;
5728 }
5729
5730 break;
5731 }
5732 case ISD::SHL: {
5733 unsigned Imm, SH, MB, ME;
5734 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5735 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5736 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5737 getI32Imm(SH, dl), getI32Imm(MB, dl),
5738 getI32Imm(ME, dl) };
5739 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5740 return;
5741 }
5742
5743 // Other cases are autogenerated.
5744 break;
5745 }
5746 case ISD::SRL: {
5747 unsigned Imm, SH, MB, ME;
5748 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5749 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5750 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5751 getI32Imm(SH, dl), getI32Imm(MB, dl),
5752 getI32Imm(ME, dl) };
5753 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5754 return;
5755 }
5756
5757 // Other cases are autogenerated.
5758 break;
5759 }
5760 case ISD::MUL: {
5761 SDValue Op1 = N->getOperand(1);
5762 if (Op1.getOpcode() != ISD::Constant ||
5763 (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
5764 break;
5765
5766 // If the multiplier fits int16, we can handle it with mulli.
5767 int64_t Imm = Op1->getAsZExtVal();
5768 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
5769 if (isInt<16>(Imm) || !Shift)
5770 break;
5771
5772 // If the shifted value fits int16, we can do this transformation:
5773 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
5774 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5775 uint64_t ImmSh = Imm >> Shift;
5776 if (!isInt<16>(ImmSh))
5777 break;
5778
5779 uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
5780 if (Op1.getValueType() == MVT::i64) {
5781 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
5782 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
5783 N->getOperand(0), SDImm);
5784
5785 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5786 getI32Imm(63 - Shift, dl)};
5787 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5788 return;
5789 } else {
5790 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
5791 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
5792 N->getOperand(0), SDImm);
5793
5794 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5795 getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};
5796 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5797 return;
5798 }
5799 break;
5800 }
5801 // FIXME: Remove this once the ANDI glue bug is fixed:
5804 if (!ANDIGlueBug)
5805 break;
5806
5807 EVT InVT = N->getOperand(0).getValueType();
5808 assert((InVT == MVT::i64 || InVT == MVT::i32) &&
5809 "Invalid input type for ANDI_rec_1_EQ_BIT");
5810
5811 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5812 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
5813 N->getOperand(0),
5814 CurDAG->getTargetConstant(1, dl, InVT)),
5815 0);
5816 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
5817 SDValue SRIdxVal = CurDAG->getTargetConstant(
5818 N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5819 dl, MVT::i32);
5820
5821 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
5822 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
5823 return;
5824 }
5825 case ISD::SELECT_CC: {
5826 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
5827 EVT PtrVT =
5829 bool isPPC64 = (PtrVT == MVT::i64);
5830
5831 // If this is a select of i1 operands, we'll pattern match it.
5832 if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
5833 break;
5834
5835 if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5836 bool NeedSwapOps = false;
5837 bool IsUnCmp = false;
5838 if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
5839 SDValue LHS = N->getOperand(0);
5840 SDValue RHS = N->getOperand(1);
5841 if (NeedSwapOps)
5842 std::swap(LHS, RHS);
5843
5844 // Make use of SelectCC to generate the comparison to set CR bits, for
5845 // equality comparisons having one literal operand, SelectCC probably
5846 // doesn't need to materialize the whole literal and just use xoris to
5847 // check it first, it leads the following comparison result can't
5848 // exactly represent GT/LT relationship. So to avoid this we specify
5849 // SETGT/SETUGT here instead of SETEQ.
5850 SDValue GenCC =
5851 SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5852 CurDAG->SelectNodeTo(
5853 N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5854 N->getValueType(0), GenCC);
5855 NumP9Setb++;
5856 return;
5857 }
5858 }
5859
5860 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5861 if (!isPPC64 && isNullConstant(N->getOperand(1)) &&
5862 isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) &&
5863 CC == ISD::SETNE &&
5864 // FIXME: Implement this optzn for PPC64.
5865 N->getValueType(0) == MVT::i32) {
5866 SDNode *Tmp =
5867 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5868 N->getOperand(0), getI32Imm(~0U, dl));
5869 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
5870 N->getOperand(0), SDValue(Tmp, 1));
5871 return;
5872 }
5873
5874 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
5875
5876 if (N->getValueType(0) == MVT::i1) {
5877 // An i1 select is: (c & t) | (!c & f).
5878 bool Inv;
5879 unsigned Idx = getCRIdxForSetCC(CC, Inv);
5880
5881 unsigned SRI;
5882 switch (Idx) {
5883 default: llvm_unreachable("Invalid CC index");
5884 case 0: SRI = PPC::sub_lt; break;
5885 case 1: SRI = PPC::sub_gt; break;
5886 case 2: SRI = PPC::sub_eq; break;
5887 case 3: SRI = PPC::sub_un; break;
5888 }
5889
5890 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5891
5892 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5893 CCBit, CCBit), 0);
5894 SDValue C = Inv ? NotCCBit : CCBit,
5895 NotC = Inv ? CCBit : NotCCBit;
5896
5897 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5898 C, N->getOperand(2)), 0);
5899 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5900 NotC, N->getOperand(3)), 0);
5901
5902 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
5903 return;
5904 }
5905
5906 unsigned BROpc =
5907 getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
5908
5909 unsigned SelectCCOp;
5910 if (N->getValueType(0) == MVT::i32)
5911 SelectCCOp = PPC::SELECT_CC_I4;
5912 else if (N->getValueType(0) == MVT::i64)
5913 SelectCCOp = PPC::SELECT_CC_I8;
5914 else if (N->getValueType(0) == MVT::f32) {
5915 if (Subtarget->hasP8Vector())
5916 SelectCCOp = PPC::SELECT_CC_VSSRC;
5917 else if (Subtarget->hasSPE())
5918 SelectCCOp = PPC::SELECT_CC_SPE4;
5919 else
5920 SelectCCOp = PPC::SELECT_CC_F4;
5921 } else if (N->getValueType(0) == MVT::f64) {
5922 if (Subtarget->hasVSX())
5923 SelectCCOp = PPC::SELECT_CC_VSFRC;
5924 else if (Subtarget->hasSPE())
5925 SelectCCOp = PPC::SELECT_CC_SPE;
5926 else
5927 SelectCCOp = PPC::SELECT_CC_F8;
5928 } else if (N->getValueType(0) == MVT::f128)
5929 SelectCCOp = PPC::SELECT_CC_F16;
5930 else if (Subtarget->hasSPE())
5931 SelectCCOp = PPC::SELECT_CC_SPE;
5932 else if (N->getValueType(0) == MVT::v2f64 ||
5933 N->getValueType(0) == MVT::v2i64)
5934 SelectCCOp = PPC::SELECT_CC_VSRC;
5935 else
5936 SelectCCOp = PPC::SELECT_CC_VRRC;
5937
5938 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
5939 getI32Imm(BROpc, dl) };
5940 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
5941 return;
5942 }
5944 if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
5945 N->getValueType(0) == MVT::v2i64)) {
5946 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
5947
5948 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
5949 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
5950 unsigned DM[2];
5951
5952 for (int i = 0; i < 2; ++i)
5953 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
5954 DM[i] = 0;
5955 else
5956 DM[i] = 1;
5957
5958 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5960 isa<LoadSDNode>(Op1.getOperand(0))) {
5961 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
5963
5964 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5965 (LD->getMemoryVT() == MVT::f64 ||
5966 LD->getMemoryVT() == MVT::i64) &&
5967 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
5968 SDValue Chain = LD->getChain();
5969 SDValue Ops[] = { Base, Offset, Chain };
5970 MachineMemOperand *MemOp = LD->getMemOperand();
5971 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
5972 N->getValueType(0), Ops);
5973 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
5974 return;
5975 }
5976 }
5977
5978 // For little endian, we must swap the input operands and adjust
5979 // the mask elements (reverse and invert them).
5980 if (Subtarget->isLittleEndian()) {
5981 std::swap(Op1, Op2);
5982 unsigned tmp = DM[0];
5983 DM[0] = 1 - DM[1];
5984 DM[1] = 1 - tmp;
5985 }
5986
5987 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
5988 MVT::i32);
5989 SDValue Ops[] = { Op1, Op2, DMV };
5990 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
5991 return;
5992 }
5993
5994 break;
5995 case PPCISD::BDNZ:
5996 case PPCISD::BDZ: {
5997 bool IsPPC64 = Subtarget->isPPC64();
5998 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
5999 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
6000 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
6001 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
6002 MVT::Other, Ops);
6003 return;
6004 }
6005 case PPCISD::COND_BRANCH: {
6006 // Op #0 is the Chain.
6007 // Op #1 is the PPC::PRED_* number.
6008 // Op #2 is the CR#
6009 // Op #3 is the Dest MBB
6010 // Op #4 is the Flag.
6011 // Prevent PPC::PRED_* from being selected into LI.
6012 unsigned PCC = N->getConstantOperandVal(1);
6013 if (EnableBranchHint)
6014 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
6015
6016 SDValue Pred = getI32Imm(PCC, dl);
6017 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
6018 N->getOperand(0), N->getOperand(4) };
6019 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6020 return;
6021 }
6022 case ISD::BR_CC: {
6023 if (tryFoldSWTestBRCC(N))
6024 return;
6025 if (trySelectLoopCountIntrinsic(N))
6026 return;
6027 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
6028 unsigned PCC =
6029 getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
6030
6031 if (N->getOperand(2).getValueType() == MVT::i1) {
6032 unsigned Opc;
6033 bool Swap;
6034 switch (PCC) {
6035 default: llvm_unreachable("Unexpected Boolean-operand predicate");
6036 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
6037 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
6038 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
6039 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
6040 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
6041 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
6042 }
6043
6044 // A signed comparison of i1 values produces the opposite result to an
6045 // unsigned one if the condition code includes less-than or greater-than.
6046 // This is because 1 is the most negative signed i1 number and the most
6047 // positive unsigned i1 number. The CR-logical operations used for such
6048 // comparisons are non-commutative so for signed comparisons vs. unsigned
6049 // ones, the input operands just need to be swapped.
6051 Swap = !Swap;
6052
6053 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
6054 N->getOperand(Swap ? 3 : 2),
6055 N->getOperand(Swap ? 2 : 3)), 0);
6056 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
6057 N->getOperand(0));
6058 return;
6059 }
6060
6061 if (EnableBranchHint)
6062 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
6063
6064 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
6065 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
6066 N->getOperand(4), N->getOperand(0) };
6067 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6068 return;
6069 }
6070 case ISD::BRIND: {
6071 // FIXME: Should custom lower this.
6072 SDValue Chain = N->getOperand(0);
6073 SDValue Target = N->getOperand(1);
6074 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
6075 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
6076 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
6077 Chain), 0);
6078 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
6079 return;
6080 }
6081 case PPCISD::TOC_ENTRY: {
6082 const bool isPPC64 = Subtarget->isPPC64();
6083 const bool isELFABI = Subtarget->isSVR4ABI();
6084 const bool isAIXABI = Subtarget->isAIXABI();
6085
6086 // PowerPC only support small, medium and large code model.
6087 const CodeModel::Model CModel = getCodeModel(*Subtarget, TM, N);
6088
6089 assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
6090 "PowerPC doesn't support tiny or kernel code models.");
6091
6092 if (isAIXABI && CModel == CodeModel::Medium)
6093 report_fatal_error("Medium code model is not supported on AIX.");
6094
6095 // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
6096 // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
6097 // small code model, we need to check for a toc-data attribute.
6098 if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
6099 break;
6100
6101 auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
6102 EVT OperandTy) {
6103 SDValue GA = TocEntry->getOperand(0);
6104 SDValue TocBase = TocEntry->getOperand(1);
6105 SDNode *MN = nullptr;
6106 if (OpCode == PPC::ADDItoc || OpCode == PPC::ADDItoc8)
6107 // toc-data access doesn't involve in loading from got, no need to
6108 // keep memory operands.
6109 MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, TocBase, GA);
6110 else {
6111 MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
6112 transferMemOperands(TocEntry, MN);
6113 }
6114 ReplaceNode(TocEntry, MN);
6115 };
6116
6117 // Handle 32-bit small code model.
6118 if (!isPPC64 && CModel == CodeModel::Small) {
6119 // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
6120 // PPC::ADDItoc, or PPC::LWZtoc
6121 if (isELFABI) {
6122 assert(TM.isPositionIndependent() &&
6123 "32-bit ELF can only have TOC entries in position independent"
6124 " code.");
6125 // 32-bit ELF always uses a small code model toc access.
6126 replaceWith(PPC::LWZtoc, N, MVT::i32);
6127 return;
6128 }
6129
6130 assert(isAIXABI && "ELF ABI already handled");
6131
6132 if (hasTocDataAttr(N->getOperand(0))) {
6133 replaceWith(PPC::ADDItoc, N, MVT::i32);
6134 return;
6135 }
6136
6137 replaceWith(PPC::LWZtoc, N, MVT::i32);
6138 return;
6139 }
6140
6141 if (isPPC64 && CModel == CodeModel::Small) {
6142 assert(isAIXABI && "ELF ABI handled in common SelectCode");
6143
6144 if (hasTocDataAttr(N->getOperand(0))) {
6145 replaceWith(PPC::ADDItoc8, N, MVT::i64);
6146 return;
6147 }
6148 // Break if it doesn't have toc data attribute. Proceed with common
6149 // SelectCode.
6150 break;
6151 }
6152
6153 assert(CModel != CodeModel::Small && "All small code models handled.");
6154
6155 assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
6156 " ELF/AIX or 32-bit AIX in the following.");
6157
6158 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode,
6159 // 64-bit medium (ELF-only), or 64-bit large (ELF and AIX) code model code
6160 // that does not contain TOC data symbols. We generate two instructions as
6161 // described below. The first source operand is a symbol reference. If it
6162 // must be referenced via the TOC according to Subtarget, we generate:
6163 // [32-bit AIX]
6164 // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
6165 // [64-bit ELF/AIX]
6166 // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
6167 // Otherwise for medium code model ELF we generate:
6168 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6169
6170 // And finally for AIX with toc-data we generate:
6171 // [32-bit AIX]
6172 // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
6173 // [64-bit AIX]
6174 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6175
6176 SDValue GA = N->getOperand(0);
6177 SDValue TOCbase = N->getOperand(1);
6178
6179 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
6180 SDNode *Tmp = CurDAG->getMachineNode(
6181 isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
6182
6183 // On AIX, if the symbol has the toc-data attribute it will be defined
6184 // in the TOC entry, so we use an ADDItocL/ADDItocL8.
6185 if (isAIXABI && hasTocDataAttr(GA)) {
6186 ReplaceNode(
6187 N, CurDAG->getMachineNode(isPPC64 ? PPC::ADDItocL8 : PPC::ADDItocL,
6188 dl, VT, SDValue(Tmp, 0), GA));
6189 return;
6190 }
6191
6192 if (PPCLowering->isAccessedAsGotIndirect(GA)) {
6193 // If it is accessed as got-indirect, we need an extra LWZ/LD to load
6194 // the address.
6195 SDNode *MN = CurDAG->getMachineNode(
6196 isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
6197
6198 transferMemOperands(N, MN);
6199 ReplaceNode(N, MN);
6200 return;
6201 }
6202
6203 assert(isPPC64 && "TOC_ENTRY already handled for 32-bit.");
6204 // Build the address relative to the TOC-pointer.
6205 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64,
6206 SDValue(Tmp, 0), GA));
6207 return;
6208 }
6210 // Generate a PIC-safe GOT reference.
6211 assert(Subtarget->is32BitELFABI() &&
6212 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
6213 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
6214 PPCLowering->getPointerTy(CurDAG->getDataLayout()),
6215 MVT::i32);
6216 return;
6217
6218 case PPCISD::VADD_SPLAT: {
6219 // This expands into one of three sequences, depending on whether
6220 // the first operand is odd or even, positive or negative.
6221 assert(isa<ConstantSDNode>(N->getOperand(0)) &&
6222 isa<ConstantSDNode>(N->getOperand(1)) &&
6223 "Invalid operand on VADD_SPLAT!");
6224
6225 int Elt = N->getConstantOperandVal(0);
6226 int EltSize = N->getConstantOperandVal(1);
6227 unsigned Opc1, Opc2, Opc3;
6228 EVT VT;
6229
6230 if (EltSize == 1) {
6231 Opc1 = PPC::VSPLTISB;
6232 Opc2 = PPC::VADDUBM;
6233 Opc3 = PPC::VSUBUBM;
6234 VT = MVT::v16i8;
6235 } else if (EltSize == 2) {
6236 Opc1 = PPC::VSPLTISH;
6237 Opc2 = PPC::VADDUHM;
6238 Opc3 = PPC::VSUBUHM;
6239 VT = MVT::v8i16;
6240 } else {
6241 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
6242 Opc1 = PPC::VSPLTISW;
6243 Opc2 = PPC::VADDUWM;
6244 Opc3 = PPC::VSUBUWM;
6245 VT = MVT::v4i32;
6246 }
6247
6248 if ((Elt & 1) == 0) {
6249 // Elt is even, in the range [-32,-18] + [16,30].
6250 //
6251 // Convert: VADD_SPLAT elt, size
6252 // Into: tmp = VSPLTIS[BHW] elt
6253 // VADDU[BHW]M tmp, tmp
6254 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
6255 SDValue EltVal = getI32Imm(Elt >> 1, dl);
6256 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6257 SDValue TmpVal = SDValue(Tmp, 0);
6258 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
6259 return;
6260 } else if (Elt > 0) {
6261 // Elt is odd and positive, in the range [17,31].
6262 //
6263 // Convert: VADD_SPLAT elt, size
6264 // Into: tmp1 = VSPLTIS[BHW] elt-16
6265 // tmp2 = VSPLTIS[BHW] -16
6266 // VSUBU[BHW]M tmp1, tmp2
6267 SDValue EltVal = getI32Imm(Elt - 16, dl);
6268 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6269 EltVal = getI32Imm(-16, dl);
6270 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6271 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
6272 SDValue(Tmp2, 0)));
6273 return;
6274 } else {
6275 // Elt is odd and negative, in the range [-31,-17].
6276 //
6277 // Convert: VADD_SPLAT elt, size
6278 // Into: tmp1 = VSPLTIS[BHW] elt+16
6279 // tmp2 = VSPLTIS[BHW] -16
6280 // VADDU[BHW]M tmp1, tmp2
6281 SDValue EltVal = getI32Imm(Elt + 16, dl);
6282 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6283 EltVal = getI32Imm(-16, dl);
6284 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6285 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
6286 SDValue(Tmp2, 0)));
6287 return;
6288 }
6289 }
6290 case PPCISD::LD_SPLAT: {
6291 // Here we want to handle splat load for type v16i8 and v8i16 when there is
6292 // no direct move, we don't need to use stack for this case. If target has
6293 // direct move, we should be able to get the best selection in the .td file.
6294 if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
6295 break;
6296
6297 EVT Type = N->getValueType(0);
6298 if (Type != MVT::v16i8 && Type != MVT::v8i16)
6299 break;
6300
6301 // If the alignment for the load is 16 or bigger, we don't need the
6302 // permutated mask to get the required value. The value must be the 0
6303 // element in big endian target or 7/15 in little endian target in the
6304 // result vsx register of lvx instruction.
6305 // Select the instruction in the .td file.
6306 if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
6307 isOffsetMultipleOf(N, 16))
6308 break;
6309
6310 SDValue ZeroReg =
6311 CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
6312 Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
6313 unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
6314 // v16i8 LD_SPLAT addr
6315 // ======>
6316 // Mask = LVSR/LVSL 0, addr
6317 // LoadLow = LVX 0, addr
6318 // Perm = VPERM LoadLow, LoadLow, Mask
6319 // Splat = VSPLTB 15/0, Perm
6320 //
6321 // v8i16 LD_SPLAT addr
6322 // ======>
6323 // Mask = LVSR/LVSL 0, addr
6324 // LoadLow = LVX 0, addr
6325 // LoadHigh = LVX (LI, 1), addr
6326 // Perm = VPERM LoadLow, LoadHigh, Mask
6327 // Splat = VSPLTH 7/0, Perm
6328 unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
6329 unsigned SplatElemIndex =
6330 Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
6331
6332 SDNode *Mask = CurDAG->getMachineNode(
6333 Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
6334 N->getOperand(1));
6335
6336 SDNode *LoadLow =
6337 CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
6338 {ZeroReg, N->getOperand(1), N->getOperand(0)});
6339
6340 SDNode *LoadHigh = LoadLow;
6341 if (Type == MVT::v8i16) {
6342 LoadHigh = CurDAG->getMachineNode(
6343 PPC::LVX, dl, MVT::v16i8, MVT::Other,
6344 {SDValue(CurDAG->getMachineNode(
6345 LIOpcode, dl, MVT::i32,
6346 CurDAG->getTargetConstant(1, dl, MVT::i8)),
6347 0),
6348 N->getOperand(1), SDValue(LoadLow, 1)});
6349 }
6350
6351 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
6352 transferMemOperands(N, LoadHigh);
6353
6354 SDNode *Perm =
6355 CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
6356 SDValue(LoadHigh, 0), SDValue(Mask, 0));
6357 CurDAG->SelectNodeTo(N, SplatOp, Type,
6358 CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
6359 SDValue(Perm, 0));
6360 return;
6361 }
6362 }
6363
6364 SelectCode(N);
6365}
6366
6367// If the target supports the cmpb instruction, do the idiom recognition here.
6368// We don't do this as a DAG combine because we don't want to do it as nodes
6369// are being combined (because we might miss part of the eventual idiom). We
6370// don't want to do it during instruction selection because we want to reuse
6371// the logic for lowering the masking operations already part of the
6372// instruction selector.
6373SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
6374 SDLoc dl(N);
6375
6376 assert(N->getOpcode() == ISD::OR &&
6377 "Only OR nodes are supported for CMPB");
6378
6379 SDValue Res;
6380 if (!Subtarget->hasCMPB())
6381 return Res;
6382
6383 if (N->getValueType(0) != MVT::i32 &&
6384 N->getValueType(0) != MVT::i64)
6385 return Res;
6386
6387 EVT VT = N->getValueType(0);
6388
6389 SDValue RHS, LHS;
6390 bool BytesFound[8] = {false, false, false, false, false, false, false, false};
6391 uint64_t Mask = 0, Alt = 0;
6392
6393 auto IsByteSelectCC = [this](SDValue O, unsigned &b,
6394 uint64_t &Mask, uint64_t &Alt,
6395 SDValue &LHS, SDValue &RHS) {
6396 if (O.getOpcode() != ISD::SELECT_CC)
6397 return false;
6398 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
6399
6400 if (!isa<ConstantSDNode>(O.getOperand(2)) ||
6401 !isa<ConstantSDNode>(O.getOperand(3)))
6402 return false;
6403
6404 uint64_t PM = O.getConstantOperandVal(2);
6405 uint64_t PAlt = O.getConstantOperandVal(3);
6406 for (b = 0; b < 8; ++b) {
6407 uint64_t Mask = UINT64_C(0xFF) << (8*b);
6408 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
6409 break;
6410 }
6411
6412 if (b == 8)
6413 return false;
6414 Mask |= PM;
6415 Alt |= PAlt;
6416
6417 if (!isa<ConstantSDNode>(O.getOperand(1)) ||
6418 O.getConstantOperandVal(1) != 0) {
6419 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
6420 if (Op0.getOpcode() == ISD::TRUNCATE)
6421 Op0 = Op0.getOperand(0);
6422 if (Op1.getOpcode() == ISD::TRUNCATE)
6423 Op1 = Op1.getOperand(0);
6424
6425 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
6426 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
6427 isa<ConstantSDNode>(Op0.getOperand(1))) {
6428
6429 unsigned Bits = Op0.getValueSizeInBits();
6430 if (b != Bits/8-1)
6431 return false;
6432 if (Op0.getConstantOperandVal(1) != Bits-8)
6433 return false;
6434
6435 LHS = Op0.getOperand(0);
6436 RHS = Op1.getOperand(0);
6437 return true;
6438 }
6439
6440 // When we have small integers (i16 to be specific), the form present
6441 // post-legalization uses SETULT in the SELECT_CC for the
6442 // higher-order byte, depending on the fact that the
6443 // even-higher-order bytes are known to all be zero, for example:
6444 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
6445 // (so when the second byte is the same, because all higher-order
6446 // bits from bytes 3 and 4 are known to be zero, the result of the
6447 // xor can be at most 255)
6448 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
6449 isa<ConstantSDNode>(O.getOperand(1))) {
6450
6451 uint64_t ULim = O.getConstantOperandVal(1);
6452 if (ULim != (UINT64_C(1) << b*8))
6453 return false;
6454
6455 // Now we need to make sure that the upper bytes are known to be
6456 // zero.
6457 unsigned Bits = Op0.getValueSizeInBits();
6458 if (!CurDAG->MaskedValueIsZero(
6459 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
6460 return false;
6461
6462 LHS = Op0.getOperand(0);
6463 RHS = Op0.getOperand(1);
6464 return true;
6465 }
6466
6467 return false;
6468 }
6469
6470 if (CC != ISD::SETEQ)
6471 return false;
6472
6473 SDValue Op = O.getOperand(0);
6474 if (Op.getOpcode() == ISD::AND) {
6475 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6476 return false;
6477 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
6478 return false;
6479
6480 SDValue XOR = Op.getOperand(0);
6481 if (XOR.getOpcode() == ISD::TRUNCATE)
6482 XOR = XOR.getOperand(0);
6483 if (XOR.getOpcode() != ISD::XOR)
6484 return false;
6485
6486 LHS = XOR.getOperand(0);
6487 RHS = XOR.getOperand(1);
6488 return true;
6489 } else if (Op.getOpcode() == ISD::SRL) {
6490 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6491 return false;
6492 unsigned Bits = Op.getValueSizeInBits();
6493 if (b != Bits/8-1)
6494 return false;
6495 if (Op.getConstantOperandVal(1) != Bits-8)
6496 return false;
6497
6498 SDValue XOR = Op.getOperand(0);
6499 if (XOR.getOpcode() == ISD::TRUNCATE)
6500 XOR = XOR.getOperand(0);
6501 if (XOR.getOpcode() != ISD::XOR)
6502 return false;
6503
6504 LHS = XOR.getOperand(0);
6505 RHS = XOR.getOperand(1);
6506 return true;
6507 }
6508
6509 return false;
6510 };
6511
6513 while (!Queue.empty()) {
6514 SDValue V = Queue.pop_back_val();
6515
6516 for (const SDValue &O : V.getNode()->ops()) {
6517 unsigned b = 0;
6518 uint64_t M = 0, A = 0;
6519 SDValue OLHS, ORHS;
6520 if (O.getOpcode() == ISD::OR) {
6521 Queue.push_back(O);
6522 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
6523 if (!LHS) {
6524 LHS = OLHS;
6525 RHS = ORHS;
6526 BytesFound[b] = true;
6527 Mask |= M;
6528 Alt |= A;
6529 } else if ((LHS == ORHS && RHS == OLHS) ||
6530 (RHS == ORHS && LHS == OLHS)) {
6531 BytesFound[b] = true;
6532 Mask |= M;
6533 Alt |= A;
6534 } else {
6535 return Res;
6536 }
6537 } else {
6538 return Res;
6539 }
6540 }
6541 }
6542
6543 unsigned LastB = 0, BCnt = 0;
6544 for (unsigned i = 0; i < 8; ++i)
6545 if (BytesFound[LastB]) {
6546 ++BCnt;
6547 LastB = i;
6548 }
6549
6550 if (!LastB || BCnt < 2)
6551 return Res;
6552
6553 // Because we'll be zero-extending the output anyway if don't have a specific
6554 // value for each input byte (via the Mask), we can 'anyext' the inputs.
6555 if (LHS.getValueType() != VT) {
6556 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
6557 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
6558 }
6559
6560 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
6561
6562 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
6563 if (NonTrivialMask && !Alt) {
6564 // Res = Mask & CMPB
6565 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6566 CurDAG->getConstant(Mask, dl, VT));
6567 } else if (Alt) {
6568 // Res = (CMPB & Mask) | (~CMPB & Alt)
6569 // Which, as suggested here:
6570 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6571 // can be written as:
6572 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
6573 // useful because the (Alt ^ Mask) can be pre-computed.
6574 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6575 CurDAG->getConstant(Mask ^ Alt, dl, VT));
6576 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
6577 CurDAG->getConstant(Alt, dl, VT));
6578 }
6579
6580 return Res;
6581}
6582
6583// When CR bit registers are enabled, an extension of an i1 variable to a i32
6584// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6585// involves constant materialization of a 0 or a 1 or both. If the result of
6586// the extension is then operated upon by some operator that can be constant
6587// folded with a constant 0 or 1, and that constant can be materialized using
6588// only one instruction (like a zero or one), then we should fold in those
6589// operations with the select.
6590void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
6591 if (!Subtarget->useCRBits())
6592 return;
6593
6594 if (N->getOpcode() != ISD::ZERO_EXTEND &&
6595 N->getOpcode() != ISD::SIGN_EXTEND &&
6596 N->getOpcode() != ISD::ANY_EXTEND)
6597 return;
6598
6599 if (N->getOperand(0).getValueType() != MVT::i1)
6600 return;
6601
6602 if (!N->hasOneUse())
6603 return;
6604
6605 SDLoc dl(N);
6606 EVT VT = N->getValueType(0);
6607 SDValue Cond = N->getOperand(0);
6608 SDValue ConstTrue =
6609 CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
6610 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
6611
6612 do {
6613 SDNode *User = *N->use_begin();
6614 if (User->getNumOperands() != 2)
6615 break;
6616
6617 auto TryFold = [this, N, User, dl](SDValue Val) {
6618 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
6619 SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
6620 SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
6621
6622 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
6623 User->getValueType(0), {O0, O1});
6624 };
6625
6626 // FIXME: When the semantics of the interaction between select and undef
6627 // are clearly defined, it may turn out to be unnecessary to break here.
6628 SDValue TrueRes = TryFold(ConstTrue);
6629 if (!TrueRes || TrueRes.isUndef())
6630 break;
6631 SDValue FalseRes = TryFold(ConstFalse);
6632 if (!FalseRes || FalseRes.isUndef())
6633 break;
6634
6635 // For us to materialize these using one instruction, we must be able to
6636 // represent them as signed 16-bit integers.
6637 uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal();
6638 if (!isInt<16>(True) || !isInt<16>(False))
6639 break;
6640
6641 // We can replace User with a new SELECT node, and try again to see if we
6642 // can fold the select with its user.
6643 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
6644 N = User;
6645 ConstTrue = TrueRes;
6646 ConstFalse = FalseRes;
6647 } while (N->hasOneUse());
6648}
6649
6650void PPCDAGToDAGISel::PreprocessISelDAG() {
6651 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6652
6653 bool MadeChange = false;
6654 while (Position != CurDAG->allnodes_begin()) {
6655 SDNode *N = &*--Position;
6656 if (N->use_empty())
6657 continue;
6658
6659 SDValue Res;
6660 switch (N->getOpcode()) {
6661 default: break;
6662 case ISD::OR:
6663 Res = combineToCMPB(N);
6664 break;
6665 }
6666
6667 if (!Res)
6668 foldBoolExts(Res, N);
6669
6670 if (Res) {
6671 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6672 LLVM_DEBUG(N->dump(CurDAG));
6673 LLVM_DEBUG(dbgs() << "\nNew: ");
6674 LLVM_DEBUG(Res.getNode()->dump(CurDAG));
6675 LLVM_DEBUG(dbgs() << "\n");
6676
6677 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
6678 MadeChange = true;
6679 }
6680 }
6681
6682 if (MadeChange)
6683 CurDAG->RemoveDeadNodes();
6684}
6685
6686/// PostprocessISelDAG - Perform some late peephole optimizations
6687/// on the DAG representation.
6688void PPCDAGToDAGISel::PostprocessISelDAG() {
6689 // Skip peepholes at -O0.
6690 if (TM.getOptLevel() == CodeGenOptLevel::None)
6691 return;
6692
6693 PeepholePPC64();
6694 PeepholeCROps();
6695 PeepholePPC64ZExt();
6696}
6697
6698// Check if all users of this node will become isel where the second operand
6699// is the constant zero. If this is so, and if we can negate the condition,
6700// then we can flip the true and false operands. This will allow the zero to
6701// be folded with the isel so that we don't need to materialize a register
6702// containing zero.
6703bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
6704 for (const SDNode *User : N->uses()) {
6705 if (!User->isMachineOpcode())
6706 return false;
6707 if (User->getMachineOpcode() != PPC::SELECT_I4 &&
6708 User->getMachineOpcode() != PPC::SELECT_I8)
6709 return false;
6710
6711 SDNode *Op1 = User->getOperand(1).getNode();
6712 SDNode *Op2 = User->getOperand(2).getNode();
6713 // If we have a degenerate select with two equal operands, swapping will
6714 // not do anything, and we may run into an infinite loop.
6715 if (Op1 == Op2)
6716 return false;
6717
6718 if (!Op2->isMachineOpcode())
6719 return false;
6720
6721 if (Op2->getMachineOpcode() != PPC::LI &&
6722 Op2->getMachineOpcode() != PPC::LI8)
6723 return false;
6724
6725 if (!isNullConstant(Op2->getOperand(0)))
6726 return false;
6727 }
6728
6729 return true;
6730}
6731
6732void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
6733 SmallVector<SDNode *, 4> ToReplace;
6734 for (SDNode *User : N->uses()) {
6735 assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
6736 User->getMachineOpcode() == PPC::SELECT_I8) &&
6737 "Must have all select users");
6738 ToReplace.push_back(User);
6739 }
6740
6741 for (SDNode *User : ToReplace) {
6742 SDNode *ResNode =
6743 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
6744 User->getValueType(0), User->getOperand(0),
6745 User->getOperand(2),
6746 User->getOperand(1));
6747
6748 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6749 LLVM_DEBUG(User->dump(CurDAG));
6750 LLVM_DEBUG(dbgs() << "\nNew: ");
6751 LLVM_DEBUG(ResNode->dump(CurDAG));
6752 LLVM_DEBUG(dbgs() << "\n");
6753
6754 ReplaceUses(User, ResNode);
6755 }
6756}
6757
6758void PPCDAGToDAGISel::PeepholeCROps() {
6759 bool IsModified;
6760 do {
6761 IsModified = false;
6762 for (SDNode &Node : CurDAG->allnodes()) {
6763 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
6764 if (!MachineNode || MachineNode->use_empty())
6765 continue;
6766 SDNode *ResNode = MachineNode;
6767
6768 bool Op1Set = false, Op1Unset = false,
6769 Op1Not = false,
6770 Op2Set = false, Op2Unset = false,
6771 Op2Not = false;
6772
6773 unsigned Opcode = MachineNode->getMachineOpcode();
6774 switch (Opcode) {
6775 default: break;
6776 case PPC::CRAND:
6777 case PPC::CRNAND:
6778 case PPC::CROR:
6779 case PPC::CRXOR:
6780 case PPC::CRNOR:
6781 case PPC::CREQV:
6782 case PPC::CRANDC:
6783 case PPC::CRORC: {
6784 SDValue Op = MachineNode->getOperand(1);
6785 if (Op.isMachineOpcode()) {
6786 if (Op.getMachineOpcode() == PPC::CRSET)
6787 Op2Set = true;
6788 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6789 Op2Unset = true;
6790 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6791 Op.getOperand(0) == Op.getOperand(1)) ||
6792 Op.getMachineOpcode() == PPC::CRNOT)
6793 Op2Not = true;
6794 }
6795 [[fallthrough]];
6796 }
6797 case PPC::BC:
6798 case PPC::BCn:
6799 case PPC::SELECT_I4:
6800 case PPC::SELECT_I8:
6801 case PPC::SELECT_F4:
6802 case PPC::SELECT_F8:
6803 case PPC::SELECT_SPE:
6804 case PPC::SELECT_SPE4:
6805 case PPC::SELECT_VRRC:
6806 case PPC::SELECT_VSFRC:
6807 case PPC::SELECT_VSSRC:
6808 case PPC::SELECT_VSRC: {
6809 SDValue Op = MachineNode->getOperand(0);
6810 if (Op.isMachineOpcode()) {
6811 if (Op.getMachineOpcode() == PPC::CRSET)
6812 Op1Set = true;
6813 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6814 Op1Unset = true;
6815 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6816 Op.getOperand(0) == Op.getOperand(1)) ||
6817 Op.getMachineOpcode() == PPC::CRNOT)
6818 Op1Not = true;
6819 }
6820 }
6821 break;
6822 }
6823
6824 bool SelectSwap = false;
6825 switch (Opcode) {
6826 default: break;
6827 case PPC::CRAND:
6828 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6829 // x & x = x
6830 ResNode = MachineNode->getOperand(0).getNode();
6831 else if (Op1Set)
6832 // 1 & y = y
6833 ResNode = MachineNode->getOperand(1).getNode();
6834 else if (Op2Set)
6835 // x & 1 = x
6836 ResNode = MachineNode->getOperand(0).getNode();
6837 else if (Op1Unset || Op2Unset)
6838 // x & 0 = 0 & y = 0
6839 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6840 MVT::i1);
6841 else if (Op1Not)
6842 // ~x & y = andc(y, x)
6843 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6844 MVT::i1, MachineNode->getOperand(1),
6845 MachineNode->getOperand(0).
6846 getOperand(0));
6847 else if (Op2Not)
6848 // x & ~y = andc(x, y)
6849 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6850 MVT::i1, MachineNode->getOperand(0),
6851 MachineNode->getOperand(1).
6852 getOperand(0));
6853 else if (AllUsersSelectZero(MachineNode)) {
6854 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
6855 MVT::i1, MachineNode->getOperand(0),
6856 MachineNode->getOperand(1));
6857 SelectSwap = true;
6858 }
6859 break;
6860 case PPC::CRNAND:
6861 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6862 // nand(x, x) -> nor(x, x)
6863 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6864 MVT::i1, MachineNode->getOperand(0),
6865 MachineNode->getOperand(0));
6866 else if (Op1Set)
6867 // nand(1, y) -> nor(y, y)
6868 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6869 MVT::i1, MachineNode->getOperand(1),
6870 MachineNode->getOperand(1));
6871 else if (Op2Set)
6872 // nand(x, 1) -> nor(x, x)
6873 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6874 MVT::i1, MachineNode->getOperand(0),
6875 MachineNode->getOperand(0));
6876 else if (Op1Unset || Op2Unset)
6877 // nand(x, 0) = nand(0, y) = 1
6878 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6879 MVT::i1);
6880 else if (Op1Not)
6881 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
6882 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6883 MVT::i1, MachineNode->getOperand(0).
6884 getOperand(0),
6885 MachineNode->getOperand(1));
6886 else if (Op2Not)
6887 // nand(x, ~y) = ~x | y = orc(y, x)
6888 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6889 MVT::i1, MachineNode->getOperand(1).
6890 getOperand(0),
6891 MachineNode->getOperand(0));
6892 else if (AllUsersSelectZero(MachineNode)) {
6893 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
6894 MVT::i1, MachineNode->getOperand(0),
6895 MachineNode->getOperand(1));
6896 SelectSwap = true;
6897 }
6898 break;
6899 case PPC::CROR:
6900 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6901 // x | x = x
6902 ResNode = MachineNode->getOperand(0).getNode();
6903 else if (Op1Set || Op2Set)
6904 // x | 1 = 1 | y = 1
6905 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6906 MVT::i1);
6907 else if (Op1Unset)
6908 // 0 | y = y
6909 ResNode = MachineNode->getOperand(1).getNode();
6910 else if (Op2Unset)
6911 // x | 0 = x
6912 ResNode = MachineNode->getOperand(0).getNode();
6913 else if (Op1Not)
6914 // ~x | y = orc(y, x)
6915 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6916 MVT::i1, MachineNode->getOperand(1),
6917 MachineNode->getOperand(0).
6918 getOperand(0));
6919 else if (Op2Not)
6920 // x | ~y = orc(x, y)
6921 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6922 MVT::i1, MachineNode->getOperand(0),
6923 MachineNode->getOperand(1).
6924 getOperand(0));
6925 else if (AllUsersSelectZero(MachineNode)) {
6926 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6927 MVT::i1, MachineNode->getOperand(0),
6928 MachineNode->getOperand(1));
6929 SelectSwap = true;
6930 }
6931 break;
6932 case PPC::CRXOR:
6933 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6934 // xor(x, x) = 0
6935 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6936 MVT::i1);
6937 else if (Op1Set)
6938 // xor(1, y) -> nor(y, y)
6939 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6940 MVT::i1, MachineNode->getOperand(1),
6941 MachineNode->getOperand(1));
6942 else if (Op2Set)
6943 // xor(x, 1) -> nor(x, x)
6944 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6945 MVT::i1, MachineNode->getOperand(0),
6946 MachineNode->getOperand(0));
6947 else if (Op1Unset)
6948 // xor(0, y) = y
6949 ResNode = MachineNode->getOperand(1).getNode();
6950 else if (Op2Unset)
6951 // xor(x, 0) = x
6952 ResNode = MachineNode->getOperand(0).getNode();
6953 else if (Op1Not)
6954 // xor(~x, y) = eqv(x, y)
6955 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6956 MVT::i1, MachineNode->getOperand(0).
6957 getOperand(0),
6958 MachineNode->getOperand(1));
6959 else if (Op2Not)
6960 // xor(x, ~y) = eqv(x, y)
6961 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6962 MVT::i1, MachineNode->getOperand(0),
6963 MachineNode->getOperand(1).
6964 getOperand(0));
6965 else if (AllUsersSelectZero(MachineNode)) {
6966 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6967 MVT::i1, MachineNode->getOperand(0),
6968 MachineNode->getOperand(1));
6969 SelectSwap = true;
6970 }
6971 break;
6972 case PPC::CRNOR:
6973 if (Op1Set || Op2Set)
6974 // nor(1, y) -> 0
6975 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6976 MVT::i1);
6977 else if (Op1Unset)
6978 // nor(0, y) = ~y -> nor(y, y)
6979 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6980 MVT::i1, MachineNode->getOperand(1),
6981 MachineNode->getOperand(1));
6982 else if (Op2Unset)
6983 // nor(x, 0) = ~x
6984 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6985 MVT::i1, MachineNode->getOperand(0),
6986 MachineNode->getOperand(0));
6987 else if (Op1Not)
6988 // nor(~x, y) = andc(x, y)
6989 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6990 MVT::i1, MachineNode->getOperand(0).
6991 getOperand(0),
6992 MachineNode->getOperand(1));
6993 else if (Op2Not)
6994 // nor(x, ~y) = andc(y, x)
6995 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6996 MVT::i1, MachineNode->getOperand(1).
6997 getOperand(0),
6998 MachineNode->getOperand(0));
6999 else if (AllUsersSelectZero(MachineNode)) {
7000 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7001 MVT::i1, MachineNode->getOperand(0),
7002 MachineNode->getOperand(1));
7003 SelectSwap = true;
7004 }
7005 break;
7006 case PPC::CREQV:
7007 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7008 // eqv(x, x) = 1
7009 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7010 MVT::i1);
7011 else if (Op1Set)
7012 // eqv(1, y) = y
7013 ResNode = MachineNode->getOperand(1).getNode();
7014 else if (Op2Set)
7015 // eqv(x, 1) = x
7016 ResNode = MachineNode->getOperand(0).getNode();
7017 else if (Op1Unset)
7018 // eqv(0, y) = ~y -> nor(y, y)
7019 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7020 MVT::i1, MachineNode->getOperand(1),
7021 MachineNode->getOperand(1));
7022 else if (Op2Unset)
7023 // eqv(x, 0) = ~x
7024 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7025 MVT::i1, MachineNode->getOperand(0),
7026 MachineNode->getOperand(0));
7027 else if (Op1Not)
7028 // eqv(~x, y) = xor(x, y)
7029 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7030 MVT::i1, MachineNode->getOperand(0).
7031 getOperand(0),
7032 MachineNode->getOperand(1));
7033 else if (Op2Not)
7034 // eqv(x, ~y) = xor(x, y)
7035 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7036 MVT::i1, MachineNode->getOperand(0),
7037 MachineNode->getOperand(1).
7038 getOperand(0));
7039 else if (AllUsersSelectZero(MachineNode)) {
7040 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7041 MVT::i1, MachineNode->getOperand(0),
7042 MachineNode->getOperand(1));
7043 SelectSwap = true;
7044 }
7045 break;
7046 case PPC::CRANDC:
7047 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7048 // andc(x, x) = 0
7049 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7050 MVT::i1);
7051 else if (Op1Set)
7052 // andc(1, y) = ~y
7053 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7054 MVT::i1, MachineNode->getOperand(1),
7055 MachineNode->getOperand(1));
7056 else if (Op1Unset || Op2Set)
7057 // andc(0, y) = andc(x, 1) = 0
7058 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7059 MVT::i1);
7060 else if (Op2Unset)
7061 // andc(x, 0) = x
7062 ResNode = MachineNode->getOperand(0).getNode();
7063 else if (Op1Not)
7064 // andc(~x, y) = ~(x | y) = nor(x, y)
7065 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7066 MVT::i1, MachineNode->getOperand(0).
7067 getOperand(0),
7068 MachineNode->getOperand(1));
7069 else if (Op2Not)
7070 // andc(x, ~y) = x & y
7071 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
7072 MVT::i1, MachineNode->getOperand(0),
7073 MachineNode->getOperand(1).
7074 getOperand(0));
7075 else if (AllUsersSelectZero(MachineNode)) {
7076 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
7077 MVT::i1, MachineNode->getOperand(1),
7078 MachineNode->getOperand(0));
7079 SelectSwap = true;
7080 }
7081 break;
7082 case PPC::CRORC:
7083 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7084 // orc(x, x) = 1
7085 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7086 MVT::i1);
7087 else if (Op1Set || Op2Unset)
7088 // orc(1, y) = orc(x, 0) = 1
7089 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7090 MVT::i1);
7091 else if (Op2Set)
7092 // orc(x, 1) = x
7093 ResNode = MachineNode->getOperand(0).getNode();
7094 else if (Op1Unset)
7095 // orc(0, y) = ~y
7096 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7097 MVT::i1, MachineNode->getOperand(1),
7098 MachineNode->getOperand(1));
7099 else if (Op1Not)
7100 // orc(~x, y) = ~(x & y) = nand(x, y)
7101 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
7102 MVT::i1, MachineNode->getOperand(0).
7103 getOperand(0),
7104 MachineNode->getOperand(1));
7105 else if (Op2Not)
7106 // orc(x, ~y) = x | y
7107 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7108 MVT::i1, MachineNode->getOperand(0),
7109 MachineNode->getOperand(1).
7110 getOperand(0));
7111 else if (AllUsersSelectZero(MachineNode)) {
7112 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7113 MVT::i1, MachineNode->getOperand(1),
7114 MachineNode->getOperand(0));
7115 SelectSwap = true;
7116 }
7117 break;
7118 case PPC::SELECT_I4:
7119 case PPC::SELECT_I8:
7120 case PPC::SELECT_F4:
7121 case PPC::SELECT_F8:
7122 case PPC::SELECT_SPE:
7123 case PPC::SELECT_SPE4:
7124 case PPC::SELECT_VRRC:
7125 case PPC::SELECT_VSFRC:
7126 case PPC::SELECT_VSSRC:
7127 case PPC::SELECT_VSRC:
7128 if (Op1Set)
7129 ResNode = MachineNode->getOperand(1).getNode();
7130 else if (Op1Unset)
7131 ResNode = MachineNode->getOperand(2).getNode();
7132 else if (Op1Not)
7133 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
7134 SDLoc(MachineNode),
7135 MachineNode->getValueType(0),
7136 MachineNode->getOperand(0).
7137 getOperand(0),
7138 MachineNode->getOperand(2),
7139 MachineNode->getOperand(1));
7140 break;
7141 case PPC::BC:
7142 case PPC::BCn:
7143 if (Op1Not)
7144 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
7145 PPC::BC,
7146 SDLoc(MachineNode),
7147 MVT::Other,
7148 MachineNode->getOperand(0).
7149 getOperand(0),
7150 MachineNode->getOperand(1),
7151 MachineNode->getOperand(2));
7152 // FIXME: Handle Op1Set, Op1Unset here too.
7153 break;
7154 }
7155
7156 // If we're inverting this node because it is used only by selects that
7157 // we'd like to swap, then swap the selects before the node replacement.
7158 if (SelectSwap)
7159 SwapAllSelectUsers(MachineNode);
7160
7161 if (ResNode != MachineNode) {
7162 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
7163 LLVM_DEBUG(MachineNode->dump(CurDAG));
7164 LLVM_DEBUG(dbgs() << "\nNew: ");
7165 LLVM_DEBUG(ResNode->dump(CurDAG));
7166 LLVM_DEBUG(dbgs() << "\n");
7167
7168 ReplaceUses(MachineNode, ResNode);
7169 IsModified = true;
7170 }
7171 }
7172 if (IsModified)
7173 CurDAG->RemoveDeadNodes();
7174 } while (IsModified);
7175}
7176
7177// Gather the set of 32-bit operations that are known to have their
7178// higher-order 32 bits zero, where ToPromote contains all such operations.
7180 SmallPtrSetImpl<SDNode *> &ToPromote) {
7181 if (!Op32.isMachineOpcode())
7182 return false;
7183
7184 // First, check for the "frontier" instructions (those that will clear the
7185 // higher-order 32 bits.
7186
7187 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
7188 // around. If it does not, then these instructions will clear the
7189 // higher-order bits.
7190 if ((Op32.getMachineOpcode() == PPC::RLWINM ||
7191 Op32.getMachineOpcode() == PPC::RLWNM) &&
7192 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
7193 ToPromote.insert(Op32.getNode());
7194 return true;
7195 }
7196
7197 // SLW and SRW always clear the higher-order bits.
7198 if (Op32.getMachineOpcode() == PPC::SLW ||
7199 Op32.getMachineOpcode() == PPC::SRW) {
7200 ToPromote.insert(Op32.getNode());
7201 return true;
7202 }
7203
7204 // For LI and LIS, we need the immediate to be positive (so that it is not
7205 // sign extended).
7206 if (Op32.getMachineOpcode() == PPC::LI ||
7207 Op32.getMachineOpcode() == PPC::LIS) {
7208 if (!isUInt<15>(Op32.getConstantOperandVal(0)))
7209 return false;
7210
7211 ToPromote.insert(Op32.getNode());
7212 return true;
7213 }
7214
7215 // LHBRX and LWBRX always clear the higher-order bits.
7216 if (Op32.getMachineOpcode() == PPC::LHBRX ||
7217 Op32.getMachineOpcode() == PPC::LWBRX) {
7218 ToPromote.insert(Op32.getNode());
7219 return true;
7220 }
7221
7222 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
7223 if (Op32.getMachineOpcode() == PPC::CNTLZW ||
7224 Op32.getMachineOpcode() == PPC::CNTTZW) {
7225 ToPromote.insert(Op32.getNode());
7226 return true;
7227 }
7228
7229 // Next, check for those instructions we can look through.
7230
7231 // Assuming the mask does not wrap around, then the higher-order bits are
7232 // taken directly from the first operand.
7233 if (Op32.getMachineOpcode() == PPC::RLWIMI &&
7234 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
7235 SmallPtrSet<SDNode *, 16> ToPromote1;
7236 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7237 return false;
7238
7239 ToPromote.insert(Op32.getNode());
7240 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7241 return true;
7242 }
7243
7244 // For OR, the higher-order bits are zero if that is true for both operands.
7245 // For SELECT_I4, the same is true (but the relevant operand numbers are
7246 // shifted by 1).
7247 if (Op32.getMachineOpcode() == PPC::OR ||
7248 Op32.getMachineOpcode() == PPC::SELECT_I4) {
7249 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
7250 SmallPtrSet<SDNode *, 16> ToPromote1;
7251 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
7252 return false;
7253 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
7254 return false;
7255
7256 ToPromote.insert(Op32.getNode());
7257 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7258 return true;
7259 }
7260
7261 // For ORI and ORIS, we need the higher-order bits of the first operand to be
7262 // zero, and also for the constant to be positive (so that it is not sign
7263 // extended).
7264 if (Op32.getMachineOpcode() == PPC::ORI ||
7265 Op32.getMachineOpcode() == PPC::ORIS) {
7266 SmallPtrSet<SDNode *, 16> ToPromote1;
7267 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7268 return false;
7269 if (!isUInt<15>(Op32.getConstantOperandVal(1)))
7270 return false;
7271
7272 ToPromote.insert(Op32.getNode());
7273 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7274 return true;
7275 }
7276
7277 // The higher-order bits of AND are zero if that is true for at least one of
7278 // the operands.
7279 if (Op32.getMachineOpcode() == PPC::AND) {
7280 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
7281 bool Op0OK =
7282 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7283 bool Op1OK =
7284 PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
7285 if (!Op0OK && !Op1OK)
7286 return false;
7287
7288 ToPromote.insert(Op32.getNode());
7289
7290 if (Op0OK)
7291 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7292
7293 if (Op1OK)
7294 ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
7295
7296 return true;
7297 }
7298
7299 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
7300 // of the first operand, or if the second operand is positive (so that it is
7301 // not sign extended).
7302 if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
7303 Op32.getMachineOpcode() == PPC::ANDIS_rec) {
7304 SmallPtrSet<SDNode *, 16> ToPromote1;
7305 bool Op0OK =
7306 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7307 bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
7308 if (!Op0OK && !Op1OK)
7309 return false;
7310
7311 ToPromote.insert(Op32.getNode());
7312
7313 if (Op0OK)
7314 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7315
7316 return true;
7317 }
7318
7319 return false;
7320}
7321
7322void PPCDAGToDAGISel::PeepholePPC64ZExt() {
7323 if (!Subtarget->isPPC64())
7324 return;
7325
7326 // When we zero-extend from i32 to i64, we use a pattern like this:
7327 // def : Pat<(i64 (zext i32:$in)),
7328 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
7329 // 0, 32)>;
7330 // There are several 32-bit shift/rotate instructions, however, that will
7331 // clear the higher-order bits of their output, rendering the RLDICL
7332 // unnecessary. When that happens, we remove it here, and redefine the
7333 // relevant 32-bit operation to be a 64-bit operation.
7334
7335 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7336
7337 bool MadeChange = false;
7338 while (Position != CurDAG->allnodes_begin()) {
7339 SDNode *N = &*--Position;
7340 // Skip dead nodes and any non-machine opcodes.
7341 if (N->use_empty() || !N->isMachineOpcode())
7342 continue;
7343
7344 if (N->getMachineOpcode() != PPC::RLDICL)
7345 continue;
7346
7347 if (N->getConstantOperandVal(1) != 0 ||
7348 N->getConstantOperandVal(2) != 32)
7349 continue;
7350
7351 SDValue ISR = N->getOperand(0);
7352 if (!ISR.isMachineOpcode() ||
7353 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
7354 continue;
7355
7356 if (!ISR.hasOneUse())
7357 continue;
7358
7359 if (ISR.getConstantOperandVal(2) != PPC::sub_32)
7360 continue;
7361
7362 SDValue IDef = ISR.getOperand(0);
7363 if (!IDef.isMachineOpcode() ||
7364 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
7365 continue;
7366
7367 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
7368 // can get rid of it.
7369
7370 SDValue Op32 = ISR->getOperand(1);
7371 if (!Op32.isMachineOpcode())
7372 continue;
7373
7374 // There are some 32-bit instructions that always clear the high-order 32
7375 // bits, there are also some instructions (like AND) that we can look
7376 // through.
7377 SmallPtrSet<SDNode *, 16> ToPromote;
7378 if (!PeepholePPC64ZExtGather(Op32, ToPromote))
7379 continue;
7380
7381 // If the ToPromote set contains nodes that have uses outside of the set
7382 // (except for the original INSERT_SUBREG), then abort the transformation.
7383 bool OutsideUse = false;
7384 for (SDNode *PN : ToPromote) {
7385 for (SDNode *UN : PN->uses()) {
7386 if (!ToPromote.count(UN) && UN != ISR.getNode()) {
7387 OutsideUse = true;
7388 break;
7389 }
7390 }
7391
7392 if (OutsideUse)
7393 break;
7394 }
7395 if (OutsideUse)
7396 continue;
7397
7398 MadeChange = true;
7399
7400 // We now know that this zero extension can be removed by promoting to
7401 // nodes in ToPromote to 64-bit operations, where for operations in the
7402 // frontier of the set, we need to insert INSERT_SUBREGs for their
7403 // operands.
7404 for (SDNode *PN : ToPromote) {
7405 unsigned NewOpcode;
7406 switch (PN->getMachineOpcode()) {
7407 default:
7408 llvm_unreachable("Don't know the 64-bit variant of this instruction");
7409 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
7410 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
7411 case PPC::SLW: NewOpcode = PPC::SLW8; break;
7412 case PPC::SRW: NewOpcode = PPC::SRW8; break;
7413 case PPC::LI: NewOpcode = PPC::LI8; break;
7414 case PPC::LIS: NewOpcode = PPC::LIS8; break;
7415 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
7416 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
7417 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
7418 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
7419 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
7420 case PPC::OR: NewOpcode = PPC::OR8; break;
7421 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
7422 case PPC::ORI: NewOpcode = PPC::ORI8; break;
7423 case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
7424 case PPC::AND: NewOpcode = PPC::AND8; break;
7425 case PPC::ANDI_rec:
7426 NewOpcode = PPC::ANDI8_rec;
7427 break;
7428 case PPC::ANDIS_rec:
7429 NewOpcode = PPC::ANDIS8_rec;
7430 break;
7431 }
7432
7433 // Note: During the replacement process, the nodes will be in an
7434 // inconsistent state (some instructions will have operands with values
7435 // of the wrong type). Once done, however, everything should be right
7436 // again.
7437
7439 for (const SDValue &V : PN->ops()) {
7440 if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
7441 !isa<ConstantSDNode>(V)) {
7442 SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
7443 SDNode *ReplOp =
7444 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
7445 ISR.getNode()->getVTList(), ReplOpOps);
7446 Ops.push_back(SDValue(ReplOp, 0));
7447 } else {
7448 Ops.push_back(V);
7449 }
7450 }
7451
7452 // Because all to-be-promoted nodes only have users that are other
7453 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
7454 // the i32 result value type with i64.
7455
7456 SmallVector<EVT, 2> NewVTs;
7457 SDVTList VTs = PN->getVTList();
7458 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
7459 if (VTs.VTs[i] == MVT::i32)
7460 NewVTs.push_back(MVT::i64);
7461 else
7462 NewVTs.push_back(VTs.VTs[i]);
7463
7464 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
7465 LLVM_DEBUG(PN->dump(CurDAG));
7466
7467 CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
7468
7469 LLVM_DEBUG(dbgs() << "\nNew: ");
7470 LLVM_DEBUG(PN->dump(CurDAG));
7471 LLVM_DEBUG(dbgs() << "\n");
7472 }
7473
7474 // Now we replace the original zero extend and its associated INSERT_SUBREG
7475 // with the value feeding the INSERT_SUBREG (which has now been promoted to
7476 // return an i64).
7477
7478 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
7479 LLVM_DEBUG(N->dump(CurDAG));
7480 LLVM_DEBUG(dbgs() << "\nNew: ");
7481 LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
7482 LLVM_DEBUG(dbgs() << "\n");
7483
7484 ReplaceUses(N, Op32.getNode());
7485 }
7486
7487 if (MadeChange)
7488 CurDAG->RemoveDeadNodes();
7489}
7490
7491static bool isVSXSwap(SDValue N) {
7492 if (!N->isMachineOpcode())
7493 return false;
7494 unsigned Opc = N->getMachineOpcode();
7495
7496 // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
7497 // operand is 2.
7498 if (Opc == PPC::XXPERMDIs) {
7499 return isa<ConstantSDNode>(N->getOperand(1)) &&
7500 N->getConstantOperandVal(1) == 2;
7501 } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
7502 return N->getOperand(0) == N->getOperand(1) &&
7503 isa<ConstantSDNode>(N->getOperand(2)) &&
7504 N->getConstantOperandVal(2) == 2;
7505 }
7506
7507 return false;
7508}
7509
7510// TODO: Make this complete and replace with a table-gen bit.
7512 if (!N->isMachineOpcode())
7513 return false;
7514 unsigned Opc = N->getMachineOpcode();
7515
7516 switch (Opc) {
7517 default:
7518 return false;
7519 case PPC::VAVGSB:
7520 case PPC::VAVGUB:
7521 case PPC::VAVGSH:
7522 case PPC::VAVGUH:
7523 case PPC::VAVGSW:
7524 case PPC::VAVGUW:
7525 case PPC::VMAXFP:
7526 case PPC::VMAXSB:
7527 case PPC::VMAXUB:
7528 case PPC::VMAXSH:
7529 case PPC::VMAXUH:
7530 case PPC::VMAXSW:
7531 case PPC::VMAXUW:
7532 case PPC::VMINFP:
7533 case PPC::VMINSB:
7534 case PPC::VMINUB:
7535 case PPC::VMINSH:
7536 case PPC::VMINUH:
7537 case PPC::VMINSW:
7538 case PPC::VMINUW:
7539 case PPC::VADDFP:
7540 case PPC::VADDUBM:
7541 case PPC::VADDUHM:
7542 case PPC::VADDUWM:
7543 case PPC::VSUBFP:
7544 case PPC::VSUBUBM:
7545 case PPC::VSUBUHM:
7546 case PPC::VSUBUWM:
7547 case PPC::VAND:
7548 case PPC::VANDC:
7549 case PPC::VOR:
7550 case PPC::VORC:
7551 case PPC::VXOR:
7552 case PPC::VNOR:
7553 case PPC::VMULUWM:
7554 return true;
7555 }
7556}
7557
7558// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7559// lane-insensitive.
7560static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
7561 // Our desired xxswap might be source of COPY_TO_REGCLASS.
7562 // TODO: Can we put this a common method for DAG?
7563 auto SkipRCCopy = [](SDValue V) {
7564 while (V->isMachineOpcode() &&
7565 V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
7566 // All values in the chain should have single use.
7567 if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))
7568 return SDValue();
7569 V = V->getOperand(0);
7570 }
7571 return V.hasOneUse() ? V : SDValue();
7572 };
7573
7574 SDValue VecOp = SkipRCCopy(N->getOperand(0));
7575 if (!VecOp || !isLaneInsensitive(VecOp))
7576 return;
7577
7578 SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
7579 RHS = SkipRCCopy(VecOp.getOperand(1));
7580 if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
7581 return;
7582
7583 // These swaps may still have chain-uses here, count on dead code elimination
7584 // in following passes to remove them.
7585 DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));
7586 DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));
7587 DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
7588}
7589
7590// Check if an SDValue has the 'aix-small-tls' global variable attribute.
7591static bool hasAIXSmallTLSAttr(SDValue Val) {
7592 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val))
7593 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()))
7594 if (GV->hasAttribute("aix-small-tls"))
7595 return true;
7596
7597 return false;
7598}
7599
7600// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
7601// accesses?
7603 SDValue ADDIToFold) {
7604 // Check if ADDIToFold (the ADDI that we want to fold into local-exec
7605 // accesses), is truly an ADDI.
7606 if (!ADDIToFold.isMachineOpcode() ||
7607 (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
7608 return false;
7609
7610 // Folding is only allowed for the AIX small-local-[exec|dynamic] TLS target
7611 // attribute or when the 'aix-small-tls' global variable attribute is present.
7612 const PPCSubtarget &Subtarget =
7614 SDValue TLSVarNode = ADDIToFold.getOperand(1);
7615 if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
7616 Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
7617 return false;
7618
7619 // The second operand of the ADDIToFold should be the global TLS address
7620 // (the local-exec TLS variable). We only perform the folding if the TLS
7621 // variable is the second operand.
7622 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
7623 if (!GA)
7624 return false;
7625
7626 if (DAG->getTarget().getTLSModel(GA->getGlobal()) == TLSModel::LocalExec) {
7627 // The first operand of the ADDIToFold should be the thread pointer.
7628 // This transformation is only performed if the first operand of the
7629 // addi is the thread pointer.
7630 SDValue TPRegNode = ADDIToFold.getOperand(0);
7631 RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7632 if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7633 return false;
7634 }
7635
7636 // The local-[exec|dynamic] TLS variable should only have the
7637 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
7638 // performed otherwise if the flag is not set.
7639 unsigned TargetFlags = GA->getTargetFlags();
7640 if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
7641 TargetFlags == PPCII::MO_TLSLD_FLAG))
7642 return false;
7643
7644 // If all conditions are satisfied, the ADDI is valid for folding.
7645 return true;
7646}
7647
7648// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
7649// another addi, fold this sequence into a single addi if possible. Before this
7650// optimization, the sequence appears as:
7651// addi rN, r13, sym@[le|ld]
7652// addi rM, rN, imm
7653// After this optimization, we can fold the two addi into a single one:
7654// addi rM, r13, sym@[le|ld] + imm
7656 if (N->getMachineOpcode() != PPC::ADDI8)
7657 return;
7658
7659 // InitialADDI is the addi feeding into N (also an addi), and the addi that
7660 // we want optimized out.
7661 SDValue InitialADDI = N->getOperand(0);
7662
7663 if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, InitialADDI))
7664 return;
7665
7666 // The second operand of the InitialADDI should be the global TLS address
7667 // (the local-[exec|dynamic] TLS variable), with the
7668 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
7669 // isEligibleToFoldADDIForFasterLocalAccesses().
7670 SDValue TLSVarNode = InitialADDI.getOperand(1);
7671 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
7672 assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7673 "local-[exec|dynamic] accesses!");
7674 unsigned TargetFlags = GA->getTargetFlags();
7675
7676 // The second operand of the addi that we want to preserve will be an
7677 // immediate. We add this immediate, together with the address of the TLS
7678 // variable found in InitialADDI, in order to preserve the correct TLS address
7679 // information during assembly printing. The offset is likely to be non-zero
7680 // when we end up in this case.
7681 int Offset = N->getConstantOperandVal(1);
7682 TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
7683 Offset, TargetFlags);
7684
7685 (void)DAG->UpdateNodeOperands(N, InitialADDI.getOperand(0), TLSVarNode);
7686 if (InitialADDI.getNode()->use_empty())
7687 DAG->RemoveDeadNode(InitialADDI.getNode());
7688}
7689
7690void PPCDAGToDAGISel::PeepholePPC64() {
7691 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7692
7693 while (Position != CurDAG->allnodes_begin()) {
7694 SDNode *N = &*--Position;
7695 // Skip dead nodes and any non-machine opcodes.
7696 if (N->use_empty() || !N->isMachineOpcode())
7697 continue;
7698
7699 if (isVSXSwap(SDValue(N, 0)))
7700 reduceVSXSwap(N, CurDAG);
7701
7702 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7703 // accesses.
7705
7706 unsigned FirstOp;
7707 unsigned StorageOpcode = N->getMachineOpcode();
7708 bool RequiresMod4Offset = false;
7709
7710 switch (StorageOpcode) {
7711 default: continue;
7712
7713 case PPC::LWA:
7714 case PPC::LD:
7715 case PPC::DFLOADf64:
7716 case PPC::DFLOADf32:
7717 RequiresMod4Offset = true;
7718 [[fallthrough]];
7719 case PPC::LBZ:
7720 case PPC::LBZ8:
7721 case PPC::LFD:
7722 case PPC::LFS:
7723 case PPC::LHA:
7724 case PPC::LHA8:
7725 case PPC::LHZ:
7726 case PPC::LHZ8:
7727 case PPC::LWZ:
7728 case PPC::LWZ8:
7729 FirstOp = 0;
7730 break;
7731
7732 case PPC::STD:
7733 case PPC::DFSTOREf64:
7734 case PPC::DFSTOREf32:
7735 RequiresMod4Offset = true;
7736 [[fallthrough]];
7737 case PPC::STB:
7738 case PPC::STB8:
7739 case PPC::STFD:
7740 case PPC::STFS:
7741 case PPC::STH:
7742 case PPC::STH8:
7743 case PPC::STW:
7744 case PPC::STW8:
7745 FirstOp = 1;
7746 break;
7747 }
7748
7749 // If this is a load or store with a zero offset, or within the alignment,
7750 // we may be able to fold an add-immediate into the memory operation.
7751 // The check against alignment is below, as it can't occur until we check
7752 // the arguments to N
7753 if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
7754 continue;
7755
7756 SDValue Base = N->getOperand(FirstOp + 1);
7757 if (!Base.isMachineOpcode())
7758 continue;
7759
7760 unsigned Flags = 0;
7761 bool ReplaceFlags = true;
7762
7763 // When the feeding operation is an add-immediate of some sort,
7764 // determine whether we need to add relocation information to the
7765 // target flags on the immediate operand when we fold it into the
7766 // load instruction.
7767 //
7768 // For something like ADDItocL8, the relocation information is
7769 // inferred from the opcode; when we process it in the AsmPrinter,
7770 // we add the necessary relocation there. A load, though, can receive
7771 // relocation from various flavors of ADDIxxx, so we need to carry
7772 // the relocation information in the target flags.
7773 switch (Base.getMachineOpcode()) {
7774 default: continue;
7775
7776 case PPC::ADDI8:
7777 case PPC::ADDI:
7778 // In some cases (such as TLS) the relocation information
7779 // is already in place on the operand, so copying the operand
7780 // is sufficient.
7781 ReplaceFlags = false;
7782 break;
7783 case PPC::ADDIdtprelL:
7785 break;
7786 case PPC::ADDItlsldL:
7788 break;
7789 case PPC::ADDItocL8:
7790 // Skip the following peephole optimizations for ADDItocL8 on AIX which
7791 // is used for toc-data access.
7792 if (Subtarget->isAIXABI())
7793 continue;
7795 break;
7796 }
7797
7798 SDValue ImmOpnd = Base.getOperand(1);
7799
7800 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
7801 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
7802 // we might have needed different @ha relocation values for the offset
7803 // pointers).
7804 int MaxDisplacement = 7;
7805 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7806 const GlobalValue *GV = GA->getGlobal();
7807 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7808 MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
7809 }
7810
7811 bool UpdateHBase = false;
7812 SDValue HBase = Base.getOperand(0);
7813
7814 int Offset = N->getConstantOperandVal(FirstOp);
7815 if (ReplaceFlags) {
7816 if (Offset < 0 || Offset > MaxDisplacement) {
7817 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
7818 // one use, then we can do this for any offset, we just need to also
7819 // update the offset (i.e. the symbol addend) on the addis also.
7820 if (Base.getMachineOpcode() != PPC::ADDItocL8)
7821 continue;
7822
7823 if (!HBase.isMachineOpcode() ||
7824 HBase.getMachineOpcode() != PPC::ADDIStocHA8)
7825 continue;
7826
7827 if (!Base.hasOneUse() || !HBase.hasOneUse())
7828 continue;
7829
7830 SDValue HImmOpnd = HBase.getOperand(1);
7831 if (HImmOpnd != ImmOpnd)
7832 continue;
7833
7834 UpdateHBase = true;
7835 }
7836 } else {
7837 // Global addresses can be folded, but only if they are sufficiently
7838 // aligned.
7839 if (RequiresMod4Offset) {
7840 if (GlobalAddressSDNode *GA =
7841 dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7842 const GlobalValue *GV = GA->getGlobal();
7843 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7844 if (Alignment < 4)
7845 continue;
7846 }
7847 }
7848
7849 // If we're directly folding the addend from an addi instruction, then:
7850 // 1. In general, the offset on the memory access must be zero.
7851 // 2. If the addend is a constant, then it can be combined with a
7852 // non-zero offset, but only if the result meets the encoding
7853 // requirements.
7854 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
7855 Offset += C->getSExtValue();
7856
7857 if (RequiresMod4Offset && (Offset % 4) != 0)
7858 continue;
7859
7860 if (!isInt<16>(Offset))
7861 continue;
7862
7863 ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
7864 ImmOpnd.getValueType());
7865 } else if (Offset != 0) {
7866 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7867 // accesses.
7869 // Add the non-zero offset information into the load or store
7870 // instruction to be used for non-TOC-based local-[exec|dynamic]
7871 // accesses.
7872 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
7873 assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7874 "addi into local-[exec|dynamic] accesses!");
7875 ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
7876 MVT::i64, Offset,
7877 GA->getTargetFlags());
7878 } else
7879 continue;
7880 }
7881 }
7882
7883 // We found an opportunity. Reverse the operands from the add
7884 // immediate and substitute them into the load or store. If
7885 // needed, update the target flags for the immediate operand to
7886 // reflect the necessary relocation information.
7887 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
7888 LLVM_DEBUG(Base->dump(CurDAG));
7889 LLVM_DEBUG(dbgs() << "\nN: ");
7890 LLVM_DEBUG(N->dump(CurDAG));
7891 LLVM_DEBUG(dbgs() << "\n");
7892
7893 // If the relocation information isn't already present on the
7894 // immediate operand, add it now.
7895 if (ReplaceFlags) {
7896 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7897 SDLoc dl(GA);
7898 const GlobalValue *GV = GA->getGlobal();
7899 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7900 // We can't perform this optimization for data whose alignment
7901 // is insufficient for the instruction encoding.
7902 if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
7903 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
7904 continue;
7905 }
7906 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
7907 } else if (ConstantPoolSDNode *CP =
7908 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
7909 const Constant *C = CP->getConstVal();
7910 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
7911 Offset, Flags);
7912 }
7913 }
7914
7915 if (FirstOp == 1) // Store
7916 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
7917 Base.getOperand(0), N->getOperand(3));
7918 else // Load
7919 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
7920 N->getOperand(2));
7921
7922 if (UpdateHBase)
7923 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
7924 ImmOpnd);
7925
7926 // The add-immediate may now be dead, in which case remove it.
7927 if (Base.getNode()->use_empty())
7928 CurDAG->RemoveDeadNode(Base.getNode());
7929 }
7930}
7931
7932 /// createPPCISelDag - This pass converts a legalized DAG into a
7933 /// PowerPC-specific DAG, ready for instruction scheduling.
7934 /// Returns a newly allocated PPCDAGToDAGISelLegacy built from TM and OptLevel.
7935 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
7936 CodeGenOptLevel OptLevel) {
7937 return new PPCDAGToDAGISelLegacy(TM, OptLevel);
7938 }
unsigned SubReg
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
amdgpu AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1309
const HexagonInstrInfo * TII
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:531
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:512
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG, SDValue ADDIToFold)
static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base)
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
static bool hasTocDataAttr(SDValue Val)
static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG)
static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG)
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode * > &ToPromote)
static bool isLaneInsensitive(SDValue N)
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N)
static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget, const TargetMachine &TM, const SDNode *Node)
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, const PPCSubtarget *Subtarget)
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num)
static unsigned getBranchHint(unsigned PCC, const FunctionLoweringInfo &FuncInfo, const SDValue &DestMBB)
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, bool &NeedSwapOps, bool &IsUnCmp)
static cl::opt< bool > EnableTLSOpt("ppc-tls-opt", cl::init(true), cl::desc("Enable tls optimization peephole"), cl::Hidden)
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
static cl::opt< ICmpInGPRType > CmpInGPR("ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), cl::desc("Specify the types of comparisons to emit GPR-only code for."), cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), clEnumValN(ICGPR_NonExtIn, "nonextin", "Only comparisons where inputs don't need [sz]ext."), clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), clEnumValN(ICGPR_ZextI32, "zexti32", "Only i32 comparisons with zext result."), clEnumValN(ICGPR_ZextI64, "zexti64", "Only i64 comparisons with zext result."), clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), clEnumValN(ICGPR_SextI32, "sexti32", "Only i32 comparisons with sext result."), clEnumValN(ICGPR_SextI64, "sexti64", "Only i64 comparisons with sext result.")))
static SDNode * selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
#define PASS_NAME
#define DEBUG_TYPE
static SDNode * selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
static bool hasAIXSmallTLSAttr(SDValue Val)
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden)
static bool isSWTestOp(SDValue N)
static SDNode * selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned *InstCnt=nullptr)
ICmpInGPRType
@ ICGPR_ZextI32
@ ICGPR_I64
@ ICGPR_All
@ ICGPR_None
@ ICGPR_NonExtIn
@ ICGPR_Sext
@ ICGPR_I32
@ ICGPR_SextI64
@ ICGPR_ZextI64
@ ICGPR_SextI32
@ ICGPR_Zext
static bool isVSXSwap(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
APInt rotr(unsigned rotateAmt) const
Rotate right by rotateAmt.
Definition: APInt.cpp:1124
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
MachineBasicBlock * getBasicBlock() const
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
BranchProbabilityInfo * BPI
MachineBasicBlock * MBB
MBB - The current block.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists.
static StringRef getMemConstraintName(ConstraintCode C)
Definition: InlineAsm.h:467
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1436
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setROPProtectionHashSaveIndex(int Idx)
static int getRecordFormOpcode(unsigned Opcode)
bool is32BitELFABI() const
Definition: PPCSubtarget.h:220
bool isAIXABI() const
Definition: PPCSubtarget.h:215
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:147
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:146
MCRegister getThreadPointerRegister() const
Definition: PPCSubtarget.h:284
bool isSVR4ABI() const
Definition: PPCSubtarget.h:216
bool isLittleEndian() const
Definition: PPCSubtarget.h:182
bool isTargetELF() const
Definition: PPCSubtarget.h:211
CodeModel::Model getCodeModel(const TargetMachine &TM, const GlobalValue *GV) const
Calculates the effective code model for argument GV.
bool isELFv2ABI() const
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:153
Common code between 32-bit and 64-bit PowerPC targets.
Register getReg() const
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
void dump() const
Dump this node, for debugging.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual void PostprocessISelDAG()
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:493
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:547
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:548
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:487
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:488
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:787
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:559
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:482
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:753
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:570
ilist< SDNode >::iterator allnodes_iterator
Definition: SelectionDAG.h:550
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:346
iterator end() const
Definition: SmallPtrSet.h:460
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
iterator begin() const
Definition: SmallPtrSet.h:455
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5266
An efficient, type-erasing, non-owning reference to a callable.
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1145
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1120
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1495
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1636
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1642
@ MO_TLSLD_LO
Definition: PPC.h:184
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_DTPREL_LO
These values identify relocations on immediates folded into memory operations.
Definition: PPC.h:183
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TOC_LO
Definition: PPC.h:185
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ SRL
These nodes represent PPC shifts.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ CALL
CALL - A direct function call.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ FTSQRT
Test instruction for software square root.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ BR_NONTAKEN_HINT
Definition: PPCPredicates.h:64
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:154
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:159
FunctionPass * createPPCISelDag(PPCTargetMachine &TM, CodeGenOptLevel OL)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2082
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:581
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
unsigned int NumVTs