LLVM 19.0.0git
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pattern matching instruction selector for PowerPC,
10// converting from a legalized dag to a PPC dag.
11//
12//===----------------------------------------------------------------------===//
13
16#include "PPC.h"
17#include "PPCISelLowering.h"
19#include "PPCSubtarget.h"
20#include "PPCTargetMachine.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/ADT/APSInt.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/Statistic.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/DebugLoc.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/InlineAsm.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/IntrinsicsPowerPC.h"
50#include "llvm/IR/Module.h"
55#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <cstdint>
63#include <iterator>
64#include <limits>
65#include <memory>
66#include <new>
67#include <tuple>
68#include <utility>
69
70using namespace llvm;
71
72#define DEBUG_TYPE "ppc-isel"
73#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
74
75STATISTIC(NumSextSetcc,
76 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
77STATISTIC(NumZextSetcc,
78 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
79STATISTIC(SignExtensionsAdded,
80 "Number of sign extensions for compare inputs added.");
81STATISTIC(ZeroExtensionsAdded,
82 "Number of zero extensions for compare inputs added.");
83STATISTIC(NumLogicOpsOnComparison,
84 "Number of logical ops on i1 values calculated in GPR.");
85STATISTIC(OmittedForNonExtendUses,
86 "Number of compares not eliminated as they have non-extending uses.");
87STATISTIC(NumP9Setb,
88 "Number of compares lowered to setb.");
89
90// FIXME: Remove this once the bug has been fixed!
91cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
92cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
93
94static cl::opt<bool>
95 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
96 cl::desc("use aggressive ppc isel for bit permutations"),
99 "ppc-bit-perm-rewriter-stress-rotates",
100 cl::desc("stress rotate selection in aggressive ppc isel for "
101 "bit permutations"),
102 cl::Hidden);
103
105 "ppc-use-branch-hint", cl::init(true),
106 cl::desc("Enable static hinting of branches on ppc"),
107 cl::Hidden);
108
110 "ppc-tls-opt", cl::init(true),
111 cl::desc("Enable tls optimization peephole"),
112 cl::Hidden);
113
117
119 "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
120 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
121 cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
122 clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
123 clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
124 clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
125 clEnumValN(ICGPR_NonExtIn, "nonextin",
126 "Only comparisons where inputs don't need [sz]ext."),
127 clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
128 clEnumValN(ICGPR_ZextI32, "zexti32",
129 "Only i32 comparisons with zext result."),
130 clEnumValN(ICGPR_ZextI64, "zexti64",
131 "Only i64 comparisons with zext result."),
132 clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
133 clEnumValN(ICGPR_SextI32, "sexti32",
134 "Only i32 comparisons with sext result."),
135 clEnumValN(ICGPR_SextI64, "sexti64",
136 "Only i64 comparisons with sext result.")));
137namespace {
138
139 //===--------------------------------------------------------------------===//
140 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
141 /// instructions for SelectionDAG operations.
142 ///
143 class PPCDAGToDAGISel : public SelectionDAGISel {
144 const PPCTargetMachine &TM;
145 const PPCSubtarget *Subtarget = nullptr;
146 const PPCTargetLowering *PPCLowering = nullptr;
147 unsigned GlobalBaseReg = 0;
148
149 public:
150 static char ID;
151
152 PPCDAGToDAGISel() = delete;
153
154 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
155 : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {}
156
157 bool runOnMachineFunction(MachineFunction &MF) override {
158 // Make sure we re-emit a set of the global base reg if necessary
159 GlobalBaseReg = 0;
160 Subtarget = &MF.getSubtarget<PPCSubtarget>();
161 PPCLowering = Subtarget->getTargetLowering();
162 if (Subtarget->hasROPProtect()) {
163 // Create a place on the stack for the ROP Protection Hash.
164 // The ROP Protection Hash will always be 8 bytes and aligned to 8
165 // bytes.
166 MachineFrameInfo &MFI = MF.getFrameInfo();
168 const int Result = MFI.CreateStackObject(8, Align(8), false);
170 }
172
173 return true;
174 }
175
176 void PreprocessISelDAG() override;
177 void PostprocessISelDAG() override;
178
179 /// getI16Imm - Return a target constant with the specified value, of type
180 /// i16.
181 inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
182 return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
183 }
184
185 /// getI32Imm - Return a target constant with the specified value, of type
186 /// i32.
187 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
188 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
189 }
190
191 /// getI64Imm - Return a target constant with the specified value, of type
192 /// i64.
193 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
194 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
195 }
196
197 /// getSmallIPtrImm - Return a target constant of pointer type.
198 inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) {
199 return CurDAG->getTargetConstant(
200 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
201 }
202
203 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
204 /// rotate and mask opcode and mask operation.
205 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
206 unsigned &SH, unsigned &MB, unsigned &ME);
207
208 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
209 /// base register. Return the virtual register that holds this value.
210 SDNode *getGlobalBaseReg();
211
212 void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0);
213
214 // Select - Convert the specified operand from a target-independent to a
215 // target-specific node if it hasn't already been changed.
216 void Select(SDNode *N) override;
217
218 bool tryBitfieldInsert(SDNode *N);
219 bool tryBitPermutation(SDNode *N);
220 bool tryIntCompareInGPR(SDNode *N);
221
222 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
223 // an X-Form load instruction with the offset being a relocation coming from
224 // the PPCISD::ADD_TLS.
225 bool tryTLSXFormLoad(LoadSDNode *N);
226 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
227 // an X-Form store instruction with the offset being a relocation coming from
228 // the PPCISD::ADD_TLS.
229 bool tryTLSXFormStore(StoreSDNode *N);
230 /// SelectCC - Select a comparison of the specified values with the
231 /// specified condition code, returning the CR# of the expression.
232 SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
233 const SDLoc &dl, SDValue Chain = SDValue());
234
235 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
236 /// immediate field. Note that the operand at this point is already the
237 /// result of a prior SelectAddressRegImm call.
238 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
239 if (N.getOpcode() == ISD::TargetConstant ||
240 N.getOpcode() == ISD::TargetGlobalAddress) {
241 Out = N;
242 return true;
243 }
244
245 return false;
246 }
247
248 /// SelectDSForm - Returns true if address N can be represented by the
249 /// addressing mode of DSForm instructions (a base register, plus a signed
250 /// 16-bit displacement that is a multiple of 4).
251 bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
252 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
253 Align(4)) == PPC::AM_DSForm;
254 }
255
256 /// SelectDQForm - Returns true if address N can be represented by the
257 /// addressing mode of DQForm instructions (a base register, plus a signed
258 /// 16-bit displacement that is a multiple of 16).
259 bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
260 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
261 Align(16)) == PPC::AM_DQForm;
262 }
263
264 /// SelectDForm - Returns true if address N can be represented by
265 /// the addressing mode of DForm instructions (a base register, plus a
266 /// signed 16-bit immediate).
267 bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
268 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
269 std::nullopt) == PPC::AM_DForm;
270 }
271
272 /// SelectPCRelForm - Returns true if address N can be represented by
273 /// PC-Relative addressing mode.
274 bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
275 SDValue &Base) {
276 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
277 std::nullopt) == PPC::AM_PCRel;
278 }
279
280 /// SelectPDForm - Returns true if address N can be represented by Prefixed
281 /// DForm addressing mode (a base register, plus a signed 34-bit immediate).
282 bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
283 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
284 std::nullopt) ==
286 }
287
288 /// SelectXForm - Returns true if address N can be represented by the
289 /// addressing mode of XForm instructions (an indexed [r+r] operation).
290 bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
291 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
292 std::nullopt) == PPC::AM_XForm;
293 }
294
295 /// SelectForceXForm - Given the specified address, force it to be
296 /// represented as an indexed [r+r] operation (an XForm instruction).
297 bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
298 SDValue &Base) {
299 return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
301 }
302
303 /// SelectAddrIdx - Given the specified address, check to see if it can be
304 /// represented as an indexed [r+r] operation.
305 /// This is for xform instructions whose associated displacement form is D.
306 /// The last parameter \p 0 means associated D form has no requirement for 16
307 /// bit signed displacement.
308 /// Returns false if it can be represented by [r+imm], which are preferred.
309 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
310 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
311 std::nullopt);
312 }
313
314 /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
315 /// represented as an indexed [r+r] operation.
316 /// This is for xform instructions whose associated displacement form is DS.
317 /// The last parameter \p 4 means associated DS form 16 bit signed
318 /// displacement must be a multiple of 4.
319 /// Returns false if it can be represented by [r+imm], which are preferred.
320 bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
321 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
322 Align(4));
323 }
324
325 /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
326 /// represented as an indexed [r+r] operation.
327 /// This is for xform instructions whose associated displacement form is DQ.
328 /// The last parameter \p 16 means associated DQ form 16 bit signed
329 /// displacement must be a multiple of 16.
330 /// Returns false if it can be represented by [r+imm], which are preferred.
331 bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
332 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
333 Align(16));
334 }
335
336 /// SelectAddrIdxOnly - Given the specified address, force it to be
337 /// represented as an indexed [r+r] operation.
338 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
339 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
340 }
341
342 /// SelectAddrImm - Returns true if the address N can be represented by
343 /// a base register plus a signed 16-bit displacement [r+imm].
344 /// The last parameter \p 0 means D form has no requirement for 16 bit signed
345 /// displacement.
346 bool SelectAddrImm(SDValue N, SDValue &Disp,
347 SDValue &Base) {
348 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
349 std::nullopt);
350 }
351
352 /// SelectAddrImmX4 - Returns true if the address N can be represented by
353 /// a base register plus a signed 16-bit displacement that is a multiple of
354 /// 4 (last parameter). Suitable for use by STD and friends.
355 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
356 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
357 }
358
359 /// SelectAddrImmX16 - Returns true if the address N can be represented by
360 /// a base register plus a signed 16-bit displacement that is a multiple of
361 /// 16(last parameter). Suitable for use by STXV and friends.
362 bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
363 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
364 Align(16));
365 }
366
367 /// SelectAddrImmX34 - Returns true if the address N can be represented by
368 /// a base register plus a signed 34-bit displacement. Suitable for use by
369 /// PSTXVP and friends.
370 bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
371 return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
372 }
373
374 // Select an address into a single register.
375 bool SelectAddr(SDValue N, SDValue &Base) {
376 Base = N;
377 return true;
378 }
379
380 bool SelectAddrPCRel(SDValue N, SDValue &Base) {
381 return PPCLowering->SelectAddressPCRel(N, Base);
382 }
383
384 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
385 /// inline asm expressions. It is always correct to compute the value into
386 /// a register. The case of adding a (possibly relocatable) constant to a
387 /// register can be improved, but it is wrong to substitute Reg+Reg for
388 /// Reg in an asm, because the load or store opcode would have to change.
390 InlineAsm::ConstraintCode ConstraintID,
391 std::vector<SDValue> &OutOps) override {
392 switch(ConstraintID) {
393 default:
394 errs() << "ConstraintID: "
395 << InlineAsm::getMemConstraintName(ConstraintID) << "\n";
396 llvm_unreachable("Unexpected asm memory constraint");
403 // We need to make sure that this one operand does not end up in r0
404 // (because we might end up lowering this as 0(%op)).
405 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
406 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
407 SDLoc dl(Op);
408 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
409 SDValue NewOp =
410 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
411 dl, Op.getValueType(),
412 Op, RC), 0);
413
414 OutOps.push_back(NewOp);
415 return false;
416 }
417 return true;
418 }
419
420// Include the pieces autogenerated from the target description.
421#include "PPCGenDAGISel.inc"
422
423private:
424 bool trySETCC(SDNode *N);
425 bool tryFoldSWTestBRCC(SDNode *N);
426 bool trySelectLoopCountIntrinsic(SDNode *N);
427 bool tryAsSingleRLDICL(SDNode *N);
428 bool tryAsSingleRLDCL(SDNode *N);
429 bool tryAsSingleRLDICR(SDNode *N);
430 bool tryAsSingleRLWINM(SDNode *N);
431 bool tryAsSingleRLWINM8(SDNode *N);
432 bool tryAsSingleRLWIMI(SDNode *N);
433 bool tryAsPairOfRLDICL(SDNode *N);
434 bool tryAsSingleRLDIMI(SDNode *N);
435
436 void PeepholePPC64();
437 void PeepholePPC64ZExt();
438 void PeepholeCROps();
439
440 SDValue combineToCMPB(SDNode *N);
441 void foldBoolExts(SDValue &Res, SDNode *&N);
442
443 bool AllUsersSelectZero(SDNode *N);
444 void SwapAllSelectUsers(SDNode *N);
445
446 bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
447 void transferMemOperands(SDNode *N, SDNode *Result);
448 };
449
450} // end anonymous namespace
451
452char PPCDAGToDAGISel::ID = 0;
453
454INITIALIZE_PASS(PPCDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
455
456/// getGlobalBaseReg - Output the instructions required to put the
457/// base address to use for accessing globals into a register.
458///
459SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
460 if (!GlobalBaseReg) {
461 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
462 // Insert the set of GlobalBaseReg into the first MBB of the function
463 MachineBasicBlock &FirstMBB = MF->front();
465 const Module *M = MF->getFunction().getParent();
466 DebugLoc dl;
467
468 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
469 if (Subtarget->isTargetELF()) {
470 GlobalBaseReg = PPC::R30;
471 if (!Subtarget->isSecurePlt() &&
472 M->getPICLevel() == PICLevel::SmallPIC) {
473 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
474 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
475 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
476 } else {
477 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
478 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
479 Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
480 BuildMI(FirstMBB, MBBI, dl,
481 TII.get(PPC::UpdateGBR), GlobalBaseReg)
482 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
483 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
484 }
485 } else {
487 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
488 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
489 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
490 }
491 } else {
492 // We must ensure that this sequence is dominated by the prologue.
493 // FIXME: This is a bit of a big hammer since we don't get the benefits
494 // of shrink-wrapping whenever we emit this instruction. Considering
495 // this is used in any function where we emit a jump table, this may be
496 // a significant limitation. We should consider inserting this in the
497 // block where it is used and then commoning this sequence up if it
498 // appears in multiple places.
499 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
500 // MovePCtoLR8.
501 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
502 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
503 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
504 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
505 }
506 }
507 return CurDAG->getRegister(GlobalBaseReg,
508 PPCLowering->getPointerTy(CurDAG->getDataLayout()))
509 .getNode();
510}
511
512// Check if a SDValue has the toc-data attribute.
513static bool hasTocDataAttr(SDValue Val) {
514 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
515 if (!GA)
516 return false;
517
518 const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
519 if (!GV)
520 return false;
521
522 if (!GV->hasAttribute("toc-data"))
523 return false;
524 return true;
525}
526
528 const TargetMachine &TM,
529 const SDNode *Node) {
530 // If there isn't an attribute to override the module code model
531 // this will be the effective code model.
532 CodeModel::Model ModuleModel = TM.getCodeModel();
533
534 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Node->getOperand(0));
535 if (!GA)
536 return ModuleModel;
537
538 const GlobalValue *GV = GA->getGlobal();
539 if (!GV)
540 return ModuleModel;
541
542 return Subtarget.getCodeModel(TM, GV);
543}
544
545/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
546/// operand. If so Imm will receive the 32-bit value.
547static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
548 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
549 Imm = N->getAsZExtVal();
550 return true;
551 }
552 return false;
553}
554
555/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
556/// operand. If so Imm will receive the 64-bit value.
557static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
558 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
559 Imm = N->getAsZExtVal();
560 return true;
561 }
562 return false;
563}
564
565// isInt32Immediate - This method tests to see if the value is a 32-bit
566// constant operand. If so Imm will receive the 32-bit value.
567static bool isInt32Immediate(SDValue N, unsigned &Imm) {
568 return isInt32Immediate(N.getNode(), Imm);
569}
570
571/// isInt64Immediate - This method tests to see if the value is a 64-bit
572/// constant operand. If so Imm will receive the 64-bit value.
573static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
574 return isInt64Immediate(N.getNode(), Imm);
575}
576
577static unsigned getBranchHint(unsigned PCC,
578 const FunctionLoweringInfo &FuncInfo,
579 const SDValue &DestMBB) {
580 assert(isa<BasicBlockSDNode>(DestMBB));
581
582 if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
583
584 const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
585 const Instruction *BBTerm = BB->getTerminator();
586
587 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
588
589 const BasicBlock *TBB = BBTerm->getSuccessor(0);
590 const BasicBlock *FBB = BBTerm->getSuccessor(1);
591
592 auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
593 auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
594
595 // We only want to handle cases which are easy to predict at static time, e.g.
596 // C++ throw statement, that is very likely not taken, or calling never
597 // returned function, e.g. stdlib exit(). So we set Threshold to filter
598 // unwanted cases.
599 //
600 // Below is LLVM branch weight table, we only want to handle case 1, 2
601 //
602 // Case Taken:Nontaken Example
603 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
604 // 2. Invoke-terminating 1:1048575
605 // 3. Coldblock 4:64 __builtin_expect
606 // 4. Loop Branch 124:4 For loop
607 // 5. PH/ZH/FPH 20:12
608 const uint32_t Threshold = 10000;
609
610 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
611 return PPC::BR_NO_HINT;
612
613 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
614 << "::" << BB->getName() << "'\n"
615 << " -> " << TBB->getName() << ": " << TProb << "\n"
616 << " -> " << FBB->getName() << ": " << FProb << "\n");
617
618 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
619
620 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
621 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
622 if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
623 std::swap(TProb, FProb);
624
625 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
626}
627
628// isOpcWithIntImmediate - This method tests to see if the node is a specific
629// opcode and that it has an immediate integer right operand.
630// If so Imm will receive the 32 bit value.
631static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
632 return N->getOpcode() == Opc
633 && isInt32Immediate(N->getOperand(1).getNode(), Imm);
634}
635
636void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) {
637 SDLoc dl(SN);
638 int FI = cast<FrameIndexSDNode>(N)->getIndex();
639 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
640 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
641 if (SN->hasOneUse())
642 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
643 getSmallIPtrImm(Offset, dl));
644 else
645 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
646 getSmallIPtrImm(Offset, dl)));
647}
648
649bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
650 bool isShiftMask, unsigned &SH,
651 unsigned &MB, unsigned &ME) {
652 // Don't even go down this path for i64, since different logic will be
653 // necessary for rldicl/rldicr/rldimi.
654 if (N->getValueType(0) != MVT::i32)
655 return false;
656
657 unsigned Shift = 32;
658 unsigned Indeterminant = ~0; // bit mask marking indeterminant results
659 unsigned Opcode = N->getOpcode();
660 if (N->getNumOperands() != 2 ||
661 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
662 return false;
663
664 if (Opcode == ISD::SHL) {
665 // apply shift left to mask if it comes first
666 if (isShiftMask) Mask = Mask << Shift;
667 // determine which bits are made indeterminant by shift
668 Indeterminant = ~(0xFFFFFFFFu << Shift);
669 } else if (Opcode == ISD::SRL) {
670 // apply shift right to mask if it comes first
671 if (isShiftMask) Mask = Mask >> Shift;
672 // determine which bits are made indeterminant by shift
673 Indeterminant = ~(0xFFFFFFFFu >> Shift);
674 // adjust for the left rotate
675 Shift = 32 - Shift;
676 } else if (Opcode == ISD::ROTL) {
677 Indeterminant = 0;
678 } else {
679 return false;
680 }
681
682 // if the mask doesn't intersect any Indeterminant bits
683 if (Mask && !(Mask & Indeterminant)) {
684 SH = Shift & 31;
685 // make sure the mask is still a mask (wrap arounds may not be)
686 return isRunOfOnes(Mask, MB, ME);
687 }
688 return false;
689}
690
691// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
692// instruction use the thread pointer.
694 assert(
695 Base.getOpcode() == PPCISD::ADD_TLS &&
696 "Only expecting the ADD_TLS instruction to acquire the thread pointer!");
697 const PPCSubtarget &Subtarget =
699 SDValue ADDTLSOp1 = Base.getOperand(0);
700 unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
701
702 // Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
703 //
704 // Although ADD_TLS does not explicitly use the thread pointer
705 // register when LD_GOT_TPREL_L is one of its operands, the LD_GOT_TPREL_L
706 // instruction will have a relocation specifier, @got@tprel, that is used to
707 // generate a GOT entry. The linker replaces this entry with an offset for a
708 // thread local variable, which will be relative to the thread pointer.
709 if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
710 return true;
711 // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
712 // node is produced instead to represent the aforementioned situation.
713 LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSOp1);
714 if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
715 return true;
716
717 // A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand
718 // to ADD_TLS represents a call to .__get_tpointer to get the thread pointer,
719 // later returning it into R3.
720 if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
721 return true;
722
723 // The ADD_TLS node is explicitly acquiring the thread pointer (X13/R13).
724 RegisterSDNode *AddFirstOpReg =
725 dyn_cast_or_null<RegisterSDNode>(ADDTLSOp1.getNode());
726 if (AddFirstOpReg &&
727 AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
728 return true;
729
730 return false;
731}
732
733// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
734// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
735// operation, can be optimized to use an X-Form load or store, allowing the
736// ADD_TLS node to be removed completely.
738
739 // Do not do this transformation at -O0.
740 if (CurDAG->getTarget().getOptLevel() == CodeGenOptLevel::None)
741 return false;
742
743 // In order to perform this optimization inside tryTLSXForm[Load|Store],
744 // Base is expected to be an ADD_TLS node.
745 if (Base.getOpcode() != PPCISD::ADD_TLS)
746 return false;
747 for (auto *ADDTLSUse : Base.getNode()->uses()) {
748 // The optimization to convert the D-Form load/store into its X-Form
749 // counterpart should only occur if the source value offset of the load/
750 // store is 0. This also means that the offset should always be undefined.
751 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSUse)) {
752 if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
753 return false;
754 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(ADDTLSUse)) {
755 if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
756 return false;
757 } else // Don't optimize if there are ADD_TLS users that aren't load/stores.
758 return false;
759 }
760
761 if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
762 return false;
763
764 // Does the ADD_TLS node of the load/store use the thread pointer?
765 // If the thread pointer is not used as one of the operands of ADD_TLS,
766 // then this optimization is not valid.
767 return isThreadPointerAcquisitionNode(Base, CurDAG);
768}
769
770bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
771 SDValue Base = ST->getBasePtr();
772 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
773 return false;
774
775 SDLoc dl(ST);
776 EVT MemVT = ST->getMemoryVT();
777 EVT RegVT = ST->getValue().getValueType();
778
779 unsigned Opcode;
780 switch (MemVT.getSimpleVT().SimpleTy) {
781 default:
782 return false;
783 case MVT::i8: {
784 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
785 break;
786 }
787 case MVT::i16: {
788 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
789 break;
790 }
791 case MVT::i32: {
792 Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
793 break;
794 }
795 case MVT::i64: {
796 Opcode = PPC::STDXTLS;
797 break;
798 }
799 case MVT::f32: {
800 Opcode = PPC::STFSXTLS;
801 break;
802 }
803 case MVT::f64: {
804 Opcode = PPC::STFDXTLS;
805 break;
806 }
807 }
808 SDValue Chain = ST->getChain();
809 SDVTList VTs = ST->getVTList();
810 SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
811 Chain};
812 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
813 transferMemOperands(ST, MN);
814 ReplaceNode(ST, MN);
815 return true;
816}
817
818bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
819 SDValue Base = LD->getBasePtr();
820 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
821 return false;
822
823 SDLoc dl(LD);
824 EVT MemVT = LD->getMemoryVT();
825 EVT RegVT = LD->getValueType(0);
826 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
827 unsigned Opcode;
828 switch (MemVT.getSimpleVT().SimpleTy) {
829 default:
830 return false;
831 case MVT::i8: {
832 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
833 break;
834 }
835 case MVT::i16: {
836 if (RegVT == MVT::i32)
837 Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
838 else
839 Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
840 break;
841 }
842 case MVT::i32: {
843 if (RegVT == MVT::i32)
844 Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
845 else
846 Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
847 break;
848 }
849 case MVT::i64: {
850 Opcode = PPC::LDXTLS;
851 break;
852 }
853 case MVT::f32: {
854 Opcode = PPC::LFSXTLS;
855 break;
856 }
857 case MVT::f64: {
858 Opcode = PPC::LFDXTLS;
859 break;
860 }
861 }
862 SDValue Chain = LD->getChain();
863 SDVTList VTs = LD->getVTList();
864 SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
865 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
866 transferMemOperands(LD, MN);
867 ReplaceNode(LD, MN);
868 return true;
869}
870
871/// Turn an or of two masked values into the rotate left word immediate then
872/// mask insert (rlwimi) instruction.
873bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
874 SDValue Op0 = N->getOperand(0);
875 SDValue Op1 = N->getOperand(1);
876 SDLoc dl(N);
877
878 KnownBits LKnown = CurDAG->computeKnownBits(Op0);
879 KnownBits RKnown = CurDAG->computeKnownBits(Op1);
880
881 unsigned TargetMask = LKnown.Zero.getZExtValue();
882 unsigned InsertMask = RKnown.Zero.getZExtValue();
883
884 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
885 unsigned Op0Opc = Op0.getOpcode();
886 unsigned Op1Opc = Op1.getOpcode();
887 unsigned Value, SH = 0;
888 TargetMask = ~TargetMask;
889 InsertMask = ~InsertMask;
890
891 // If the LHS has a foldable shift and the RHS does not, then swap it to the
892 // RHS so that we can fold the shift into the insert.
893 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
894 if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
895 Op0.getOperand(0).getOpcode() == ISD::SRL) {
896 if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
897 Op1.getOperand(0).getOpcode() != ISD::SRL) {
898 std::swap(Op0, Op1);
899 std::swap(Op0Opc, Op1Opc);
900 std::swap(TargetMask, InsertMask);
901 }
902 }
903 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
904 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
905 Op1.getOperand(0).getOpcode() != ISD::SRL) {
906 std::swap(Op0, Op1);
907 std::swap(Op0Opc, Op1Opc);
908 std::swap(TargetMask, InsertMask);
909 }
910 }
911
912 unsigned MB, ME;
913 if (isRunOfOnes(InsertMask, MB, ME)) {
914 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
916 Op1 = Op1.getOperand(0);
917 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
918 }
919 if (Op1Opc == ISD::AND) {
920 // The AND mask might not be a constant, and we need to make sure that
921 // if we're going to fold the masking with the insert, all bits not
922 // know to be zero in the mask are known to be one.
923 KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
924 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
925
926 unsigned SHOpc = Op1.getOperand(0).getOpcode();
927 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
929 // Note that Value must be in range here (less than 32) because
930 // otherwise there would not be any bits set in InsertMask.
931 Op1 = Op1.getOperand(0).getOperand(0);
932 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
933 }
934 }
935
936 SH &= 31;
937 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
938 getI32Imm(ME, dl) };
939 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
940 return true;
941 }
942 }
943 return false;
944}
945
946static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
947 unsigned MaxTruncation = 0;
948 // Cannot use range-based for loop here as we need the actual use (i.e. we
949 // need the operand number corresponding to the use). A range-based for
950 // will unbox the use and provide an SDNode*.
951 for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
952 Use != UseEnd; ++Use) {
953 unsigned Opc =
954 Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
955 switch (Opc) {
956 default: return 0;
957 case ISD::TRUNCATE:
958 if (Use->isMachineOpcode())
959 return 0;
960 MaxTruncation =
961 std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
962 continue;
963 case ISD::STORE: {
964 if (Use->isMachineOpcode())
965 return 0;
966 StoreSDNode *STN = cast<StoreSDNode>(*Use);
967 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
968 if (MemVTSize == 64 || Use.getOperandNo() != 0)
969 return 0;
970 MaxTruncation = std::max(MaxTruncation, MemVTSize);
971 continue;
972 }
973 case PPC::STW8:
974 case PPC::STWX8:
975 case PPC::STWU8:
976 case PPC::STWUX8:
977 if (Use.getOperandNo() != 0)
978 return 0;
979 MaxTruncation = std::max(MaxTruncation, 32u);
980 continue;
981 case PPC::STH8:
982 case PPC::STHX8:
983 case PPC::STHU8:
984 case PPC::STHUX8:
985 if (Use.getOperandNo() != 0)
986 return 0;
987 MaxTruncation = std::max(MaxTruncation, 16u);
988 continue;
989 case PPC::STB8:
990 case PPC::STBX8:
991 case PPC::STBU8:
992 case PPC::STBUX8:
993 if (Use.getOperandNo() != 0)
994 return 0;
995 MaxTruncation = std::max(MaxTruncation, 8u);
996 continue;
997 }
998 }
999 return MaxTruncation;
1000}
1001
1002// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
1003// zeros and return the number of bits by the left of these consecutive zeros.
1004static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
1005 unsigned HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
1006 unsigned LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
1007 if ((HiTZ + LoLZ) >= Num)
1008 return (32 + HiTZ);
1009 return 0;
1010}
1011
// Direct materialization of 64-bit constants by enumerated patterns.
//
// Tries a fixed list of 1-, 2- and 3-instruction sequences in order and
// returns the last node of the first sequence whose bit-pattern test matches
// Imm. InstCnt is set to the number of instructions in the returned sequence,
// or to 0 (together with a nullptr return) when no pattern matches.
//
// The order of the checks matters: later patterns rely on earlier ones having
// already returned (see the asserts on LZ below).
//
// Abbreviations used below:
//   TZ / LZ - number of trailing / leading zero bits of Imm.
//   TO / LO - number of trailing / leading one bits of Imm.
//   FO      - number of one bits that directly follow the leading zeros.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
                                  uint64_t Imm, unsigned &InstCnt) {
  unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
  unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
  unsigned TO = llvm::countr_one<uint64_t>(Imm);
  unsigned LO = llvm::countl_one<uint64_t>(Imm);
  unsigned Hi32 = Hi_32(Imm);
  unsigned Lo32 = Lo_32(Imm);
  SDNode *Result = nullptr;
  unsigned Shift = 0;

  // Shorthand for building an i32 target constant at this debug location.
  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  // Following patterns use 1 instruction to materialize the Imm.
  InstCnt = 1;
  // 1-1) Patterns : {zeros}{15-bit value}
  //                 {ones}{15-bit value}
  if (isInt<16>(Imm)) {
    SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  }
  // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
  //                 {ones}{15-bit value}{16 zeros}
  if (TZ > 15 && (LZ > 32 || LO > 32))
    return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                  getI32Imm((Imm >> 16) & 0xffff));

  // Following patterns use 2 instructions to materialize the Imm.
  InstCnt = 2;
  assert(LZ < 64 && "Unexpected leading zeros here.");
  // Count of ones following the leading zeros.
  unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
  // 2-1) Patterns : {zeros}{31-bit value}
  //                 {ones}{31-bit value}
  if (isInt<32>(Imm)) {
    uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Imm & 0xffff));
  }
  // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
  //                 {zeros}{15-bit value}{zeros}
  //                 {zeros}{ones}{15-bit value}
  //                 {ones}{15-bit value}{zeros}
  // We can take advantage of LI's sign-extension semantics to generate leading
  // ones, and then use RLDIC to mask off the ones in both sides after rotation.
  if ((LZ + FO + TZ) > 48) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> TZ) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // 2-3) Pattern : {zeros}{15-bit value}{ones}
  // Shift right the Imm by (48 - LZ) bits to construct a negative 16 bits value,
  // therefore we can take advantage of LI's sign-extension semantics, and then
  // mask them off after rotation.
  //
  // +--LZ--||-15-bit-||--TO--+     +-------------|--16-bit--+
  // |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //          Imm                   (Imm >> (48 - LZ) & 0xffff)
  // +----sext-----|--16-bit--+     +clear-|-----------------+
  // |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  // LI8: sext many leading zeros   RLDICL: rotate left (48 - LZ), clear left LZ
  if ((LZ + TO) > 48) {
    // Since the immediates with (LZ > 32) have been handled by previous
    // patterns, here we have (LZ <= 32) to make sure we will not shift right
    // the Imm by a negative value.
    assert(LZ <= 32 && "Unexpected shift value.");
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> (48 - LZ) & 0xffff)));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(48 - LZ), getI32Imm(LZ));
  }
  // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
  //                 {ones}{15-bit value}{ones}
  // We can take advantage of LI's sign-extension semantics to generate leading
  // ones, and then use RLDICL to mask off the ones in left sides (if required)
  // after rotation.
  //
  // +-LZ-FO||-15-bit-||--TO--+     +-------------|--16-bit--+
  // |00011110bbbbbbbbb1111111| ->  |000000000011110bbbbbbbbb|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //            Imm                    (Imm >> TO) & 0xffff
  // +----sext-----|--16-bit--+     +LZ|---------------------+
  // |111111111111110bbbbbbbbb| ->  |00011110bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  // LI8: sext many leading zeros   RLDICL: rotate left TO, clear left LZ
  if ((LZ + FO + TO) > 48) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> TO) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
  // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
  // value, we can use LI for Lo16 without generating leading ones then add the
  // Hi16(in Lo32).
  if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm(Lo32 & 0xffff));
    return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Lo32 >> 16));
  }
  // 2-6) Patterns : {******}{49 zeros}{******}
  //                 {******}{49 ones}{******}
  // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
  // bits remain on both sides. Rotate right the Imm to construct an int<16>
  // value, use LI for int<16> value and then use RLDICL without mask to rotate
  // it back.
  //
  // 1) findContiguousZerosAtLeast(Imm, 49)
  // +------|--zeros-|------+     +---ones--||---15 bit--+
  // |bbbbbb0000000000aaaaaa| ->  |0000000000aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                    0      63                    0
  //
  // 2) findContiguousZerosAtLeast(~Imm, 49)
  // +------|--ones--|------+     +---ones--||---15 bit--+
  // |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                    0      63                    0
  if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
      (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm(RotImm & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Shift), getI32Imm(0));
  }
  // 2-7) Patterns : High word == Low word
  // This may require 2 to 3 instructions, depending on whether Lo32 can be
  // materialized in 1 instruction.
  if (Hi32 == Lo32) {
    // Handle the first 32 bits.
    uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
    uint64_t ImmLo16 = Lo32 & 0xffff;
    if (isInt<16>(Lo32))
      Result =
          CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
    else if (!ImmLo16)
      Result =
          CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
    else {
      // Lo32 itself needs two instructions, so the whole sequence needs three.
      InstCnt = 3;
      Result =
          CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
      Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                      SDValue(Result, 0), getI32Imm(ImmLo16));
    }
    // Use rldimi to insert the Low word into High word.
    SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
                     getI32Imm(0)};
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  }

  // Following patterns use 3 instructions to materialize the Imm.
  InstCnt = 3;
  // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
  //                 {zeros}{31-bit value}{zeros}
  //                 {zeros}{ones}{31-bit value}
  //                 {ones}{31-bit value}{zeros}
  // We can take advantage of LIS's sign-extension semantics to generate leading
  // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
  // ones in both sides after rotation.
  if ((LZ + FO + TZ) > 32) {
    uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> TZ) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // 3-2) Pattern : {zeros}{31-bit value}{ones}
  // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
  // value, therefore we can take advantage of LIS's sign-extension semantics,
  // add the remaining bits with ORI, and then mask them off after rotation.
  // This is similar to Pattern 2-3, please refer to the diagram there.
  if ((LZ + TO) > 32) {
    // Since the immediates with (LZ > 32) have been handled by previous
    // patterns, here we have (LZ <= 32) to make sure we will not shift right
    // the Imm by a negative value.
    assert(LZ <= 32 && "Unexpected shift value.");
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                    getI32Imm((Imm >> (48 - LZ)) & 0xffff));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> (32 - LZ)) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(32 - LZ), getI32Imm(LZ));
  }
  // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
  //                 {ones}{31-bit value}{ones}
  // We can take advantage of LIS's sign-extension semantics to generate leading
  // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
  // ones in left sides (if required) after rotation.
  // This is similar to Pattern 2-4, please refer to the diagram there.
  if ((LZ + FO + TO) > 32) {
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                    getI32Imm((Imm >> (TO + 16)) & 0xffff));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> TO) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // 3-4) Patterns : {******}{33 zeros}{******}
  //                 {******}{33 ones}{******}
  // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
  // bits remain on both sides. Rotate right the Imm to construct an int<32>
  // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
  // rotate it back.
  // This is similar to Pattern 2-6, please refer to the diagram there.
  if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
      (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
    uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(RotImm & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Shift), getI32Imm(0));
  }

  // No enumerated pattern matched; report that to the caller.
  InstCnt = 0;
  return nullptr;
}
1248
1249// Try to select instructions to generate a 64 bit immediate using prefix as
1250// well as non prefix instructions. The function will return the SDNode
1251// to materialize that constant or it will return nullptr if it does not
1252// find one. The variable InstCnt is set to the number of instructions that
1253// were selected.
1255 uint64_t Imm, unsigned &InstCnt) {
1256 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1257 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1258 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1259 unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1260 unsigned Hi32 = Hi_32(Imm);
1261 unsigned Lo32 = Lo_32(Imm);
1262
1263 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1264 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1265 };
1266
1267 auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1268 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1269 };
1270
1271 // Following patterns use 1 instruction to materialize Imm.
1272 InstCnt = 1;
1273
1274 // The pli instruction can materialize up to 34 bits directly.
1275 // If a constant fits within 34-bits, emit the pli instruction here directly.
1276 if (isInt<34>(Imm))
1277 return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1278 CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1279
1280 // Require at least two instructions.
1281 InstCnt = 2;
1282 SDNode *Result = nullptr;
1283 // Patterns : {zeros}{ones}{33-bit value}{zeros}
1284 // {zeros}{33-bit value}{zeros}
1285 // {zeros}{ones}{33-bit value}
1286 // {ones}{33-bit value}{zeros}
1287 // We can take advantage of PLI's sign-extension semantics to generate leading
1288 // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1289 if ((LZ + FO + TZ) > 30) {
1290 APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1291 APInt Extended = SignedInt34.sext(64);
1292 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1293 getI64Imm(*Extended.getRawData()));
1294 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1295 getI32Imm(TZ), getI32Imm(LZ));
1296 }
1297 // Pattern : {zeros}{33-bit value}{ones}
1298 // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1299 // therefore we can take advantage of PLI's sign-extension semantics, and then
1300 // mask them off after rotation.
1301 //
1302 // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1303 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1304 // +------------------------+ +------------------------+
1305 // 63 0 63 0
1306 //
1307 // +----sext-----|--34-bit--+ +clear-|-----------------+
1308 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1309 // +------------------------+ +------------------------+
1310 // 63 0 63 0
1311 if ((LZ + TO) > 30) {
1312 APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1313 APInt Extended = SignedInt34.sext(64);
1314 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1315 getI64Imm(*Extended.getRawData()));
1316 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1317 getI32Imm(30 - LZ), getI32Imm(LZ));
1318 }
1319 // Patterns : {zeros}{ones}{33-bit value}{ones}
1320 // {ones}{33-bit value}{ones}
1321 // Similar to LI we can take advantage of PLI's sign-extension semantics to
1322 // generate leading ones, and then use RLDICL to mask off the ones in left
1323 // sides (if required) after rotation.
1324 if ((LZ + FO + TO) > 30) {
1325 APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1326 APInt Extended = SignedInt34.sext(64);
1327 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1328 getI64Imm(*Extended.getRawData()));
1329 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1330 getI32Imm(TO), getI32Imm(LZ));
1331 }
1332 // Patterns : {******}{31 zeros}{******}
1333 // : {******}{31 ones}{******}
1334 // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1335 // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
1336 // for the int<33> value and then use RLDICL without a mask to rotate it back.
1337 //
1338 // +------|--ones--|------+ +---ones--||---33 bit--+
1339 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1340 // +----------------------+ +----------------------+
1341 // 63 0 63 0
1342 for (unsigned Shift = 0; Shift < 63; ++Shift) {
1343 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1344 if (isInt<34>(RotImm)) {
1345 Result =
1346 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1347 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1348 SDValue(Result, 0), getI32Imm(Shift),
1349 getI32Imm(0));
1350 }
1351 }
1352
1353 // Patterns : High word == Low word
1354 // This is basically a splat of a 32 bit immediate.
1355 if (Hi32 == Lo32) {
1356 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1357 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1358 getI32Imm(0)};
1359 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1360 }
1361
1362 InstCnt = 3;
1363 // Catch-all
1364 // This pattern can form any 64 bit immediate in 3 instructions.
1365 SDNode *ResultHi =
1366 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1367 SDNode *ResultLo =
1368 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1369 SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1370 getI32Imm(0)};
1371 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1372}
1373
1374static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1375 unsigned *InstCnt = nullptr) {
1376 unsigned InstCntDirect = 0;
1377 // No more than 3 instructions are used if we can select the i64 immediate
1378 // directly.
1379 SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1380
1381 const PPCSubtarget &Subtarget =
1383
1384 // If we have prefixed instructions and there is a chance we can
1385 // materialize the constant with fewer prefixed instructions than
1386 // non-prefixed, try that.
1387 if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1388 unsigned InstCntDirectP = 0;
1389 SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1390 // Use the prefix case in either of two cases:
1391 // 1) We have no result from the non-prefix case to use.
1392 // 2) The non-prefix case uses more instructions than the prefix case.
1393 // If the prefix and non-prefix cases use the same number of instructions
1394 // we will prefer the non-prefix case.
1395 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1396 if (InstCnt)
1397 *InstCnt = InstCntDirectP;
1398 return ResultP;
1399 }
1400 }
1401
1402 if (Result) {
1403 if (InstCnt)
1404 *InstCnt = InstCntDirect;
1405 return Result;
1406 }
1407 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1408 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1409 };
1410
1411 uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff;
1412 uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff;
1413
1414 // Try to use 4 instructions to materialize the immediate which is "almost" a
1415 // splat of a 32 bit immediate.
1416 if (Hi16OfLo32 && Lo16OfLo32) {
1417 uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff;
1418 uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff;
1419 bool IsSelected = false;
1420
1421 auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
1422 SDNode *Result =
1423 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
1424 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1425 SDValue(Result, 0), getI32Imm(Lo16));
1426 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1427 getI32Imm(0)};
1428 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1429 };
1430
1431 if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1432 IsSelected = true;
1433 Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1434 // Modify Hi16OfHi32.
1435 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48),
1436 getI32Imm(0)};
1437 Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1438 } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1439 IsSelected = true;
1440 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1441 // Modify Lo16OfLo32.
1442 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1443 getI32Imm(16), getI32Imm(31)};
1444 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1445 } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1446 IsSelected = true;
1447 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1448 // Modify Hi16OfLo32.
1449 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1450 getI32Imm(0), getI32Imm(15)};
1451 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1452 }
1453 if (IsSelected == true) {
1454 if (InstCnt)
1455 *InstCnt = 4;
1456 return Result;
1457 }
1458 }
1459
1460 // Handle the upper 32 bit value.
1461 Result =
1462 selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1463 // Add in the last bits as required.
1464 if (Hi16OfLo32) {
1465 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1466 SDValue(Result, 0), getI32Imm(Hi16OfLo32));
1467 ++InstCntDirect;
1468 }
1469 if (Lo16OfLo32) {
1470 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1471 getI32Imm(Lo16OfLo32));
1472 ++InstCntDirect;
1473 }
1474 if (InstCnt)
1475 *InstCnt = InstCntDirect;
1476 return Result;
1477}
1478
1479// Select a 64-bit constant.
1481 SDLoc dl(N);
1482
1483 // Get 64 bit value.
1484 int64_t Imm = N->getAsZExtVal();
1485 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1486 uint64_t SextImm = SignExtend64(Imm, MinSize);
1487 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1488 if (isInt<16>(SextImm))
1489 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1490 }
1491 return selectI64Imm(CurDAG, dl, Imm);
1492}
1493
1494namespace {
1495
1496class BitPermutationSelector {
1497 struct ValueBit {
1498 SDValue V;
1499
1500 // The bit number in the value, using a convention where bit 0 is the
1501 // lowest-order bit.
1502 unsigned Idx;
1503
1504 // ConstZero means a bit we need to mask off.
1505 // Variable is a bit comes from an input variable.
1506 // VariableKnownToBeZero is also a bit comes from an input variable,
1507 // but it is known to be already zero. So we do not need to mask them.
1508 enum Kind {
1509 ConstZero,
1510 Variable,
1511 VariableKnownToBeZero
1512 } K;
1513
1514 ValueBit(SDValue V, unsigned I, Kind K = Variable)
1515 : V(V), Idx(I), K(K) {}
1516 ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1517
1518 bool isZero() const {
1519 return K == ConstZero || K == VariableKnownToBeZero;
1520 }
1521
1522 bool hasValue() const {
1523 return K == Variable || K == VariableKnownToBeZero;
1524 }
1525
1526 SDValue getValue() const {
1527 assert(hasValue() && "Cannot get the value of a constant bit");
1528 return V;
1529 }
1530
1531 unsigned getValueBitIndex() const {
1532 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1533 return Idx;
1534 }
1535 };
1536
1537 // A bit group has the same underlying value and the same rotate factor.
1538 struct BitGroup {
1539 SDValue V;
1540 unsigned RLAmt;
1541 unsigned StartIdx, EndIdx;
1542
1543 // This rotation amount assumes that the lower 32 bits of the quantity are
1544 // replicated in the high 32 bits by the rotation operator (which is done
1545 // by rlwinm and friends in 64-bit mode).
1546 bool Repl32;
1547 // Did converting to Repl32 == true change the rotation factor? If it did,
1548 // it decreased it by 32.
1549 bool Repl32CR;
1550 // Was this group coalesced after setting Repl32 to true?
1551 bool Repl32Coalesced;
1552
1553 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1554 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1555 Repl32Coalesced(false) {
1556 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1557 << " [" << S << ", " << E << "]\n");
1558 }
1559 };
1560
1561 // Information on each (Value, RLAmt) pair (like the number of groups
1562 // associated with each) used to choose the lowering method.
1563 struct ValueRotInfo {
1564 SDValue V;
1565 unsigned RLAmt = std::numeric_limits<unsigned>::max();
1566 unsigned NumGroups = 0;
1567 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1568 bool Repl32 = false;
1569
1570 ValueRotInfo() = default;
1571
1572 // For sorting (in reverse order) by NumGroups, and then by
1573 // FirstGroupStartIdx.
1574 bool operator < (const ValueRotInfo &Other) const {
1575 // We need to sort so that the non-Repl32 come first because, when we're
1576 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1577 // masking operation.
1578 if (Repl32 < Other.Repl32)
1579 return true;
1580 else if (Repl32 > Other.Repl32)
1581 return false;
1582 else if (NumGroups > Other.NumGroups)
1583 return true;
1584 else if (NumGroups < Other.NumGroups)
1585 return false;
1586 else if (RLAmt == 0 && Other.RLAmt != 0)
1587 return true;
1588 else if (RLAmt != 0 && Other.RLAmt == 0)
1589 return false;
1590 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1591 return true;
1592 return false;
1593 }
1594 };
1595
1596 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1597 using ValueBitsMemoizer =
1599 ValueBitsMemoizer Memoizer;
1600
1601 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1602 // The bool is true if something interesting was deduced, otherwise if we're
1603 // providing only a generic representation of V (or something else likewise
1604 // uninteresting for instruction selection) through the SmallVector.
1605 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1606 unsigned NumBits) {
1607 auto &ValueEntry = Memoizer[V];
1608 if (ValueEntry)
1609 return std::make_pair(ValueEntry->first, &ValueEntry->second);
1610 ValueEntry.reset(new ValueBitsMemoizedValue());
1611 bool &Interesting = ValueEntry->first;
1612 SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1613 Bits.resize(NumBits);
1614
1615 switch (V.getOpcode()) {
1616 default: break;
1617 case ISD::ROTL:
1618 if (isa<ConstantSDNode>(V.getOperand(1))) {
1619 assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1620 unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);
1621
1622 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1623
1624 for (unsigned i = 0; i < NumBits; ++i)
1625 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1626
1627 return std::make_pair(Interesting = true, &Bits);
1628 }
1629 break;
1630 case ISD::SHL:
1631 case PPCISD::SHL:
1632 if (isa<ConstantSDNode>(V.getOperand(1))) {
1633 // sld takes 7 bits, slw takes 6.
1634 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1635
1636 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1637
1638 if (ShiftAmt >= NumBits) {
1639 for (unsigned i = 0; i < NumBits; ++i)
1640 Bits[i] = ValueBit(ValueBit::ConstZero);
1641 } else {
1642 for (unsigned i = ShiftAmt; i < NumBits; ++i)
1643 Bits[i] = LHSBits[i - ShiftAmt];
1644 for (unsigned i = 0; i < ShiftAmt; ++i)
1645 Bits[i] = ValueBit(ValueBit::ConstZero);
1646 }
1647
1648 return std::make_pair(Interesting = true, &Bits);
1649 }
1650 break;
1651 case ISD::SRL:
1652 case PPCISD::SRL:
1653 if (isa<ConstantSDNode>(V.getOperand(1))) {
1654 // srd takes lowest 7 bits, srw takes 6.
1655 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1656
1657 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1658
1659 if (ShiftAmt >= NumBits) {
1660 for (unsigned i = 0; i < NumBits; ++i)
1661 Bits[i] = ValueBit(ValueBit::ConstZero);
1662 } else {
1663 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1664 Bits[i] = LHSBits[i + ShiftAmt];
1665 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1666 Bits[i] = ValueBit(ValueBit::ConstZero);
1667 }
1668
1669 return std::make_pair(Interesting = true, &Bits);
1670 }
1671 break;
1672 case ISD::AND:
1673 if (isa<ConstantSDNode>(V.getOperand(1))) {
1674 uint64_t Mask = V.getConstantOperandVal(1);
1675
1676 const SmallVector<ValueBit, 64> *LHSBits;
1677 // Mark this as interesting, only if the LHS was also interesting. This
1678 // prevents the overall procedure from matching a single immediate 'and'
1679 // (which is non-optimal because such an and might be folded with other
1680 // things if we don't select it here).
1681 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1682
1683 for (unsigned i = 0; i < NumBits; ++i)
1684 if (((Mask >> i) & 1) == 1)
1685 Bits[i] = (*LHSBits)[i];
1686 else {
1687 // AND instruction masks this bit. If the input is already zero,
1688 // we have nothing to do here. Otherwise, make the bit ConstZero.
1689 if ((*LHSBits)[i].isZero())
1690 Bits[i] = (*LHSBits)[i];
1691 else
1692 Bits[i] = ValueBit(ValueBit::ConstZero);
1693 }
1694
1695 return std::make_pair(Interesting, &Bits);
1696 }
1697 break;
1698 case ISD::OR: {
1699 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1700 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1701
1702 bool AllDisjoint = true;
1703 SDValue LastVal = SDValue();
1704 unsigned LastIdx = 0;
1705 for (unsigned i = 0; i < NumBits; ++i) {
1706 if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1707 // If both inputs are known to be zero and one is ConstZero and
1708 // another is VariableKnownToBeZero, we can select whichever
1709 // we like. To minimize the number of bit groups, we select
1710 // VariableKnownToBeZero if this bit is the next bit of the same
1711 // input variable from the previous bit. Otherwise, we select
1712 // ConstZero.
1713 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1714 LHSBits[i].getValueBitIndex() == LastIdx + 1)
1715 Bits[i] = LHSBits[i];
1716 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1717 RHSBits[i].getValueBitIndex() == LastIdx + 1)
1718 Bits[i] = RHSBits[i];
1719 else
1720 Bits[i] = ValueBit(ValueBit::ConstZero);
1721 }
1722 else if (LHSBits[i].isZero())
1723 Bits[i] = RHSBits[i];
1724 else if (RHSBits[i].isZero())
1725 Bits[i] = LHSBits[i];
1726 else {
1727 AllDisjoint = false;
1728 break;
1729 }
1730 // We remember the value and bit index of this bit.
1731 if (Bits[i].hasValue()) {
1732 LastVal = Bits[i].getValue();
1733 LastIdx = Bits[i].getValueBitIndex();
1734 }
1735 else {
1736 if (LastVal) LastVal = SDValue();
1737 LastIdx = 0;
1738 }
1739 }
1740
1741 if (!AllDisjoint)
1742 break;
1743
1744 return std::make_pair(Interesting = true, &Bits);
1745 }
1746 case ISD::ZERO_EXTEND: {
1747 // We support only the case with zero extension from i32 to i64 so far.
1748 if (V.getValueType() != MVT::i64 ||
1749 V.getOperand(0).getValueType() != MVT::i32)
1750 break;
1751
1752 const SmallVector<ValueBit, 64> *LHSBits;
1753 const unsigned NumOperandBits = 32;
1754 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1755 NumOperandBits);
1756
1757 for (unsigned i = 0; i < NumOperandBits; ++i)
1758 Bits[i] = (*LHSBits)[i];
1759
1760 for (unsigned i = NumOperandBits; i < NumBits; ++i)
1761 Bits[i] = ValueBit(ValueBit::ConstZero);
1762
1763 return std::make_pair(Interesting, &Bits);
1764 }
1765 case ISD::TRUNCATE: {
1766 EVT FromType = V.getOperand(0).getValueType();
1767 EVT ToType = V.getValueType();
1768 // We support only the case with truncate from i64 to i32.
1769 if (FromType != MVT::i64 || ToType != MVT::i32)
1770 break;
1771 const unsigned NumAllBits = FromType.getSizeInBits();
1773 std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1774 NumAllBits);
1775 const unsigned NumValidBits = ToType.getSizeInBits();
1776
1777 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1778 // So, we cannot include this truncate.
1779 bool UseUpper32bit = false;
1780 for (unsigned i = 0; i < NumValidBits; ++i)
1781 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1782 UseUpper32bit = true;
1783 break;
1784 }
1785 if (UseUpper32bit)
1786 break;
1787
1788 for (unsigned i = 0; i < NumValidBits; ++i)
1789 Bits[i] = (*InBits)[i];
1790
1791 return std::make_pair(Interesting, &Bits);
1792 }
1793 case ISD::AssertZext: {
1794 // For AssertZext, we look through the operand and
1795 // mark the bits known to be zero.
1796 const SmallVector<ValueBit, 64> *LHSBits;
1797 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1798 NumBits);
1799
1800 EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1801 const unsigned NumValidBits = FromType.getSizeInBits();
1802 for (unsigned i = 0; i < NumValidBits; ++i)
1803 Bits[i] = (*LHSBits)[i];
1804
1805 // These bits are known to be zero but the AssertZext may be from a value
1806 // that already has some constant zero bits (i.e. from a masking and).
1807 for (unsigned i = NumValidBits; i < NumBits; ++i)
1808 Bits[i] = (*LHSBits)[i].hasValue()
1809 ? ValueBit((*LHSBits)[i].getValue(),
1810 (*LHSBits)[i].getValueBitIndex(),
1811 ValueBit::VariableKnownToBeZero)
1812 : ValueBit(ValueBit::ConstZero);
1813
1814 return std::make_pair(Interesting, &Bits);
1815 }
1816 case ISD::LOAD:
1817 LoadSDNode *LD = cast<LoadSDNode>(V);
1818 if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1819 EVT VT = LD->getMemoryVT();
1820 const unsigned NumValidBits = VT.getSizeInBits();
1821
1822 for (unsigned i = 0; i < NumValidBits; ++i)
1823 Bits[i] = ValueBit(V, i);
1824
1825 // These bits are known to be zero.
1826 for (unsigned i = NumValidBits; i < NumBits; ++i)
1827 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1828
1829 // Zero-extending load itself cannot be optimized. So, it is not
1830 // interesting by itself though it gives useful information.
1831 return std::make_pair(Interesting = false, &Bits);
1832 }
1833 break;
1834 }
1835
1836 for (unsigned i = 0; i < NumBits; ++i)
1837 Bits[i] = ValueBit(V, i);
1838
1839 return std::make_pair(Interesting = false, &Bits);
1840 }
1841
1842 // For each value (except the constant ones), compute the left-rotate amount
1843 // to get it from its original to final position.
1844 void computeRotationAmounts() {
1845 NeedMask = false;
1846 RLAmt.resize(Bits.size());
1847 for (unsigned i = 0; i < Bits.size(); ++i)
1848 if (Bits[i].hasValue()) {
1849 unsigned VBI = Bits[i].getValueBitIndex();
1850 if (i >= VBI)
1851 RLAmt[i] = i - VBI;
1852 else
1853 RLAmt[i] = Bits.size() - (VBI - i);
1854 } else if (Bits[i].isZero()) {
1855 NeedMask = true;
1856 RLAmt[i] = UINT32_MAX;
1857 } else {
1858 llvm_unreachable("Unknown value bit type");
1859 }
1860 }
1861
1862 // Collect groups of consecutive bits with the same underlying value and
1863 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1864 // they break up groups.
1865 void collectBitGroups(bool LateMask) {
1866 BitGroups.clear();
1867
1868 unsigned LastRLAmt = RLAmt[0];
1869 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1870 unsigned LastGroupStartIdx = 0;
1871 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1872 for (unsigned i = 1; i < Bits.size(); ++i) {
1873 unsigned ThisRLAmt = RLAmt[i];
1874 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1875 if (LateMask && !ThisValue) {
1876 ThisValue = LastValue;
1877 ThisRLAmt = LastRLAmt;
1878 // If we're doing late masking, then the first bit group always starts
1879 // at zero (even if the first bits were zero).
1880 if (BitGroups.empty())
1881 LastGroupStartIdx = 0;
1882 }
1883
1884 // If this bit is known to be zero and the current group is a bit group
1885 // of zeros, we do not need to terminate the current bit group even the
1886 // Value or RLAmt does not match here. Instead, we terminate this group
1887 // when the first non-zero bit appears later.
1888 if (IsGroupOfZeros && Bits[i].isZero())
1889 continue;
1890
1891 // If this bit has the same underlying value and the same rotate factor as
1892 // the last one, then they're part of the same group.
1893 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1894 // We cannot continue the current group if this bits is not known to
1895 // be zero in a bit group of zeros.
1896 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1897 continue;
1898
1899 if (LastValue.getNode())
1900 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1901 i-1));
1902 LastRLAmt = ThisRLAmt;
1903 LastValue = ThisValue;
1904 LastGroupStartIdx = i;
1905 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1906 }
1907 if (LastValue.getNode())
1908 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1909 Bits.size()-1));
1910
1911 if (BitGroups.empty())
1912 return;
1913
1914 // We might be able to combine the first and last groups.
1915 if (BitGroups.size() > 1) {
1916 // If the first and last groups are the same, then remove the first group
1917 // in favor of the last group, making the ending index of the last group
1918 // equal to the ending index of the to-be-removed first group.
1919 if (BitGroups[0].StartIdx == 0 &&
1920 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1921 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1922 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1923 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1924 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1925 BitGroups.erase(BitGroups.begin());
1926 }
1927 }
1928 }
1929
1930 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1931 // associated with each. If the number of groups are same, we prefer a group
1932 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1933 // instruction. If there is a degeneracy, pick the one that occurs
1934 // first (in the final value).
1935 void collectValueRotInfo() {
1936 ValueRots.clear();
1937
1938 for (auto &BG : BitGroups) {
1939 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1940 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1941 VRI.V = BG.V;
1942 VRI.RLAmt = BG.RLAmt;
1943 VRI.Repl32 = BG.Repl32;
1944 VRI.NumGroups += 1;
1945 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1946 }
1947
1948 // Now that we've collected the various ValueRotInfo instances, we need to
1949 // sort them.
1950 ValueRotsVec.clear();
1951 for (auto &I : ValueRots) {
1952 ValueRotsVec.push_back(I.second);
1953 }
1954 llvm::sort(ValueRotsVec);
1955 }
1956
1957 // In 64-bit mode, rlwinm and friends have a rotation operator that
1958 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1959 // indices of these instructions can only be in the lower 32 bits, so they
1960 // can only represent some 64-bit bit groups. However, when they can be used,
1961 // the 32-bit replication can be used to represent, as a single bit group,
1962 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1963 // groups when possible. Returns true if any of the bit groups were
1964 // converted.
1965 void assignRepl32BitGroups() {
1966 // If we have bits like this:
1967 //
1968 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1969 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1970 // Groups: | RLAmt = 8 | RLAmt = 40 |
1971 //
1972 // But, making use of a 32-bit operation that replicates the low-order 32
1973 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1974 // of 8.
1975
1976 auto IsAllLow32 = [this](BitGroup & BG) {
1977 if (BG.StartIdx <= BG.EndIdx) {
1978 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1979 if (!Bits[i].hasValue())
1980 continue;
1981 if (Bits[i].getValueBitIndex() >= 32)
1982 return false;
1983 }
1984 } else {
1985 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1986 if (!Bits[i].hasValue())
1987 continue;
1988 if (Bits[i].getValueBitIndex() >= 32)
1989 return false;
1990 }
1991 for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1992 if (!Bits[i].hasValue())
1993 continue;
1994 if (Bits[i].getValueBitIndex() >= 32)
1995 return false;
1996 }
1997 }
1998
1999 return true;
2000 };
2001
2002 for (auto &BG : BitGroups) {
2003 // If this bit group has RLAmt of 0 and will not be merged with
2004 // another bit group, we don't benefit from Repl32. We don't mark
2005 // such group to give more freedom for later instruction selection.
2006 if (BG.RLAmt == 0) {
2007 auto PotentiallyMerged = [this](BitGroup & BG) {
2008 for (auto &BG2 : BitGroups)
2009 if (&BG != &BG2 && BG.V == BG2.V &&
2010 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
2011 return true;
2012 return false;
2013 };
2014 if (!PotentiallyMerged(BG))
2015 continue;
2016 }
2017 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
2018 if (IsAllLow32(BG)) {
2019 if (BG.RLAmt >= 32) {
2020 BG.RLAmt -= 32;
2021 BG.Repl32CR = true;
2022 }
2023
2024 BG.Repl32 = true;
2025
2026 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
2027 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
2028 << BG.StartIdx << ", " << BG.EndIdx << "]\n");
2029 }
2030 }
2031 }
2032
2033 // Now walk through the bit groups, consolidating where possible.
2034 for (auto I = BitGroups.begin(); I != BitGroups.end();) {
2035 // We might want to remove this bit group by merging it with the previous
2036 // group (which might be the ending group).
2037 auto IP = (I == BitGroups.begin()) ?
2038 std::prev(BitGroups.end()) : std::prev(I);
2039 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
2040 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
2041
2042 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
2043 << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
2044 << I->StartIdx << ", " << I->EndIdx
2045 << "] with group with range [" << IP->StartIdx << ", "
2046 << IP->EndIdx << "]\n");
2047
2048 IP->EndIdx = I->EndIdx;
2049 IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
2050 IP->Repl32Coalesced = true;
2051 I = BitGroups.erase(I);
2052 continue;
2053 } else {
2054 // There is a special case worth handling: If there is a single group
2055 // covering the entire upper 32 bits, and it can be merged with both
2056 // the next and previous groups (which might be the same group), then
2057 // do so. If it is the same group (so there will be only one group in
2058 // total), then we need to reverse the order of the range so that it
2059 // covers the entire 64 bits.
2060 if (I->StartIdx == 32 && I->EndIdx == 63) {
2061 assert(std::next(I) == BitGroups.end() &&
2062 "bit group ends at index 63 but there is another?");
2063 auto IN = BitGroups.begin();
2064
2065 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
2066 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
2067 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
2068 IsAllLow32(*I)) {
2069
2070 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
2071 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
2072 << ", " << I->EndIdx
2073 << "] with 32-bit replicated groups with ranges ["
2074 << IP->StartIdx << ", " << IP->EndIdx << "] and ["
2075 << IN->StartIdx << ", " << IN->EndIdx << "]\n");
2076
2077 if (IP == IN) {
2078 // There is only one other group; change it to cover the whole
2079 // range (backward, so that it can still be Repl32 but cover the
2080 // whole 64-bit range).
2081 IP->StartIdx = 31;
2082 IP->EndIdx = 30;
2083 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
2084 IP->Repl32Coalesced = true;
2085 I = BitGroups.erase(I);
2086 } else {
2087 // There are two separate groups, one before this group and one
2088 // after us (at the beginning). We're going to remove this group,
2089 // but also the group at the very beginning.
2090 IP->EndIdx = IN->EndIdx;
2091 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
2092 IP->Repl32Coalesced = true;
2093 I = BitGroups.erase(I);
2094 BitGroups.erase(BitGroups.begin());
2095 }
2096
2097 // This must be the last group in the vector (and we might have
2098 // just invalidated the iterator above), so break here.
2099 break;
2100 }
2101 }
2102 }
2103
2104 ++I;
2105 }
2106 }
2107
2108 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
2109 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
2110 }
2111
2112 uint64_t getZerosMask() {
2113 uint64_t Mask = 0;
2114 for (unsigned i = 0; i < Bits.size(); ++i) {
2115 if (Bits[i].hasValue())
2116 continue;
2117 Mask |= (UINT64_C(1) << i);
2118 }
2119
2120 return ~Mask;
2121 }
2122
2123 // This method extends an input value to 64 bit if input is 32-bit integer.
2124 // While selecting instructions in BitPermutationSelector in 64-bit mode,
2125 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
2126 // In such case, we extend it to 64 bit to be consistent with other values.
2127 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
2128 if (V.getValueSizeInBits() == 64)
2129 return V;
2130
2131 assert(V.getValueSizeInBits() == 32);
2132 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2133 SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2134 MVT::i64), 0);
2135 SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2136 MVT::i64, ImDef, V,
2137 SubRegIdx), 0);
2138 return ExtVal;
2139 }
2140
2141 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
2142 if (V.getValueSizeInBits() == 32)
2143 return V;
2144
2145 assert(V.getValueSizeInBits() == 64);
2146 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2147 SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
2148 MVT::i32, V, SubRegIdx), 0);
2149 return SubVal;
2150 }
2151
2152 // Depending on the number of groups for a particular value, it might be
2153 // better to rotate, mask explicitly (using andi/andis), and then or the
2154 // result. Select this part of the result first.
2155 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2157 return;
2158
2159 for (ValueRotInfo &VRI : ValueRotsVec) {
2160 unsigned Mask = 0;
2161 for (unsigned i = 0; i < Bits.size(); ++i) {
2162 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2163 continue;
2164 if (RLAmt[i] != VRI.RLAmt)
2165 continue;
2166 Mask |= (1u << i);
2167 }
2168
2169 // Compute the masks for andi/andis that would be necessary.
2170 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2171 assert((ANDIMask != 0 || ANDISMask != 0) &&
2172 "No set bits in mask for value bit groups");
2173 bool NeedsRotate = VRI.RLAmt != 0;
2174
2175 // We're trying to minimize the number of instructions. If we have one
2176 // group, using one of andi/andis can break even. If we have three
2177 // groups, we can use both andi and andis and break even (to use both
2178 // andi and andis we also need to or the results together). We need four
2179 // groups if we also need to rotate. To use andi/andis we need to do more
2180 // than break even because rotate-and-mask instructions tend to be easier
2181 // to schedule.
2182
2183 // FIXME: We've biased here against using andi/andis, which is right for
2184 // POWER cores, but not optimal everywhere. For example, on the A2,
2185 // andi/andis have single-cycle latency whereas the rotate-and-mask
2186 // instructions take two cycles, and it would be better to bias toward
2187 // andi/andis in break-even cases.
2188
2189 unsigned NumAndInsts = (unsigned) NeedsRotate +
2190 (unsigned) (ANDIMask != 0) +
2191 (unsigned) (ANDISMask != 0) +
2192 (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2193 (unsigned) (bool) Res;
2194
2195 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2196 << " RL: " << VRI.RLAmt << ":"
2197 << "\n\t\t\tisel using masking: " << NumAndInsts
2198 << " using rotates: " << VRI.NumGroups << "\n");
2199
2200 if (NumAndInsts >= VRI.NumGroups)
2201 continue;
2202
2203 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2204
2205 if (InstCnt) *InstCnt += NumAndInsts;
2206
2207 SDValue VRot;
2208 if (VRI.RLAmt) {
2209 SDValue Ops[] =
2210 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2211 getI32Imm(0, dl), getI32Imm(31, dl) };
2212 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2213 Ops), 0);
2214 } else {
2215 VRot = TruncateToInt32(VRI.V, dl);
2216 }
2217
2218 SDValue ANDIVal, ANDISVal;
2219 if (ANDIMask != 0)
2220 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2221 VRot, getI32Imm(ANDIMask, dl)),
2222 0);
2223 if (ANDISMask != 0)
2224 ANDISVal =
2225 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2226 getI32Imm(ANDISMask, dl)),
2227 0);
2228
2229 SDValue TotalVal;
2230 if (!ANDIVal)
2231 TotalVal = ANDISVal;
2232 else if (!ANDISVal)
2233 TotalVal = ANDIVal;
2234 else
2235 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2236 ANDIVal, ANDISVal), 0);
2237
2238 if (!Res)
2239 Res = TotalVal;
2240 else
2241 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2242 Res, TotalVal), 0);
2243
2244 // Now, remove all groups with this underlying value and rotation
2245 // factor.
2246 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2247 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2248 });
2249 }
2250 }
2251
2252 // Instruction selection for the 32-bit case.
2253 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
2254 SDLoc dl(N);
2255 SDValue Res;
2256
2257 if (InstCnt) *InstCnt = 0;
2258
2259 // Take care of cases that should use andi/andis first.
2260 SelectAndParts32(dl, Res, InstCnt);
2261
2262 // If we've not yet selected a 'starting' instruction, and we have no zeros
2263 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2264 // number of groups), and start with this rotated value.
2265 if ((!NeedMask || LateMask) && !Res) {
2266 ValueRotInfo &VRI = ValueRotsVec[0];
2267 if (VRI.RLAmt) {
2268 if (InstCnt) *InstCnt += 1;
2269 SDValue Ops[] =
2270 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2271 getI32Imm(0, dl), getI32Imm(31, dl) };
2272 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2273 0);
2274 } else {
2275 Res = TruncateToInt32(VRI.V, dl);
2276 }
2277
2278 // Now, remove all groups with this underlying value and rotation factor.
2279 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2280 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2281 });
2282 }
2283
2284 if (InstCnt) *InstCnt += BitGroups.size();
2285
2286 // Insert the other groups (one at a time).
2287 for (auto &BG : BitGroups) {
2288 if (!Res) {
2289 SDValue Ops[] =
2290 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2291 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2292 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2293 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2294 } else {
2295 SDValue Ops[] =
2296 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2297 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2298 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2299 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
2300 }
2301 }
2302
2303 if (LateMask) {
2304 unsigned Mask = (unsigned) getZerosMask();
2305
2306 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2307 assert((ANDIMask != 0 || ANDISMask != 0) &&
2308 "No set bits in zeros mask?");
2309
2310 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2311 (unsigned) (ANDISMask != 0) +
2312 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2313
2314 SDValue ANDIVal, ANDISVal;
2315 if (ANDIMask != 0)
2316 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2317 Res, getI32Imm(ANDIMask, dl)),
2318 0);
2319 if (ANDISMask != 0)
2320 ANDISVal =
2321 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2322 getI32Imm(ANDISMask, dl)),
2323 0);
2324
2325 if (!ANDIVal)
2326 Res = ANDISVal;
2327 else if (!ANDISVal)
2328 Res = ANDIVal;
2329 else
2330 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2331 ANDIVal, ANDISVal), 0);
2332 }
2333
2334 return Res.getNode();
2335 }
2336
// Return how many instructions (1 or 2) a 64-bit rotate-and-mask takes for the
// given rotation amount and mask range; IsIns selects the rotate-and-insert
// form. MaskStart and MaskEnd are bit indices counted from the low-order end.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // A replicated 32-bit rotate always takes one instruction.
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstMaskStart = 63 - MaskEnd;
  const unsigned InstMaskEnd = 63 - MaskStart;

  // Without insertion, a mask touching either end of the register needs only
  // one instruction.
  const bool TouchesEdge =
      !IsIns && (InstMaskEnd == 63 || InstMaskStart == 0);
  // So does a mask whose end lines up with the rotation amount; this is the
  // only single-instruction case for insertion.
  const bool EndMatchesRotate = InstMaskEnd == 63 - RLAmt;

  return (TouchesEdge || EndMatchesRotate) ? 1 : 2;
}
2354
2355 // For 64-bit values, not all combinations of rotates and masks are
2356 // available. Produce one if it is available.
2357 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2358 bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2359 unsigned *InstCnt = nullptr) {
2360 // In the notation used by the instructions, 'start' and 'end' are reversed
2361 // because bits are counted from high to low order.
2362 unsigned InstMaskStart = 64 - MaskEnd - 1,
2363 InstMaskEnd = 64 - MaskStart - 1;
2364
2365 if (InstCnt) *InstCnt += 1;
2366
2367 if (Repl32) {
2368 // This rotation amount assumes that the lower 32 bits of the quantity
2369 // are replicated in the high 32 bits by the rotation operator (which is
2370 // done by rlwinm and friends).
2371 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2372 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2373 SDValue Ops[] =
2374 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2375 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2376 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2377 Ops), 0);
2378 }
2379
2380 if (InstMaskEnd == 63) {
2381 SDValue Ops[] =
2382 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2383 getI32Imm(InstMaskStart, dl) };
2384 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
2385 }
2386
2387 if (InstMaskStart == 0) {
2388 SDValue Ops[] =
2389 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2390 getI32Imm(InstMaskEnd, dl) };
2391 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
2392 }
2393
2394 if (InstMaskEnd == 63 - RLAmt) {
2395 SDValue Ops[] =
2396 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2397 getI32Imm(InstMaskStart, dl) };
2398 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
2399 }
2400
2401 // We cannot do this with a single instruction, so we'll use two. The
2402 // problem is that we're not free to choose both a rotation amount and mask
2403 // start and end independently. We can choose an arbitrary mask start and
2404 // end, but then the rotation amount is fixed. Rotation, however, can be
2405 // inverted, and so by applying an "inverse" rotation first, we can get the
2406 // desired result.
2407 if (InstCnt) *InstCnt += 1;
2408
2409 // The rotation mask for the second instruction must be MaskStart.
2410 unsigned RLAmt2 = MaskStart;
2411 // The first instruction must rotate V so that the overall rotation amount
2412 // is RLAmt.
2413 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2414 if (RLAmt1)
2415 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2416 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2417 }
2418
2419 // For 64-bit values, not all combinations of rotates and masks are
2420 // available. Produce a rotate-mask-and-insert if one is available.
2421 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2422 unsigned RLAmt, bool Repl32, unsigned MaskStart,
2423 unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2424 // In the notation used by the instructions, 'start' and 'end' are reversed
2425 // because bits are counted from high to low order.
2426 unsigned InstMaskStart = 64 - MaskEnd - 1,
2427 InstMaskEnd = 64 - MaskStart - 1;
2428
2429 if (InstCnt) *InstCnt += 1;
2430
2431 if (Repl32) {
2432 // This rotation amount assumes that the lower 32 bits of the quantity
2433 // are replicated in the high 32 bits by the rotation operator (which is
2434 // done by rlwinm and friends).
2435 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2436 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2437 SDValue Ops[] =
2438 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2439 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2440 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2441 Ops), 0);
2442 }
2443
2444 if (InstMaskEnd == 63 - RLAmt) {
2445 SDValue Ops[] =
2446 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2447 getI32Imm(InstMaskStart, dl) };
2448 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2449 }
2450
2451 // We cannot do this with a single instruction, so we'll use two. The
2452 // problem is that we're not free to choose both a rotation amount and mask
2453 // start and end independently. We can choose an arbitrary mask start and
2454 // end, but then the rotation amount is fixed. Rotation, however, can be
2455 // inverted, and so by applying an "inverse" rotation first, we can get the
2456 // desired result.
2457 if (InstCnt) *InstCnt += 1;
2458
2459 // The rotation mask for the second instruction must be MaskStart.
2460 unsigned RLAmt2 = MaskStart;
2461 // The first instruction must rotate V so that the overall rotation amount
2462 // is RLAmt.
2463 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2464 if (RLAmt1)
2465 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2466 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2467 }
2468
2469 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2471 return;
2472
2473 // The idea here is the same as in the 32-bit version, but with additional
2474 // complications from the fact that Repl32 might be true. Because we
2475 // aggressively convert bit groups to Repl32 form (which, for small
2476 // rotation factors, involves no other change), and then coalesce, it might
2477 // be the case that a single 64-bit masking operation could handle both
2478 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2479 // form allowed coalescing, then we must use a 32-bit rotation in order to
2480 // completely capture the new combined bit group.
2481
2482 for (ValueRotInfo &VRI : ValueRotsVec) {
2483 uint64_t Mask = 0;
2484
2485 // We need to add to the mask all bits from the associated bit groups.
2486 // If Repl32 is false, we need to add bits from bit groups that have
2487 // Repl32 true, but are trivially convertable to Repl32 false. Such a
2488 // group is trivially convertable if it overlaps only with the lower 32
2489 // bits, and the group has not been coalesced.
2490 auto MatchingBG = [VRI](const BitGroup &BG) {
2491 if (VRI.V != BG.V)
2492 return false;
2493
2494 unsigned EffRLAmt = BG.RLAmt;
2495 if (!VRI.Repl32 && BG.Repl32) {
2496 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2497 !BG.Repl32Coalesced) {
2498 if (BG.Repl32CR)
2499 EffRLAmt += 32;
2500 } else {
2501 return false;
2502 }
2503 } else if (VRI.Repl32 != BG.Repl32) {
2504 return false;
2505 }
2506
2507 return VRI.RLAmt == EffRLAmt;
2508 };
2509
2510 for (auto &BG : BitGroups) {
2511 if (!MatchingBG(BG))
2512 continue;
2513
2514 if (BG.StartIdx <= BG.EndIdx) {
2515 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2516 Mask |= (UINT64_C(1) << i);
2517 } else {
2518 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2519 Mask |= (UINT64_C(1) << i);
2520 for (unsigned i = 0; i <= BG.EndIdx; ++i)
2521 Mask |= (UINT64_C(1) << i);
2522 }
2523 }
2524
2525 // We can use the 32-bit andi/andis technique if the mask does not
2526 // require any higher-order bits. This can save an instruction compared
2527 // to always using the general 64-bit technique.
2528 bool Use32BitInsts = isUInt<32>(Mask);
2529 // Compute the masks for andi/andis that would be necessary.
2530 unsigned ANDIMask = (Mask & UINT16_MAX),
2531 ANDISMask = (Mask >> 16) & UINT16_MAX;
2532
2533 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2534
2535 unsigned NumAndInsts = (unsigned) NeedsRotate +
2536 (unsigned) (bool) Res;
2537 unsigned NumOfSelectInsts = 0;
2538 selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2539 assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2540 if (Use32BitInsts)
2541 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2542 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2543 else
2544 NumAndInsts += NumOfSelectInsts + /* and */ 1;
2545
2546 unsigned NumRLInsts = 0;
2547 bool FirstBG = true;
2548 bool MoreBG = false;
2549 for (auto &BG : BitGroups) {
2550 if (!MatchingBG(BG)) {
2551 MoreBG = true;
2552 continue;
2553 }
2554 NumRLInsts +=
2555 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2556 !FirstBG);
2557 FirstBG = false;
2558 }
2559
2560 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2561 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2562 << "\n\t\t\tisel using masking: " << NumAndInsts
2563 << " using rotates: " << NumRLInsts << "\n");
2564
2565 // When we'd use andi/andis, we bias toward using the rotates (andi only
2566 // has a record form, and is cracked on POWER cores). However, when using
2567 // general 64-bit constant formation, bias toward the constant form,
2568 // because that exposes more opportunities for CSE.
2569 if (NumAndInsts > NumRLInsts)
2570 continue;
2571 // When merging multiple bit groups, instruction or is used.
2572 // But when rotate is used, rldimi can insert the rotated value into any
2573 // register, so instruction or can be avoided.
2574 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2575 continue;
2576
2577 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2578
2579 if (InstCnt) *InstCnt += NumAndInsts;
2580
2581 SDValue VRot;
2582 // We actually need to generate a rotation if we have a non-zero rotation
2583 // factor or, in the Repl32 case, if we care about any of the
2584 // higher-order replicated bits. In the latter case, we generate a mask
2585 // backward so that it actually includes the entire 64 bits.
2586 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2587 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2588 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2589 else
2590 VRot = VRI.V;
2591
2592 SDValue TotalVal;
2593 if (Use32BitInsts) {
2594 assert((ANDIMask != 0 || ANDISMask != 0) &&
2595 "No set bits in mask when using 32-bit ands for 64-bit value");
2596
2597 SDValue ANDIVal, ANDISVal;
2598 if (ANDIMask != 0)
2599 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2600 ExtendToInt64(VRot, dl),
2601 getI32Imm(ANDIMask, dl)),
2602 0);
2603 if (ANDISMask != 0)
2604 ANDISVal =
2605 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2606 ExtendToInt64(VRot, dl),
2607 getI32Imm(ANDISMask, dl)),
2608 0);
2609
2610 if (!ANDIVal)
2611 TotalVal = ANDISVal;
2612 else if (!ANDISVal)
2613 TotalVal = ANDIVal;
2614 else
2615 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2616 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2617 } else {
2618 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2619 TotalVal =
2620 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2621 ExtendToInt64(VRot, dl), TotalVal),
2622 0);
2623 }
2624
2625 if (!Res)
2626 Res = TotalVal;
2627 else
2628 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2629 ExtendToInt64(Res, dl), TotalVal),
2630 0);
2631
2632 // Now, remove all groups with this underlying value and rotation
2633 // factor.
2634 eraseMatchingBitGroups(MatchingBG);
2635 }
2636 }
2637
2638 // Instruction selection for the 64-bit case.
2639 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2640 SDLoc dl(N);
2641 SDValue Res;
2642
2643 if (InstCnt) *InstCnt = 0;
2644
2645 // Take care of cases that should use andi/andis first.
2646 SelectAndParts64(dl, Res, InstCnt);
2647
2648 // If we've not yet selected a 'starting' instruction, and we have no zeros
2649 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2650 // number of groups), and start with this rotated value.
2651 if ((!NeedMask || LateMask) && !Res) {
2652 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2653 // groups will come first, and so the VRI representing the largest number
2654 // of groups might not be first (it might be the first Repl32 groups).
2655 unsigned MaxGroupsIdx = 0;
2656 if (!ValueRotsVec[0].Repl32) {
2657 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2658 if (ValueRotsVec[i].Repl32) {
2659 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2660 MaxGroupsIdx = i;
2661 break;
2662 }
2663 }
2664
2665 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2666 bool NeedsRotate = false;
2667 if (VRI.RLAmt) {
2668 NeedsRotate = true;
2669 } else if (VRI.Repl32) {
2670 for (auto &BG : BitGroups) {
2671 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2672 BG.Repl32 != VRI.Repl32)
2673 continue;
2674
2675 // We don't need a rotate if the bit group is confined to the lower
2676 // 32 bits.
2677 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2678 continue;
2679
2680 NeedsRotate = true;
2681 break;
2682 }
2683 }
2684
2685 if (NeedsRotate)
2686 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2687 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2688 InstCnt);
2689 else
2690 Res = VRI.V;
2691
2692 // Now, remove all groups with this underlying value and rotation factor.
2693 if (Res)
2694 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2695 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2696 BG.Repl32 == VRI.Repl32;
2697 });
2698 }
2699
2700 // Because 64-bit rotates are more flexible than inserts, we might have a
2701 // preference regarding which one we do first (to save one instruction).
2702 if (!Res)
2703 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2704 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2705 false) <
2706 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2707 true)) {
2708 if (I != BitGroups.begin()) {
2709 BitGroup BG = *I;
2710 BitGroups.erase(I);
2711 BitGroups.insert(BitGroups.begin(), BG);
2712 }
2713
2714 break;
2715 }
2716 }
2717
2718 // Insert the other groups (one at a time).
2719 for (auto &BG : BitGroups) {
2720 if (!Res)
2721 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2722 BG.EndIdx, InstCnt);
2723 else
2724 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2725 BG.StartIdx, BG.EndIdx, InstCnt);
2726 }
2727
2728 if (LateMask) {
2729 uint64_t Mask = getZerosMask();
2730
2731 // We can use the 32-bit andi/andis technique if the mask does not
2732 // require any higher-order bits. This can save an instruction compared
2733 // to always using the general 64-bit technique.
2734 bool Use32BitInsts = isUInt<32>(Mask);
2735 // Compute the masks for andi/andis that would be necessary.
2736 unsigned ANDIMask = (Mask & UINT16_MAX),
2737 ANDISMask = (Mask >> 16) & UINT16_MAX;
2738
2739 if (Use32BitInsts) {
2740 assert((ANDIMask != 0 || ANDISMask != 0) &&
2741 "No set bits in mask when using 32-bit ands for 64-bit value");
2742
2743 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2744 (unsigned) (ANDISMask != 0) +
2745 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2746
2747 SDValue ANDIVal, ANDISVal;
2748 if (ANDIMask != 0)
2749 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2750 ExtendToInt64(Res, dl),
2751 getI32Imm(ANDIMask, dl)),
2752 0);
2753 if (ANDISMask != 0)
2754 ANDISVal =
2755 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2756 ExtendToInt64(Res, dl),
2757 getI32Imm(ANDISMask, dl)),
2758 0);
2759
2760 if (!ANDIVal)
2761 Res = ANDISVal;
2762 else if (!ANDISVal)
2763 Res = ANDIVal;
2764 else
2765 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2766 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2767 } else {
2768 unsigned NumOfSelectInsts = 0;
2769 SDValue MaskVal =
2770 SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
2771 Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2772 ExtendToInt64(Res, dl), MaskVal),
2773 0);
2774 if (InstCnt)
2775 *InstCnt += NumOfSelectInsts + /* and */ 1;
2776 }
2777 }
2778
2779 return Res.getNode();
2780 }
2781
2782 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2783 // Fill in BitGroups.
2784 collectBitGroups(LateMask);
2785 if (BitGroups.empty())
2786 return nullptr;
2787
2788 // For 64-bit values, figure out when we can use 32-bit instructions.
2789 if (Bits.size() == 64)
2790 assignRepl32BitGroups();
2791
2792 // Fill in ValueRotsVec.
2793 collectValueRotInfo();
2794
2795 if (Bits.size() == 32) {
2796 return Select32(N, LateMask, InstCnt);
2797 } else {
2798 assert(Bits.size() == 64 && "Not 64 bits here?");
2799 return Select64(N, LateMask, InstCnt);
2800 }
2801
2802 return nullptr;
2803 }
2804
2805 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2806 erase_if(BitGroups, F);
2807 }
2808
2810
// Set by computeRotationAmounts() (see the public Select). When false, the
// selector treats the result as having no zeros to fill in.
2811 bool NeedMask = false;
2813
// Bit groups still to be emitted; filled by collectBitGroups() and pruned
// through eraseMatchingBitGroups() as groups are handled.
2814 SmallVector<BitGroup, 16> BitGroups;
2815
// Per-(value, rotation) summaries. ValueRotsVec is filled by
// collectValueRotInfo(); presumably ValueRots is the keyed form it is built
// from -- confirm against collectValueRotInfo().
2816 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2817 SmallVector<ValueRotInfo, 16> ValueRotsVec;
2818
// DAG in which the selector creates its machine nodes.
2819 SelectionDAG *CurDAG = nullptr;
2820
2821public:
2822 BitPermutationSelector(SelectionDAG *DAG)
2823 : CurDAG(DAG) {}
2824
2825 // Here we try to match complex bit permutations into a set of
2826 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2827 // known to produce optimal code for common cases (like i32 byte swapping).
2828 SDNode *Select(SDNode *N) {
2829 Memoizer.clear();
2830 auto Result =
2831 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2832 if (!Result.first)
2833 return nullptr;
2834 Bits = std::move(*Result.second);
2835
2836 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2837 " selection for: ");
2838 LLVM_DEBUG(N->dump(CurDAG));
2839
2840 // Fill it RLAmt and set NeedMask.
2841 computeRotationAmounts();
2842
2843 if (!NeedMask)
2844 return Select(N, false);
2845
2846 // We currently have two techniques for handling results with zeros: early
2847 // masking (the default) and late masking. Late masking is sometimes more
2848 // efficient, but because the structure of the bit groups is different, it
2849 // is hard to tell without generating both and comparing the results. With
2850 // late masking, we ignore zeros in the resulting value when inserting each
2851 // set of bit groups, and then mask in the zeros at the end. With early
2852 // masking, we only insert the non-zero parts of the result at every step.
2853
2854 unsigned InstCnt = 0, InstCntLateMask = 0;
2855 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2856 SDNode *RN = Select(N, false, &InstCnt);
2857 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2858
2859 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2860 SDNode *RNLM = Select(N, true, &InstCntLateMask);
2861 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2862 << " instructions\n");
2863
2864 if (InstCnt <= InstCntLateMask) {
2865 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2866 return RN;
2867 }
2868
2869 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2870 return RNLM;
2871 }
2872};
2873
// Tries to produce the results of integer comparisons (ISD::SETCC), and of
// logical operations on such results, as GPR code sequences. Select() is the
// entry point and dispatches on the node's opcode to tryEXTEND or
// tryLogicOpOfCompares.
2874class IntegerCompareEliminator {
// DAG in which the replacement machine nodes are created.
2875 SelectionDAG *CurDAG;
// The PPC instruction selector; the member functions use it for its
// immediate-building helpers (e.g. S->getI64Imm, S->getI32Imm).
2876 PPCDAGToDAGISel *S;
2877 // Conversion type for interpreting results of a 32-bit instruction as
2878 // a 64-bit value or vice versa.
2879 enum ExtOrTruncConversion { Ext, Trunc };
2880
2881 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2882 // in a GPR.
2883 // ZExtOrig - use the original condition code, zero-extend value
2884 // ZExtInvert - invert the condition code, zero-extend value
2885 // SExtOrig - use the original condition code, sign-extend value
2886 // SExtInvert - invert the condition code, sign-extend value
2887 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2888
2889 // Comparisons against zero to emit GPR code sequences for. Each of these
2890 // sequences may need to be emitted for two or more equivalent patterns.
2891 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2892 // matters as well as the extension type: sext (-1/0), zext (1/0).
2893 // GEZExt - (zext (LHS >= 0))
2894 // GESExt - (sext (LHS >= 0))
2895 // LEZExt - (zext (LHS <= 0))
2896 // LESExt - (sext (LHS <= 0))
2897 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2898
// Pattern handlers called from Select(), followed by the helpers that build
// the individual GPR sequences.
2899 SDNode *tryEXTEND(SDNode *N);
2900 SDNode *tryLogicOpOfCompares(SDNode *N);
2901 SDValue computeLogicOpInGPR(SDValue LogicOp);
2902 SDValue signExtendInputIfNeeded(SDValue Input);
2903 SDValue zeroExtendInputIfNeeded(SDValue Input);
2904 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2905 SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2906 ZeroCompare CmpTy);
2907 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2908 int64_t RHSValue, SDLoc dl);
2909 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2910 int64_t RHSValue, SDLoc dl);
2911 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2912 int64_t RHSValue, SDLoc dl);
2913 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2914 int64_t RHSValue, SDLoc dl);
2915 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2916
2917public:
// Stores the DAG and the selector; the assert in the body states that this
// class is only expected to be used on 64-bit targets.
2918 IntegerCompareEliminator(SelectionDAG *DAG,
2919 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
// NOTE(review): the opening line of this assert (the expression that
// .getPointerTy is called on) is absent from this listing -- restore it from
// the original source.
2921 .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2922 "Only expecting to use this on 64 bit targets.");
2923 }
// Dispatches N by opcode: extends go to tryEXTEND, AND/OR/XOR go to
// tryLogicOpOfCompares. Returns nullptr for any other opcode or when
// CmpInGPR is ICGPR_None (CmpInGPR is declared outside this view).
2924 SDNode *Select(SDNode *N) {
2925 if (CmpInGPR == ICGPR_None)
2926 return nullptr;
2927 switch (N->getOpcode()) {
2928 default: break;
2929 case ISD::ZERO_EXTEND:
// NOTE(review): the condition guarding this return is absent from this
// listing; as shown, ZERO_EXTEND would always bail out here.
2932 return nullptr;
2933 [[fallthrough]];
2934 case ISD::SIGN_EXTEND:
// NOTE(review): likewise, the condition guarding this return is absent from
// this listing.
2937 return nullptr;
2938 return tryEXTEND(N);
2939 case ISD::AND:
2940 case ISD::OR:
2941 case ISD::XOR:
2942 return tryLogicOpOfCompares(N);
2943 }
2944 return nullptr;
2945 }
2946};
2947
2948// The obvious case for wanting to keep the value in a GPR. Namely, the
2949// result of the comparison is actually needed in a GPR.
2950SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2951 assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2952 N->getOpcode() == ISD::SIGN_EXTEND) &&
2953 "Expecting a zero/sign extend node!");
2954 SDValue WideRes;
2955 // If we are zero-extending the result of a logical operation on i1
2956 // values, we can keep the values in GPRs.
2957 if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) &&
2958 N->getOperand(0).getValueType() == MVT::i1 &&
2959 N->getOpcode() == ISD::ZERO_EXTEND)
2960 WideRes = computeLogicOpInGPR(N->getOperand(0));
2961 else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2962 return nullptr;
2963 else
2964 WideRes =
2965 getSETCCInGPR(N->getOperand(0),
2966 N->getOpcode() == ISD::SIGN_EXTEND ?
2967 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2968
2969 if (!WideRes)
2970 return nullptr;
2971
2972 SDLoc dl(N);
2973 bool Input32Bit = WideRes.getValueType() == MVT::i32;
2974 bool Output32Bit = N->getValueType(0) == MVT::i32;
2975
2976 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2977 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2978
2979 SDValue ConvOp = WideRes;
2980 if (Input32Bit != Output32Bit)
2981 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2982 ExtOrTruncConversion::Trunc);
2983 return ConvOp.getNode();
2984}
2985
2986// Attempt to perform logical operations on the results of comparisons while
2987// keeping the values in GPRs. Without doing so, these would end up being
2988// lowered to CR-logical operations which suffer from significant latency and
2989// low ILP.
2990SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2991 if (N->getValueType(0) != MVT::i1)
2992 return nullptr;
2993 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
2994 "Expected a logic operation on setcc results.");
2995 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
2996 if (!LoweredLogical)
2997 return nullptr;
2998
2999 SDLoc dl(N);
3000 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
3001 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3002 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
3003 SDValue LHS = LoweredLogical.getOperand(0);
3004 SDValue RHS = LoweredLogical.getOperand(1);
3005 SDValue WideOp;
3006 SDValue OpToConvToRecForm;
3007
3008 // Look through any 32-bit to 64-bit implicit extend nodes to find the
3009 // opcode that is input to the XORI.
3010 if (IsBitwiseNegate &&
3011 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
3012 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
3013 else if (IsBitwiseNegate)
3014 // If the input to the XORI isn't an extension, that's what we're after.
3015 OpToConvToRecForm = LoweredLogical.getOperand(0);
3016 else
3017 // If this is not an XORI, it is a reg-reg logical op and we can convert
3018 // it to record-form.
3019 OpToConvToRecForm = LoweredLogical;
3020
3021 // Get the record-form version of the node we're looking to use to get the
3022 // CR result from.
3023 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
3024 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
3025
3026 // Convert the right node to record-form. This is either the logical we're
3027 // looking at or it is the input node to the negation (if we're looking at
3028 // a bitwise negation).
3029 if (NewOpc != -1 && IsBitwiseNegate) {
3030 // The input to the XORI has a record-form. Use it.
3031 assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
3032 "Expected a PPC::XORI8 only for bitwise negation.");
3033 // Emit the record-form instruction.
3034 std::vector<SDValue> Ops;
3035 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
3036 Ops.push_back(OpToConvToRecForm.getOperand(i));
3037
3038 WideOp =
3039 SDValue(CurDAG->getMachineNode(NewOpc, dl,
3040 OpToConvToRecForm.getValueType(),
3041 MVT::Glue, Ops), 0);
3042 } else {
3043 assert((NewOpc != -1 || !IsBitwiseNegate) &&
3044 "No record form available for AND8/OR8/XOR8?");
3045 WideOp =
3046 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
3047 dl, MVT::i64, MVT::Glue, LHS, RHS),
3048 0);
3049 }
3050
3051 // Select this node to a single bit from CR0 set by the record-form node
3052 // just created. For bitwise negation, use the EQ bit which is the equivalent
3053 // of negating the result (i.e. it is a bit set when the result of the
3054 // operation is zero).
3055 SDValue SRIdxVal =
3056 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
3057 SDValue CRBit =
3058 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
3059 MVT::i1, CR0Reg, SRIdxVal,
3060 WideOp.getValue(1)), 0);
3061 return CRBit.getNode();
3062}
3063
3064// Lower a logical operation on i1 values into a GPR sequence if possible.
3065// The result can be kept in a GPR if requested.
3066// Three types of inputs can be handled:
3067// - SETCC
3068// - TRUNCATE
3069// - Logical operation (AND/OR/XOR)
3070// There is also a special case that is handled (namely a complement operation
3071// achieved with xor %a, -1).
// Returns the i64 machine node computing the operation, or an empty SDValue
// when a required operand cannot be produced in a GPR.
3072SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
// NOTE(review): the opening line of this assert (the condition preceding the
// message below) is absent from this listing -- restore it from the original
// source.
3074 "Can only handle logic operations here.");
3075 assert(LogicOp.getValueType() == MVT::i1 &&
3076 "Can only handle logic operations on i1 values here.");
3077 SDLoc dl(LogicOp);
3078 SDValue LHS, RHS;
3079
3080 // Special case: xor %a, -1
3081 bool IsBitwiseNegation = isBitwiseNot(LogicOp);
3082
3083 // Produces a GPR sequence for each operand of the binary logic operation.
3084 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
3085 // the value in a GPR and for logic operations, it will recursively produce
3086 // a GPR sequence for the operation.
// Any other kind of operand yields an empty SDValue.
3087 auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
3088 unsigned OperandOpcode = Operand.getOpcode();
3089 if (OperandOpcode == ISD::SETCC)
3090 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
3091 else if (OperandOpcode == ISD::TRUNCATE) {
3092 SDValue InputOp = Operand.getOperand(0);
3093 EVT InVT = InputOp.getValueType();
// rldicl with a rotate of 0 and a mask start of 63; per the Power ISA this
// clears every bit except the least-significant one.
3094 return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
3095 PPC::RLDICL, dl, InVT, InputOp,
3096 S->getI64Imm(0, dl),
3097 S->getI64Imm(63, dl)), 0);
3098 } else if (ISD::isBitwiseLogicOp(OperandOpcode))
3099 return computeLogicOpInGPR(Operand);
3100 return SDValue();
3101 };
3102 LHS = getLogicOperand(LogicOp.getOperand(0));
3103 RHS = getLogicOperand(LogicOp.getOperand(1));
3104
3105 // If a GPR sequence can't be produced for the LHS we can't proceed.
3106 // Not producing a GPR sequence for the RHS is only a problem if this isn't
3107 // a bitwise negation operation.
3108 if (!LHS || (!RHS && !IsBitwiseNegation))
3109 return SDValue();
3110
3111 NumLogicOpsOnComparison++;
3112
3113 // We will use the inputs as 64-bit values.
3114 if (LHS.getValueType() == MVT::i32)
3115 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
// RHS may be empty for a negation, so it is only inspected otherwise.
3116 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
3117 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
3118
3119 unsigned NewOpc;
3120 switch (LogicOp.getOpcode()) {
3121 default: llvm_unreachable("Unknown logic operation.");
3122 case ISD::AND: NewOpc = PPC::AND8; break;
3123 case ISD::OR: NewOpc = PPC::OR8; break;
3124 case ISD::XOR: NewOpc = PPC::XOR8; break;
3125 }
3126
// A negation is emitted as XORI8 with the immediate 1 instead of the reg-reg
// XOR8 chosen above; tryLogicOpOfCompares recognizes negations by this
// opcode.
3127 if (IsBitwiseNegation) {
3128 RHS = S->getI64Imm(1, dl);
3129 NewOpc = PPC::XORI8;
3130 }
3131
3132 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
3133
3134}
3135
3136/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
3137/// Otherwise just reinterpret it as a 64-bit value.
3138/// Useful when emitting comparison code for 32-bit values without using
3139/// the compare instruction (which only considers the lower 32-bits).
3140SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
3141 assert(Input.getValueType() == MVT::i32 &&
3142 "Can only sign-extend 32-bit values here.");
3143 unsigned Opc = Input.getOpcode();
3144
3145 // The value was sign extended and then truncated to 32-bits. No need to
3146 // sign extend it again.
3147 if (Opc == ISD::TRUNCATE &&
3148 (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
3149 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
3150 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3151
3152 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3153 // The input is a sign-extending load. All ppc sign-extending loads
3154 // sign-extend to the full 64-bits.
3155 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3156 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3157
3158 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3159 // We don't sign-extend constants.
3160 if (InputConst)
3161 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3162
3163 SDLoc dl(Input);
3164 SignExtensionsAdded++;
3165 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3166 MVT::i64, Input), 0);
3167}
3168
3169/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3170/// Otherwise just reinterpret it as a 64-bit value.
3171/// Useful when emitting comparison code for 32-bit values without using
3172/// the compare instruction (which only considers the lower 32-bits).
3173SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3174 assert(Input.getValueType() == MVT::i32 &&
3175 "Can only zero-extend 32-bit values here.");
3176 unsigned Opc = Input.getOpcode();
3177
3178 // The only condition under which we can omit the actual extend instruction:
3179 // - The value is a positive constant
3180 // - The value comes from a load that isn't a sign-extending load
3181 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3182 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3183 (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3184 Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3185 if (IsTruncateOfZExt)
3186 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3187
3188 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3189 if (InputConst && InputConst->getSExtValue() >= 0)
3190 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3191
3192 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3193 // The input is a load that doesn't sign-extend (it will be zero-extended).
3194 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3195 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3196
3197 // None of the above, need to zero-extend.
3198 SDLoc dl(Input);
3199 ZeroExtensionsAdded++;
3200 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3201 S->getI64Imm(0, dl),
3202 S->getI64Imm(32, dl)), 0);
3203}
3204
3205// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3206// course not actual zero/sign extensions that will generate machine code,
3207// they're just a way to reinterpret a 32 bit value in a register as a
3208// 64 bit value and vice-versa.
3209SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3210 ExtOrTruncConversion Conv) {
3211 SDLoc dl(NatWidthRes);
3212
3213 // For reinterpreting 32-bit values as 64 bit values, we generate
3214 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3215 if (Conv == ExtOrTruncConversion::Ext) {
3216 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3217 SDValue SubRegIdx =
3218 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3219 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3220 ImDef, NatWidthRes, SubRegIdx), 0);
3221 }
3222
3223 assert(Conv == ExtOrTruncConversion::Trunc &&
3224 "Unknown convertion between 32 and 64 bit values.");
3225 // For reinterpreting 64-bit values as 32-bit values, we just need to
3226 // EXTRACT_SUBREG (i.e. extract the low word).
3227 SDValue SubRegIdx =
3228 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3229 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3230 NatWidthRes, SubRegIdx), 0);
3231}
3232
3233// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3234// Handle both zero-extensions and sign-extensions.
3235SDValue
3236IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3237 ZeroCompare CmpTy) {
3238 EVT InVT = LHS.getValueType();
3239 bool Is32Bit = InVT == MVT::i32;
3240 SDValue ToExtend;
3241
3242 // Produce the value that needs to be either zero or sign extended.
3243 switch (CmpTy) {
3244 case ZeroCompare::GEZExt:
3245 case ZeroCompare::GESExt:
3246 ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
3247 dl, InVT, LHS, LHS), 0);
3248 break;
3249 case ZeroCompare::LEZExt:
3250 case ZeroCompare::LESExt: {
3251 if (Is32Bit) {
3252 // Upper 32 bits cannot be undefined for this sequence.
3253 LHS = signExtendInputIfNeeded(LHS);
3254 SDValue Neg =
3255 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3256 ToExtend =
3257 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3258 Neg, S->getI64Imm(1, dl),
3259 S->getI64Imm(63, dl)), 0);
3260 } else {
3261 SDValue Addi =
3262 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3263 S->getI64Imm(~0ULL, dl)), 0);
3264 ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
3265 Addi, LHS), 0);
3266 }
3267 break;
3268 }
3269 }
3270
3271 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
3272 if (!Is32Bit &&
3273 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3274 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3275 ToExtend, S->getI64Imm(1, dl),
3276 S->getI64Imm(63, dl)), 0);
3277 if (!Is32Bit &&
3278 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3279 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
3280 S->getI64Imm(63, dl)), 0);
3281
3282 assert(Is32Bit && "Should have handled the 32-bit sequences above.");
3283 // For 32-bit sequences, the extensions differ between GE/LE cases.
3284 switch (CmpTy) {
3285 case ZeroCompare::GEZExt: {
3286 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3287 S->getI32Imm(31, dl) };
3288 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3289 ShiftOps), 0);
3290 }
3291 case ZeroCompare::GESExt:
3292 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
3293 S->getI32Imm(31, dl)), 0);
3294 case ZeroCompare::LEZExt:
3295 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
3296 S->getI32Imm(1, dl)), 0);
3297 case ZeroCompare::LESExt:
3298 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
3299 S->getI32Imm(-1, dl)), 0);
3300 }
3301
3302 // The above case covers all the enumerators so it can't have a default clause
3303 // to avoid compiler warnings.
3304 llvm_unreachable("Unknown zero-comparison type.");
3305}
3306
3307/// Produces a zero-extended result of comparing two 32-bit values according to
3308/// the passed condition code.
3309SDValue
3310IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3312 int64_t RHSValue, SDLoc dl) {
3315 return SDValue();
3316 bool IsRHSZero = RHSValue == 0;
3317 bool IsRHSOne = RHSValue == 1;
3318 bool IsRHSNegOne = RHSValue == -1LL;
3319 switch (CC) {
3320 default: return SDValue();
3321 case ISD::SETEQ: {
3322 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3323 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3324 SDValue Xor = IsRHSZero ? LHS :
3325 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3326 SDValue Clz =
3327 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3328 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3329 S->getI32Imm(31, dl) };
3330 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3331 ShiftOps), 0);
3332 }
3333 case ISD::SETNE: {
3334 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3335 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3336 SDValue Xor = IsRHSZero ? LHS :
3337 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3338 SDValue Clz =
3339 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3340 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3341 S->getI32Imm(31, dl) };
3342 SDValue Shift =
3343 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3344 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3345 S->getI32Imm(1, dl)), 0);
3346 }
3347 case ISD::SETGE: {
3348 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3349 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3350 if(IsRHSZero)
3351 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3352
3353 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3354 // by swapping inputs and falling through.
3355 std::swap(LHS, RHS);
3356 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3357 IsRHSZero = RHSConst && RHSConst->isZero();
3358 [[fallthrough]];
3359 }
3360 case ISD::SETLE: {
3361 if (CmpInGPR == ICGPR_NonExtIn)
3362 return SDValue();
3363 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3364 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3365 if(IsRHSZero) {
3366 if (CmpInGPR == ICGPR_NonExtIn)
3367 return SDValue();
3368 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3369 }
3370
3371 // The upper 32-bits of the register can't be undefined for this sequence.
3372 LHS = signExtendInputIfNeeded(LHS);
3373 RHS = signExtendInputIfNeeded(RHS);
3374 SDValue Sub =
3375 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3376 SDValue Shift =
3377 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3378 S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3379 0);
3380 return
3381 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3382 MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3383 }
3384 case ISD::SETGT: {
3385 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3386 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3387 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3388 // Handle SETLT -1 (which is equivalent to SETGE 0).
3389 if (IsRHSNegOne)
3390 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3391
3392 if (IsRHSZero) {
3393 if (CmpInGPR == ICGPR_NonExtIn)
3394 return SDValue();
3395 // The upper 32-bits of the register can't be undefined for this sequence.
3396 LHS = signExtendInputIfNeeded(LHS);
3397 RHS = signExtendInputIfNeeded(RHS);
3398 SDValue Neg =
3399 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3400 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3401 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3402 }
3403 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3404 // (%b < %a) by swapping inputs and falling through.
3405 std::swap(LHS, RHS);
3406 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3407 IsRHSZero = RHSConst && RHSConst->isZero();
3408 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3409 [[fallthrough]];
3410 }
3411 case ISD::SETLT: {
3412 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3413 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3414 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3415 // Handle SETLT 1 (which is equivalent to SETLE 0).
3416 if (IsRHSOne) {
3417 if (CmpInGPR == ICGPR_NonExtIn)
3418 return SDValue();
3419 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3420 }
3421
3422 if (IsRHSZero) {
3423 SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3424 S->getI32Imm(31, dl) };
3425 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3426 ShiftOps), 0);
3427 }
3428
3429 if (CmpInGPR == ICGPR_NonExtIn)
3430 return SDValue();
3431 // The upper 32-bits of the register can't be undefined for this sequence.
3432 LHS = signExtendInputIfNeeded(LHS);
3433 RHS = signExtendInputIfNeeded(RHS);
3434 SDValue SUBFNode =
3435 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3436 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3437 SUBFNode, S->getI64Imm(1, dl),
3438 S->getI64Imm(63, dl)), 0);
3439 }
3440 case ISD::SETUGE:
3441 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3442 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3443 std::swap(LHS, RHS);
3444 [[fallthrough]];
3445 case ISD::SETULE: {
3446 if (CmpInGPR == ICGPR_NonExtIn)
3447 return SDValue();
3448 // The upper 32-bits of the register can't be undefined for this sequence.
3449 LHS = zeroExtendInputIfNeeded(LHS);
3450 RHS = zeroExtendInputIfNeeded(RHS);
3451 SDValue Subtract =
3452 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3453 SDValue SrdiNode =
3454 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3455 Subtract, S->getI64Imm(1, dl),
3456 S->getI64Imm(63, dl)), 0);
3457 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3458 S->getI32Imm(1, dl)), 0);
3459 }
3460 case ISD::SETUGT:
3461 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3462 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3463 std::swap(LHS, RHS);
3464 [[fallthrough]];
3465 case ISD::SETULT: {
3466 if (CmpInGPR == ICGPR_NonExtIn)
3467 return SDValue();
3468 // The upper 32-bits of the register can't be undefined for this sequence.
3469 LHS = zeroExtendInputIfNeeded(LHS);
3470 RHS = zeroExtendInputIfNeeded(RHS);
3471 SDValue Subtract =
3472 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3473 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3474 Subtract, S->getI64Imm(1, dl),
3475 S->getI64Imm(63, dl)), 0);
3476 }
3477 }
3478}
3479
3480/// Produces a sign-extended result of comparing two 32-bit values according to
3481/// the passed condition code.
///
/// Each path returns either an empty SDValue (no GPR sequence is produced for
/// that case) or the final machine node of the sequence that computes the
/// result. IsRHSZero / IsRHSOne / IsRHSNegOne are derived from \p RHSValue and
/// select shorter special-case sequences. SETGE, SETGT, SETUGE and SETUGT swap
/// LHS and RHS and fall through to the SETLE, SETLT, SETULE and SETULT code.
///
/// NOTE(review): listing lines 3484 and 3486-3487 are absent from this excerpt;
/// presumably they hold the declaration of the CC parameter switched on below
/// and the condition guarding the first `return SDValue();` -- confirm against
/// the upstream file.
3482SDValue
3483IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3485 int64_t RHSValue, SDLoc dl) {
3488 return SDValue();
3489 bool IsRHSZero = RHSValue == 0;
3490 bool IsRHSOne = RHSValue == 1;
3491 bool IsRHSNegOne = RHSValue == -1LL;
3492
3493 switch (CC) {
3494 default: return SDValue();
3495 case ISD::SETEQ: {
3496 // (sext (setcc %a, %b, seteq)) ->
3497 // (neg (lshr (ctlz (xor %a, %b)), 5))
3498 // (sext (setcc %a, 0, seteq)) ->
3499 // (neg (lshr (ctlz %a), 5))
3500 SDValue CountInput = IsRHSZero ? LHS :
3501 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3502 SDValue Cntlzw =
3503 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
 // Despite the SHLOps/Slwi names, these are the same RLWINM operands
 // (27, 5, 31) that the SETNE case below documents as a shift right by 5.
3504 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3505 S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3506 SDValue Slwi =
3507 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3508 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3509 }
3510 case ISD::SETNE: {
3511 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3512 // flip the bit, finally take 2's complement.
3513 // (sext (setcc %a, %b, setne)) ->
3514 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3515 // Same as above, but the first xor is not needed.
3516 // (sext (setcc %a, 0, setne)) ->
3517 // (neg (xor (lshr (ctlz %a), 5), 1))
3518 SDValue Xor = IsRHSZero ? LHS :
3519 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3520 SDValue Clz =
3521 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3522 SDValue ShiftOps[] =
3523 { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3524 SDValue Shift =
3525 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3526 SDValue Xori =
3527 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3528 S->getI32Imm(1, dl)), 0);
3529 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3530 }
3531 case ISD::SETGE: {
3532 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3533 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3534 if (IsRHSZero)
3535 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3536
3537 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3538 // by swapping inputs and falling through.
3539 std::swap(LHS, RHS);
 // After the swap the new RHS may itself be a zero constant, so recompute
 // the flag that the SETLE code tests.
3540 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3541 IsRHSZero = RHSConst && RHSConst->isZero();
3542 [[fallthrough]];
3543 }
3544 case ISD::SETLE: {
3545 if (CmpInGPR == ICGPR_NonExtIn)
3546 return SDValue();
3547 // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
3548 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3549 if (IsRHSZero)
3550 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3551
3552 // The upper 32-bits of the register can't be undefined for this sequence.
3553 LHS = signExtendInputIfNeeded(LHS);
3554 RHS = signExtendInputIfNeeded(RHS);
 // NOTE(review): this SUBF8 node is created with an extra MVT::Glue result
 // type, unlike the SUBF8 nodes in the other cases of this function; only
 // result 0 is used here -- confirm the glue result is intended.
3555 SDValue SUBFNode =
3556 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3557 LHS, RHS), 0);
3558 SDValue Srdi =
3559 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3560 SUBFNode, S->getI64Imm(1, dl),
3561 S->getI64Imm(63, dl)), 0);
3562 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3563 S->getI32Imm(-1, dl)), 0);
3564 }
3565 case ISD::SETGT: {
3566 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3567 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3568 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
 // %a > -1 is emitted with the same sequence as %a >= 0 (GESExt).
3569 if (IsRHSNegOne)
3570 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3571 if (IsRHSZero) {
3572 if (CmpInGPR == ICGPR_NonExtIn)
3573 return SDValue();
3574 // The upper 32-bits of the register can't be undefined for this sequence.
3575 LHS = signExtendInputIfNeeded(LHS);
 // Only LHS feeds the NEG8/SRADI nodes below; the extended RHS is not used
 // on this path.
3576 RHS = signExtendInputIfNeeded(RHS);
3577 SDValue Neg =
3578 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3579 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3580 S->getI64Imm(63, dl)), 0);
3581 }
3582 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3583 // (%b < %a) by swapping inputs and falling through.
3584 std::swap(LHS, RHS);
3585 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3586 IsRHSZero = RHSConst && RHSConst->isZero();
3587 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3588 [[fallthrough]];
3589 }
3590 case ISD::SETLT: {
3591 // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3592 // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3593 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
 // %a < 1 is emitted with the same sequence as %a <= 0 (LESExt).
3594 if (IsRHSOne) {
3595 if (CmpInGPR == ICGPR_NonExtIn)
3596 return SDValue();
3597 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3598 }
3599 if (IsRHSZero)
3600 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3601 S->getI32Imm(31, dl)), 0);
3602
3603 if (CmpInGPR == ICGPR_NonExtIn)
3604 return SDValue();
3605 // The upper 32-bits of the register can't be undefined for this sequence.
3606 LHS = signExtendInputIfNeeded(LHS);
3607 RHS = signExtendInputIfNeeded(RHS);
3608 SDValue SUBFNode =
3609 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3610 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3611 SUBFNode, S->getI64Imm(63, dl)), 0);
3612 }
3613 case ISD::SETUGE:
3614 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3615 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3616 std::swap(LHS, RHS);
3617 [[fallthrough]];
3618 case ISD::SETULE: {
3619 if (CmpInGPR == ICGPR_NonExtIn)
3620 return SDValue();
3621 // The upper 32-bits of the register can't be undefined for this sequence.
3622 LHS = zeroExtendInputIfNeeded(LHS);
3623 RHS = zeroExtendInputIfNeeded(RHS);
3624 SDValue Subtract =
3625 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3626 SDValue Shift =
3627 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3628 S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3629 0);
3630 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3631 S->getI32Imm(-1, dl)), 0);
3632 }
3633 case ISD::SETUGT:
3634 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3635 // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3636 std::swap(LHS, RHS);
3637 [[fallthrough]];
3638 case ISD::SETULT: {
3639 if (CmpInGPR == ICGPR_NonExtIn)
3640 return SDValue();
3641 // The upper 32-bits of the register can't be undefined for this sequence.
3642 LHS = zeroExtendInputIfNeeded(LHS);
3643 RHS = zeroExtendInputIfNeeded(RHS);
3644 SDValue Subtract =
3645 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3646 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3647 Subtract, S->getI64Imm(63, dl)), 0);
3648 }
3649 }
3650}
3651
3652/// Produces a zero-extended result of comparing two 64-bit values according to
3653/// the passed condition code.
///
/// Each path returns either an empty SDValue or the final machine node of the
/// sequence that computes the result. IsRHSZero / IsRHSOne / IsRHSNegOne are
/// derived from \p RHSValue and select shorter special-case sequences. SETGE,
/// SETGT, SETUGE and SETUGT swap LHS and RHS and fall through to the SETLE,
/// SETLT, SETULE and SETULT code.
///
/// NOTE(review): listing lines 3656 and 3658-3659 are absent from this excerpt;
/// presumably they hold the declaration of the CC parameter switched on below
/// and the condition guarding the first `return SDValue();` -- confirm against
/// the upstream file.
3654SDValue
3655IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3657 int64_t RHSValue, SDLoc dl) {
3660 return SDValue();
3661 bool IsRHSZero = RHSValue == 0;
3662 bool IsRHSOne = RHSValue == 1;
3663 bool IsRHSNegOne = RHSValue == -1LL;
3664 switch (CC) {
3665 default: return SDValue();
3666 case ISD::SETEQ: {
3667 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3668 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3669 SDValue Xor = IsRHSZero ? LHS :
3670 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3671 SDValue Clz =
3672 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3673 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3674 S->getI64Imm(58, dl),
3675 S->getI64Imm(63, dl)), 0);
3676 }
3677 case ISD::SETNE: {
3678 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3679 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3680 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3681 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3682 SDValue Xor = IsRHSZero ? LHS :
3683 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
 // ADDIC8 is created with an additional MVT::Glue result; AC.getValue(1)
 // passes that second result to SUBFE8 as its last operand.
3684 SDValue AC =
3685 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3686 Xor, S->getI32Imm(~0U, dl)), 0);
3687 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3688 Xor, AC.getValue(1)), 0);
3689 }
3690 case ISD::SETGE: {
3691 // {subc.reg, subc.CA} = (subcarry %a, %b)
3692 // (zext (setcc %a, %b, setge)) ->
3693 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3694 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3695 if (IsRHSZero)
3696 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
 // Handle (%a >= %b) as (%b <= %a): swap the inputs, recompute the zero
 // flag for the new RHS and fall through to the SETLE code.
3697 std::swap(LHS, RHS);
3698 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3699 IsRHSZero = RHSConst && RHSConst->isZero();
3700 [[fallthrough]];
3701 }
3702 case ISD::SETLE: {
3703 // {subc.reg, subc.CA} = (subcarry %b, %a)
3704 // (zext (setcc %a, %b, setle)) ->
3705 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3706 // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
3707 if (IsRHSZero)
3708 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3709 SDValue ShiftL =
3710 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3711 S->getI64Imm(1, dl),
3712 S->getI64Imm(63, dl)), 0);
3713 SDValue ShiftR =
3714 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3715 S->getI64Imm(63, dl)), 0);
 // Result #1 of SUBFC8 (the MVT::Glue value) is kept rather than the
 // difference; it is handed to ADDE8 as its last operand.
3716 SDValue SubtractCarry =
3717 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3718 LHS, RHS), 1);
3719 return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3720 ShiftR, ShiftL, SubtractCarry), 0);
3721 }
3722 case ISD::SETGT: {
3723 // {subc.reg, subc.CA} = (subcarry %b, %a)
3724 // (zext (setcc %a, %b, setgt)) ->
3725 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3726 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
 // %a > -1 is emitted with the same sequence as %a >= 0 (GEZExt).
3727 if (IsRHSNegOne)
3728 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3729 if (IsRHSZero) {
3730 SDValue Addi =
3731 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3732 S->getI64Imm(~0ULL, dl)), 0);
3733 SDValue Nor =
3734 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3735 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3736 S->getI64Imm(1, dl),
3737 S->getI64Imm(63, dl)), 0);
3738 }
 // Handle (%a > %b) as (%b < %a): swap the inputs, recompute the constant
 // flags for the new RHS and fall through to the SETLT code.
3739 std::swap(LHS, RHS);
3740 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3741 IsRHSZero = RHSConst && RHSConst->isZero();
3742 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3743 [[fallthrough]];
3744 }
3745 case ISD::SETLT: {
3746 // {subc.reg, subc.CA} = (subcarry %a, %b)
3747 // (zext (setcc %a, %b, setlt)) ->
3748 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3749 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
 // %a < 1 is emitted with the same sequence as %a <= 0 (LEZExt).
3750 if (IsRHSOne)
3751 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3752 if (IsRHSZero)
3753 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3754 S->getI64Imm(1, dl),
3755 S->getI64Imm(63, dl)), 0);
3756 SDValue SRADINode =
3757 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3758 LHS, S->getI64Imm(63, dl)), 0);
3759 SDValue SRDINode =
3760 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3761 RHS, S->getI64Imm(1, dl),
3762 S->getI64Imm(63, dl)), 0);
3763 SDValue SUBFC8Carry =
3764 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3765 RHS, LHS), 1);
3766 SDValue ADDE8Node =
3767 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3768 SRDINode, SRADINode, SUBFC8Carry), 0);
3769 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3770 ADDE8Node, S->getI64Imm(1, dl)), 0);
3771 }
3772 case ISD::SETUGE:
3773 // {subc.reg, subc.CA} = (subcarry %a, %b)
3774 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3775 std::swap(LHS, RHS);
3776 [[fallthrough]];
3777 case ISD::SETULE: {
3778 // {subc.reg, subc.CA} = (subcarry %b, %a)
3779 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3780 SDValue SUBFC8Carry =
3781 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3782 LHS, RHS), 1);
3783 SDValue SUBFE8Node =
3784 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3785 LHS, LHS, SUBFC8Carry), 0);
3786 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3787 SUBFE8Node, S->getI64Imm(1, dl)), 0);
3788 }
3789 case ISD::SETUGT:
3790 // {subc.reg, subc.CA} = (subcarry %b, %a)
3791 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3792 std::swap(LHS, RHS);
3793 [[fallthrough]];
3794 case ISD::SETULT: {
3795 // {subc.reg, subc.CA} = (subcarry %a, %b)
3796 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3797 SDValue SubtractCarry =
3798 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3799 RHS, LHS), 1);
3800 SDValue ExtSub =
3801 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3802 LHS, LHS, SubtractCarry), 0);
3803 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3804 ExtSub), 0);
3805 }
3806 }
3807}
3808
3809/// Produces a sign-extended result of comparing two 64-bit values according to
3810/// the passed condition code.
///
/// Each path returns either an empty SDValue or the final machine node of the
/// sequence that computes the result. IsRHSZero / IsRHSOne / IsRHSNegOne are
/// derived from \p RHSValue and select shorter special-case sequences. SETGE,
/// SETGT, SETUGE and SETUGT swap LHS and RHS and fall through to the SETLE,
/// SETLT, SETULE and SETULT code.
///
/// NOTE(review): listing lines 3813 and 3815-3816 are absent from this excerpt;
/// presumably they hold the declaration of the CC parameter switched on below
/// and the condition guarding the first `return SDValue();` -- confirm against
/// the upstream file.
3811SDValue
3812IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3814 int64_t RHSValue, SDLoc dl) {
3817 return SDValue();
3818 bool IsRHSZero = RHSValue == 0;
3819 bool IsRHSOne = RHSValue == 1;
3820 bool IsRHSNegOne = RHSValue == -1LL;
3821 switch (CC) {
3822 default: return SDValue();
3823 case ISD::SETEQ: {
3824 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3825 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3826 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3827 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3828 SDValue AddInput = IsRHSZero ? LHS :
3829 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
 // ADDIC8 is created with an additional MVT::Glue result; Addic.getValue(1)
 // passes that second result to SUBFE8 as its last operand.
3830 SDValue Addic =
3831 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3832 AddInput, S->getI32Imm(~0U, dl)), 0);
3833 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3834 Addic, Addic.getValue(1)), 0);
3835 }
3836 case ISD::SETNE: {
3837 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3838 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3839 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3840 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3841 SDValue Xor = IsRHSZero ? LHS :
3842 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3843 SDValue SC =
3844 SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3845 Xor, S->getI32Imm(0, dl)), 0);
3846 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3847 SC, SC.getValue(1)), 0);
3848 }
3849 case ISD::SETGE: {
3850 // {subc.reg, subc.CA} = (subcarry %a, %b)
3851 // (sext (setcc %a, %b, setge)) ->
3852 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3853 // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3854 if (IsRHSZero)
3855 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
 // Handle (%a >= %b) as (%b <= %a): swap the inputs, recompute the zero
 // flag for the new RHS and fall through to the SETLE code.
3856 std::swap(LHS, RHS);
3857 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3858 IsRHSZero = RHSConst && RHSConst->isZero();
3859 [[fallthrough]];
3860 }
3861 case ISD::SETLE: {
3862 // {subc.reg, subc.CA} = (subcarry %b, %a)
3863 // (sext (setcc %a, %b, setle)) ->
3864 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3865 // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3866 if (IsRHSZero)
3867 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3868 SDValue ShiftR =
3869 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3870 S->getI64Imm(63, dl)), 0);
3871 SDValue ShiftL =
3872 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3873 S->getI64Imm(1, dl),
3874 S->getI64Imm(63, dl)), 0);
 // Result #1 of SUBFC8 (the MVT::Glue value) is kept rather than the
 // difference; it is handed to ADDE8 as its last operand.
3875 SDValue SubtractCarry =
3876 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3877 LHS, RHS), 1);
3878 SDValue Adde =
3879 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3880 ShiftR, ShiftL, SubtractCarry), 0);
3881 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3882 }
3883 case ISD::SETGT: {
3884 // {subc.reg, subc.CA} = (subcarry %b, %a)
3885 // (sext (setcc %a, %b, setgt)) ->
3886 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3887 // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
 // %a > -1 is emitted with the same sequence as %a >= 0 (GESExt).
3888 if (IsRHSNegOne)
3889 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3890 if (IsRHSZero) {
3891 SDValue Add =
3892 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3893 S->getI64Imm(-1, dl)), 0);
3894 SDValue Nor =
3895 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3896 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3897 S->getI64Imm(63, dl)), 0);
3898 }
 // Handle (%a > %b) as (%b < %a): swap the inputs, recompute the constant
 // flags for the new RHS and fall through to the SETLT code.
3899 std::swap(LHS, RHS);
3900 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3901 IsRHSZero = RHSConst && RHSConst->isZero();
3902 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3903 [[fallthrough]];
3904 }
3905 case ISD::SETLT: {
3906 // {subc.reg, subc.CA} = (subcarry %a, %b)
3907 // (sext (setcc %a, %b, setlt)) ->
3908 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3909 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
 // %a < 1 is emitted with the same sequence as %a <= 0 (LESExt).
3910 if (IsRHSOne)
3911 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3912 if (IsRHSZero) {
3913 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3914 S->getI64Imm(63, dl)), 0);
3915 }
3916 SDValue SRADINode =
3917 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3918 LHS, S->getI64Imm(63, dl)), 0);
3919 SDValue SRDINode =
3920 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3921 RHS, S->getI64Imm(1, dl),
3922 S->getI64Imm(63, dl)), 0);
3923 SDValue SUBFC8Carry =
3924 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3925 RHS, LHS), 1);
3926 SDValue ADDE8Node =
3927 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3928 SRDINode, SRADINode, SUBFC8Carry), 0);
3929 SDValue XORI8Node =
3930 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3931 ADDE8Node, S->getI64Imm(1, dl)), 0);
3932 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3933 XORI8Node), 0);
3934 }
3935 case ISD::SETUGE:
3936 // {subc.reg, subc.CA} = (subcarry %a, %b)
3937 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3938 std::swap(LHS, RHS);
3939 [[fallthrough]];
3940 case ISD::SETULE: {
3941 // {subc.reg, subc.CA} = (subcarry %b, %a)
3942 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3943 SDValue SubtractCarry =
3944 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3945 LHS, RHS), 1);
3946 SDValue ExtSub =
3947 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3948 LHS, SubtractCarry), 0);
 // NOR8 of a value with itself is its bitwise complement (the `~` in the
 // pattern above).
3949 return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3950 ExtSub, ExtSub), 0);
3951 }
3952 case ISD::SETUGT:
3953 // {subc.reg, subc.CA} = (subcarry %b, %a)
3954 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3955 std::swap(LHS, RHS);
3956 [[fallthrough]];
3957 case ISD::SETULT: {
3958 // {subc.reg, subc.CA} = (subcarry %a, %b)
3959 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3960 SDValue SubCarry =
3961 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3962 RHS, LHS), 1);
3963 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3964 LHS, LHS, SubCarry), 0);
3965 }
3966 }
3967}
3968
3969/// Do all uses of this SDValue need the result in a GPR?
3970/// This is meant to be used on values that have type i1 since
3971/// it is somewhat meaningless to ask if values of other types
3972/// should be kept in GPR's.
3973static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3974 assert(Compare.getOpcode() == ISD::SETCC &&
3975 "An ISD::SETCC node required here.");
3976
3977 // For values that have a single use, the caller should obviously already have
3978 // checked if that use is an extending use. We check the other uses here.
3979 if (Compare.hasOneUse())
3980 return true;
3981 // We want the value in a GPR if it is being extended, used for a select, or
3982 // used in logical operations.
3983 for (auto *CompareUse : Compare.getNode()->uses())
3984 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3985 CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3986 CompareUse->getOpcode() != ISD::SELECT &&
3987 !ISD::isBitwiseLogicOp(CompareUse->getOpcode())) {
3988 OmittedForNonExtendUses++;
3989 return false;
3990 }
3991 return true;
3992}
3993
3994/// Returns an equivalent of a SETCC node but with the result the same width as
3995/// the inputs. This can also be used for SELECT_CC if either the true or false
3996/// values is a power of two while the other is zero.
///
/// \p ConvOpts chooses between a zero- and a sign-extended result and whether
/// the condition code is inverted before the sequence is built. An empty
/// SDValue is returned when a SETCC has uses rejected by allUsesExtend, when
/// the compared values are neither i32 nor i64, or when the selected
/// get{32,64}Bit{S,Z}ExtCompare helper returns one.
3997SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3998 SetccInGPROpts ConvOpts) {
 // Note: SELECT_CC is accepted as well, although the assertion message only
 // mentions SETCC.
3999 assert((Compare.getOpcode() == ISD::SETCC ||
4000 Compare.getOpcode() == ISD::SELECT_CC) &&
4001 "An ISD::SETCC node required here.");
4002
4003 // Don't convert this comparison to a GPR sequence because there are uses
4004 // of the i1 result (i.e. uses that require the result in the CR).
4005 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
4006 return SDValue();
4007
4008 SDValue LHS = Compare.getOperand(0);
4009 SDValue RHS = Compare.getOperand(1);
4010
4011 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
4012 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
 // NOTE(review): listing line 4013 is absent from this excerpt; presumably it
 // declares the `CC` variable initialised by the expression on the next line
 // -- confirm against the upstream file.
4014 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
4015 EVT InputVT = LHS.getValueType();
4016 if (InputVT != MVT::i32 && InputVT != MVT::i64)
4017 return SDValue();
4018
 // The *Invert options ask for the opposite predicate, so flip the condition
 // code before building the sequence.
4019 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
4020 ConvOpts == SetccInGPROpts::SExtInvert)
4021 CC = ISD::getSetCCInverse(CC, InputVT);
4022
4023 bool Inputs32Bit = InputVT == MVT::i32;
4024
4025 SDLoc dl(Compare);
 // INT64_MAX stands in for "RHS is not a constant"; the helpers only compare
 // RHSValue against 0, 1 and -1.
4026 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
4027 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
4028 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
4029 ConvOpts == SetccInGPROpts::SExtInvert;
4030
 // Dispatch on input width and on the kind of extension requested.
4031 if (IsSext && Inputs32Bit)
4032 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4033 else if (Inputs32Bit)
4034 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4035 else if (IsSext)
4036 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4037 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4038}
4039
4040} // end anonymous namespace
4041
4042bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
4043 if (N->getValueType(0) != MVT::i32 &&
4044 N->getValueType(0) != MVT::i64)
4045 return false;
4046
4047 // This optimization will emit code that assumes 64-bit registers
4048 // so we don't want to run it in 32-bit mode. Also don't run it
4049 // on functions that are not to be optimized.
4050 if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64())
4051 return false;
4052
4053 // For POWER10, it is more profitable to use the set boolean extension
4054 // instructions rather than the integer compare elimination codegen.
4055 // Users can override this via the command line option, `--ppc-gpr-icmps`.
4056 if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
4057 return false;
4058
4059 switch (N->getOpcode()) {
4060 default: break;
4061 case ISD::ZERO_EXTEND:
4062 case ISD::SIGN_EXTEND:
4063 case ISD::AND:
4064 case ISD::OR:
4065 case ISD::XOR: {
4066 IntegerCompareEliminator ICmpElim(CurDAG, this);
4067 if (SDNode *New = ICmpElim.Select(N)) {
4068 ReplaceNode(N, New);
4069 return true;
4070 }
4071 }
4072 }
4073 return false;
4074}
4075
4076bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
4077 if (N->getValueType(0) != MVT::i32 &&
4078 N->getValueType(0) != MVT::i64)
4079 return false;
4080
4081 if (!UseBitPermRewriter)
4082 return false;
4083
4084 switch (N->getOpcode()) {
4085 default: break;
4086 case ISD::SRL:
4087 // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
4088 // uses the BRH instruction.
4089 if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
4090 N->getOperand(0).getOpcode() == ISD::BSWAP) {
4091 auto &OpRight = N->getOperand(1);
4092 ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
4093 if (SRLConst && SRLConst->getSExtValue() == 16)
4094 return false;
4095 }
4096 [[fallthrough]];
4097 case ISD::ROTL:
4098 case ISD::SHL:
4099 case ISD::AND:
4100 case ISD::OR: {
4101 BitPermutationSelector BPS(CurDAG);
4102 if (SDNode *New = BPS.Select(N)) {
4103 ReplaceNode(N, New);
4104 return true;
4105 }
4106 return false;
4107 }
4108 }
4109
4110 return false;
4111}
4112
4113/// SelectCC - Select a comparison of the specified values with the specified
4114/// condition code, returning the CR# of the expression.
4115SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4116 const SDLoc &dl, SDValue Chain) {
4117 // Always select the LHS.
4118 unsigned Opc;
4119
4120 if (LHS.getValueType() == MVT::i32) {
4121 unsigned Imm;
4122 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4123 if (isInt32Immediate(RHS, Imm)) {
4124 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4125 if (isUInt<16>(Imm))
4126 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4127 getI32Imm(Imm & 0xFFFF, dl)),
4128 0);
4129 // If this is a 16-bit signed immediate, fold it.
4130 if (isInt<16>((int)Imm))
4131 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4132 getI32Imm(Imm & 0xFFFF, dl)),
4133 0);
4134
4135 // For non-equality comparisons, the default code would materialize the
4136 // constant, then compare against it, like this:
4137 // lis r2, 4660
4138 // ori r2, r2, 22136
4139 // cmpw cr0, r3, r2
4140 // Since we are just comparing for equality, we can emit this instead:
4141 // xoris r0,r3,0x1234
4142 // cmplwi cr0,r0,0x5678
4143 // beq cr0,L6
4144 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
4145 getI32Imm(Imm >> 16, dl)), 0);
4146 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
4147 getI32Imm(Imm & 0xFFFF, dl)), 0);
4148 }
4149 Opc = PPC::CMPLW;
4150 } else if (ISD::isUnsignedIntSetCC(CC)) {
4151 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
4152 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4153 getI32Imm(Imm & 0xFFFF, dl)), 0);
4154 Opc = PPC::CMPLW;
4155 } else {
4156 int16_t SImm;
4157 if (isIntS16Immediate(RHS, SImm))
4158 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4159 getI32Imm((int)SImm & 0xFFFF,
4160 dl)),
4161 0);
4162 Opc = PPC::CMPW;
4163 }
4164 } else if (LHS.getValueType() == MVT::i64) {
4165 uint64_t Imm;
4166 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4167 if (isInt64Immediate(RHS.getNode(), Imm)) {
4168 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4169 if (isUInt<16>(Imm))
4170 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4171 getI32Imm(Imm & 0xFFFF, dl)),
4172 0);
4173 // If this is a 16-bit signed immediate, fold it.
4174 if (isInt<16>(Imm))
4175 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4176 getI32Imm(Imm & 0xFFFF, dl)),
4177 0);
4178
4179 // For non-equality comparisons, the default code would materialize the
4180 // constant, then compare against it, like this:
4181 // lis r2, 4660
4182 // ori r2, r2, 22136
4183 // cmpd cr0, r3, r2
4184 // Since we are just comparing for equality, we can emit this instead:
4185 // xoris r0,r3,0x1234
4186 // cmpldi cr0,r0,0x5678
4187 // beq cr0,L6
4188 if (isUInt<32>(Imm)) {
4189 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4190 getI64Imm(Imm >> 16, dl)), 0);
4191 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4192 getI64Imm(Imm & 0xFFFF, dl)),
4193 0);
4194 }
4195 }
4196 Opc = PPC::CMPLD;
4197 } else if (ISD::isUnsignedIntSetCC(CC)) {
4198 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4199 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4200 getI64Imm(Imm & 0xFFFF, dl)), 0);
4201 Opc = PPC::CMPLD;
4202 } else {
4203 int16_t SImm;
4204 if (isIntS16Immediate(RHS, SImm))
4205 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4206 getI64Imm(SImm & 0xFFFF, dl)),
4207 0);
4208 Opc = PPC::CMPD;
4209 }
4210 } else if (LHS.getValueType() == MVT::f32) {
4211 if (Subtarget->hasSPE()) {
4212 switch (CC) {
4213 default:
4214 case ISD::SETEQ:
4215 case ISD::SETNE:
4216 Opc = PPC::EFSCMPEQ;
4217 break;
4218 case ISD::SETLT:
4219 case ISD::SETGE:
4220 case ISD::SETOLT:
4221 case ISD::SETOGE:
4222 case ISD::SETULT:
4223 case ISD::SETUGE:
4224 Opc = PPC::EFSCMPLT;
4225 break;
4226 case ISD::SETGT:
4227 case ISD::SETLE:
4228 case ISD::SETOGT:
4229 case ISD::SETOLE:
4230 case ISD::SETUGT:
4231 case ISD::SETULE:
4232 Opc = PPC::EFSCMPGT;
4233 break;
4234 }
4235 } else
4236 Opc = PPC::FCMPUS;
4237 } else if (LHS.getValueType() == MVT::f64) {
4238 if (Subtarget->hasSPE()) {
4239 switch (CC) {
4240 default:
4241 case ISD::SETEQ:
4242 case ISD::SETNE:
4243 Opc = PPC::EFDCMPEQ;
4244 break;
4245 case ISD::SETLT:
4246 case ISD::SETGE:
4247 case ISD::SETOLT:
4248 case ISD::SETOGE:
4249 case ISD::SETULT:
4250 case ISD::SETUGE:
4251 Opc = PPC::EFDCMPLT;
4252 break;
4253 case ISD::SETGT:
4254 case ISD::SETLE:
4255 case ISD::SETOGT:
4256 case ISD::SETOLE:
4257 case ISD::SETUGT:
4258 case ISD::SETULE:
4259 Opc = PPC::EFDCMPGT;
4260 break;
4261 }
4262 } else
4263 Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4264 } else {
4265 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4266 assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4267 Opc = PPC::XSCMPUQP;
4268 }
4269 if (Chain)
4270 return SDValue(
4271 CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4272 0);
4273 else
4274 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4275}
4276
4278 const PPCSubtarget *Subtarget) {
4279 // For SPE instructions, the result is in GT bit of the CR
4280 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4281
4282 switch (CC) {
4283 case ISD::SETUEQ:
4284 case ISD::SETONE:
4285 case ISD::SETOLE:
4286 case ISD::SETOGE:
4287 llvm_unreachable("Should be lowered by legalize!");
4288 default: llvm_unreachable("Unknown condition!");
4289 case ISD::SETOEQ:
4290 case ISD::SETEQ:
4291 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4292 case ISD::SETUNE:
4293 case ISD::SETNE:
4294 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4295 case ISD::SETOLT:
4296 case ISD::SETLT:
4297 return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4298 case ISD::SETULE:
4299 case ISD::SETLE:
4300 return PPC::PRED_LE;
4301 case ISD::SETOGT:
4302 case ISD::SETGT:
4303 return PPC::PRED_GT;
4304 case ISD::SETUGE:
4305 case ISD::SETGE:
4306 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4307 case ISD::SETO: return PPC::PRED_NU;
4308 case ISD::SETUO: return PPC::PRED_UN;
4309 // These two are invalid for floating point. Assume we have int.
4310 case ISD::SETULT: return PPC::PRED_LT;
4311 case ISD::SETUGT: return PPC::PRED_GT;
4312 }
4313}
4314
4315/// getCRIdxForSetCC - Return the index of the condition register field
4316/// associated with the SetCC condition, and whether or not the field is
4317/// treated as inverted. That is, lt = 0; ge = 0 inverted.
4318static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4319 Invert = false;
4320 switch (CC) {
4321 default: llvm_unreachable("Unknown condition!");
4322 case ISD::SETOLT:
4323 case ISD::SETLT: return 0; // Bit #0 = SETOLT
4324 case ISD::SETOGT:
4325 case ISD::SETGT: return 1; // Bit #1 = SETOGT
4326 case ISD::SETOEQ:
4327 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4328 case ISD::SETUO: return 3; // Bit #3 = SETUO
4329 case ISD::SETUGE:
4330 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4331 case ISD::SETULE:
4332 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4333 case ISD::SETUNE:
4334 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4335 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4336 case ISD::SETUEQ:
4337 case ISD::SETOGE:
4338 case ISD::SETOLE:
4339 case ISD::SETONE:
4340 llvm_unreachable("Invalid branch code: should be expanded by legalize");
4341 // These are invalid for floating point. Assume integer.
4342 case ISD::SETULT: return 0;
4343 case ISD::SETUGT: return 1;
4344 }
4345}
4346
4347// getVCmpInst: return the vector compare instruction for the specified
4348// vector type and condition code. Since this is for altivec specific code,
4349// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4350// and v4f32).
4351static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4352 bool HasVSX, bool &Swap, bool &Negate) {
4353 Swap = false;
4354 Negate = false;
4355
4356 if (VecVT.isFloatingPoint()) {
4357 /* Handle some cases by swapping input operands. */
4358 switch (CC) {
4359 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4360 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4361 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4362 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4363 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4364 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4365 default: break;
4366 }
4367 /* Handle some cases by negating the result. */
4368 switch (CC) {
4369 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4370 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4371 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4372 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4373 default: break;
4374 }
4375 /* We have instructions implementing the remaining cases. */
4376 switch (CC) {
4377 case ISD::SETEQ:
4378 case ISD::SETOEQ:
4379 if (VecVT == MVT::v4f32)
4380 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4381 else if (VecVT == MVT::v2f64)
4382 return PPC::XVCMPEQDP;
4383 break;
4384 case ISD::SETGT:
4385 case ISD::SETOGT:
4386 if (VecVT == MVT::v4f32)
4387 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4388 else if (VecVT == MVT::v2f64)
4389 return PPC::XVCMPGTDP;
4390 break;
4391 case ISD::SETGE:
4392 case ISD::SETOGE:
4393 if (VecVT == MVT::v4f32)
4394 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4395 else if (VecVT == MVT::v2f64)
4396 return PPC::XVCMPGEDP;
4397 break;
4398 default:
4399 break;
4400 }
4401 llvm_unreachable("Invalid floating-point vector compare condition");
4402 } else {
4403 /* Handle some cases by swapping input operands. */
4404 switch (CC) {
4405 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4406 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4407 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4408 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4409 default: break;
4410 }
4411 /* Handle some cases by negating the result. */
4412 switch (CC) {
4413 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4414 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4415 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4416 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4417 default: break;
4418 }
4419 /* We have instructions implementing the remaining cases. */
4420 switch (CC) {
4421 case ISD::SETEQ:
4422 case ISD::SETUEQ:
4423 if (VecVT == MVT::v16i8)
4424 return PPC::VCMPEQUB;
4425 else if (VecVT == MVT::v8i16)
4426 return PPC::VCMPEQUH;
4427 else if (VecVT == MVT::v4i32)
4428 return PPC::VCMPEQUW;
4429 else if (VecVT == MVT::v2i64)
4430 return PPC::VCMPEQUD;
4431 else if (VecVT == MVT::v1i128)
4432 return PPC::VCMPEQUQ;
4433 break;
4434 case ISD::SETGT:
4435 if (VecVT == MVT::v16i8)
4436 return PPC::VCMPGTSB;
4437 else if (VecVT == MVT::v8i16)
4438 return PPC::VCMPGTSH;
4439 else if (VecVT == MVT::v4i32)
4440 return PPC::VCMPGTSW;
4441 else if (VecVT == MVT::v2i64)
4442 return PPC::VCMPGTSD;
4443 else if (VecVT == MVT::v1i128)
4444 return PPC::VCMPGTSQ;
4445 break;
4446 case ISD::SETUGT:
4447 if (VecVT == MVT::v16i8)
4448 return PPC::VCMPGTUB;
4449 else if (VecVT == MVT::v8i16)
4450 return PPC::VCMPGTUH;
4451 else if (VecVT == MVT::v4i32)
4452 return PPC::VCMPGTUW;
4453 else if (VecVT == MVT::v2i64)
4454 return PPC::VCMPGTUD;
4455 else if (VecVT == MVT::v1i128)
4456 return PPC::VCMPGTUQ;
4457 break;
4458 default:
4459 break;
4460 }
4461 llvm_unreachable("Invalid integer vector compare condition");
4462 }
4463}
4464
4465bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
4466 SDLoc dl(N);
4467 unsigned Imm;
4468 bool IsStrict = N->isStrictFPOpcode();
4470 cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
4471 EVT PtrVT =
4473 bool isPPC64 = (PtrVT == MVT::i64);
4474 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
4475
4476 SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
4477 SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
4478
4479 if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
4480 // We can codegen setcc op, imm very efficiently compared to a brcond.
4481 // Check for those cases here.
4482 // setcc op, 0
4483 if (Imm == 0) {
4484 SDValue Op = LHS;
4485 switch (CC) {
4486 default: break;
4487 case ISD::SETEQ: {
4488 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
4489 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
4490 getI32Imm(31, dl) };
4491 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4492 return true;
4493 }
4494 case ISD::SETNE: {
4495 if (isPPC64) break;
4496 SDValue AD =
4497 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4498 Op, getI32Imm(~0U, dl)), 0);
4499 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
4500 return true;
4501 }
4502 case ISD::SETLT: {
4503 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4504 getI32Imm(31, dl) };
4505 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4506 return true;
4507 }
4508 case ISD::SETGT: {
4509 SDValue T =
4510 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
4511 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
4512 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
4513 getI32Imm(31, dl) };
4514 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4515 return true;
4516 }
4517 }
4518 } else if (Imm == ~0U) { // setcc op, -1
4519 SDValue Op = LHS;
4520 switch (CC) {
4521 default: break;
4522 case ISD::SETEQ:
4523 if (isPPC64) break;
4524 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4525 Op, getI32Imm(1, dl)), 0);
4526 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
4527 SDValue(CurDAG->getMachineNode(PPC::LI, dl,
4528 MVT::i32,
4529 getI32Imm(0, dl)),
4530 0), Op.getValue(1));
4531 return true;
4532 case ISD::SETNE: {
4533 if (isPPC64) break;
4534 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
4535 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4536 Op, getI32Imm(~0U, dl));
4537 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
4538 SDValue(AD, 1));
4539 return true;
4540 }
4541 case ISD::SETLT: {
4542 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
4543 getI32Imm(1, dl)), 0);
4544 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
4545 Op), 0);
4546 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4547 getI32Imm(31, dl) };
4548 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4549 return true;
4550 }
4551 case ISD::SETGT: {
4552 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4553 getI32Imm(31, dl) };
4554 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4555 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
4556 return true;
4557 }
4558 }
4559 }
4560 }
4561
4562 // Altivec Vector compare instructions do not set any CR register by default and
4563 // vector compare operations return the same type as the operands.
4564 if (!IsStrict && LHS.getValueType().isVector()) {
4565 if (Subtarget->hasSPE())
4566 return false;
4567
4568 EVT VecVT = LHS.getValueType();
4569 bool Swap, Negate;
4570 unsigned int VCmpInst =
4571 getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
4572 if (Swap)
4573 std::swap(LHS, RHS);
4574
4575 EVT ResVT = VecVT.changeVectorElementTypeToInteger();
4576 if (Negate) {
4577 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
4578 CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4579 ResVT, VCmp, VCmp);
4580 return true;
4581 }
4582
4583 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
4584 return true;
4585 }
4586
4587 if (Subtarget->useCRBits())
4588 return false;
4589
4590 bool Inv;
4591 unsigned Idx = getCRIdxForSetCC(CC, Inv);
4592 SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
4593 if (IsStrict)
4594 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
4595 SDValue IntCR;
4596
4597 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4598 // The correct compare instruction is already set by SelectCC()
4599 if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4600 Idx = 1;
4601 }
4602
4603 // Force the ccreg into CR7.
4604 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
4605
4606 SDValue InGlue; // Null incoming flag value.
4607 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
4608 InGlue).getValue(1);
4609
4610 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
4611 CCReg), 0);
4612
4613 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
4614 getI32Imm(31, dl), getI32Imm(31, dl) };
4615 if (!Inv) {
4616 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4617 return true;
4618 }
4619
4620 // Get the specified bit.
4621 SDValue Tmp =
4622 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4623 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
4624 return true;
4625}
4626
4627/// Does this node represent a load/store node whose address can be represented
4628/// with a register plus an immediate that's a multiple of \p Val:
4629bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4630 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4631 StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4632 MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
4633 SDValue AddrOp;
4634 if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4635 AddrOp = N->getOperand(1);
4636 else if (STN)
4637 AddrOp = STN->getOperand(2);
4638
4639 // If the address points a frame object or a frame object with an offset,
4640 // we need to check the object alignment.
4641 short Imm = 0;
4642 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4643 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4644 AddrOp)) {
4645 // If op0 is a frame index that is under aligned, we can't do it either,
4646 // because it is translated to r31 or r1 + slot + offset. We won't know the
4647 // slot number until the stack frame is finalized.
4648 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4649 unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4650 if ((SlotAlign % Val) != 0)
4651 return false;
4652
4653 // If we have an offset, we need further check on the offset.
4654 if (AddrOp.getOpcode() != ISD::ADD)
4655 return true;
4656 }
4657
4658 if (AddrOp.getOpcode() == ISD::ADD)
4659 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4660
4661 // If the address comes from the outside, the offset will be zero.
4662 return AddrOp.getOpcode() == ISD::CopyFromReg;
4663}
4664
4665void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4666 // Transfer memoperands.
4667 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4668 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4669}
4670
4672 bool &NeedSwapOps, bool &IsUnCmp) {
4673
4674 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4675
4676 SDValue LHS = N->getOperand(0);
4677 SDValue RHS = N->getOperand(1);
4678 SDValue TrueRes = N->getOperand(2);
4679 SDValue FalseRes = N->getOperand(3);
4680 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4681 if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4682 N->getSimpleValueType(0) != MVT::i32))
4683 return false;
4684
4685 // We are looking for any of:
4686 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4687 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4688 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4689 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4690 int64_t TrueResVal = TrueConst->getSExtValue();
4691 if ((TrueResVal < -1 || TrueResVal > 1) ||
4692 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4693 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4694 (TrueResVal == 0 &&
4695 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4696 return false;
4697
4698 SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4699 ? FalseRes
4700 : FalseRes.getOperand(0);
4701 bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4702 if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4703 SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4704 return false;
4705
4706 // Without this setb optimization, the outer SELECT_CC will be manually
4707 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4708 // transforms pseudo instruction to isel instruction. When there are more than
4709 // one use for result like zext/sext, with current optimization we only see
4710 // isel is replaced by setb but can't see any significant gain. Since
4711 // setb has longer latency than original isel, we should avoid this. Another
4712 // point is that setb requires comparison always kept, it can break the
4713 // opportunity to get the comparison away if we have in future.
4714 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4715 return false;
4716
4717 SDValue InnerLHS = SetOrSelCC.getOperand(0);
4718 SDValue InnerRHS = SetOrSelCC.getOperand(1);
4719 ISD::CondCode InnerCC =
4720 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4721 // If the inner comparison is a select_cc, make sure the true/false values are
4722 // 1/-1 and canonicalize it if needed.
4723 if (InnerIsSel) {
4724 ConstantSDNode *SelCCTrueConst =
4725 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4726 ConstantSDNode *SelCCFalseConst =
4727 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4728 if (!SelCCTrueConst || !SelCCFalseConst)
4729 return false;
4730 int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4731 int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4732 // The values must be -1/1 (requiring a swap) or 1/-1.
4733 if (SelCCTVal == -1 && SelCCFVal == 1) {
4734 std::swap(InnerLHS, InnerRHS);
4735 } else if (SelCCTVal != 1 || SelCCFVal != -1)
4736 return false;
4737 }
4738
4739 // Canonicalize unsigned case
4740 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4741 IsUnCmp = true;
4742 InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4743 }
4744
4745 bool InnerSwapped = false;
4746 if (LHS == InnerRHS && RHS == InnerLHS)
4747 InnerSwapped = true;
4748 else if (LHS != InnerLHS || RHS != InnerRHS)
4749 return false;
4750
4751 switch (CC) {
4752 // (select_cc lhs, rhs, 0, \
4753 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4754 case ISD::SETEQ:
4755 if (!InnerIsSel)
4756 return false;
4757 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4758 return false;
4759 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4760 break;
4761
4762 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4763 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4764 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4765 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4766 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4767 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4768 case ISD::SETULT:
4769 if (!IsUnCmp && InnerCC != ISD::SETNE)
4770 return false;
4771 IsUnCmp = true;
4772 [[fallthrough]];
4773 case ISD::SETLT:
4774 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4775 (InnerCC == ISD::SETLT && InnerSwapped))
4776 NeedSwapOps = (TrueResVal == 1);
4777 else
4778 return false;
4779 break;
4780
4781 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4782 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4783 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4784 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4785 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4786 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4787 case ISD::SETUGT:
4788 if (!IsUnCmp && InnerCC != ISD::SETNE)
4789 return false;
4790 IsUnCmp = true;
4791 [[fallthrough]];
4792 case ISD::SETGT:
4793 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4794 (InnerCC == ISD::SETGT && InnerSwapped))
4795 NeedSwapOps = (TrueResVal == -1);
4796 else
4797 return false;
4798 break;
4799
4800 default:
4801 return false;
4802 }
4803
4804 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4805 LLVM_DEBUG(N->dump());
4806
4807 return true;
4808}
4809
4810// Return true if it's a software square-root/divide operand.
4811static bool isSWTestOp(SDValue N) {
4812 if (N.getOpcode() == PPCISD::FTSQRT)
4813 return true;
4814 if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
4815 N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4816 return false;
4817 switch (N.getConstantOperandVal(0)) {
4818 case Intrinsic::ppc_vsx_xvtdivdp:
4819 case Intrinsic::ppc_vsx_xvtdivsp:
4820 case Intrinsic::ppc_vsx_xvtsqrtdp:
4821 case Intrinsic::ppc_vsx_xvtsqrtsp:
4822 return true;
4823 }
4824 return false;
4825}
4826
4827bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4828 assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4829 // We are looking for following patterns, where `truncate to i1` actually has
4830 // the same semantic with `and 1`.
4831 // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4832 // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4833 // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4834 // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4835 // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4836 // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4837 // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4838 // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4839 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4840 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4841 return false;
4842
4843 SDValue CmpRHS = N->getOperand(3);
4844 if (!isNullConstant(CmpRHS))
4845 return false;
4846
4847 SDValue CmpLHS = N->getOperand(2);
4848 if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
4849 return false;
4850
4851 unsigned PCC = 0;
4852 bool IsCCNE = CC == ISD::SETNE;
4853 if (CmpLHS.getOpcode() == ISD::AND &&
4854 isa<ConstantSDNode>(CmpLHS.getOperand(1)))
4855 switch (CmpLHS.getConstantOperandVal(1)) {
4856 case 1:
4857 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4858 break;
4859 case 2:
4860 PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4861 break;
4862 case 4:
4863 PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4864 break;
4865 case 8:
4866 PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4867 break;
4868 default:
4869 return false;
4870 }
4871 else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4872 CmpLHS.getValueType() == MVT::i1)
4873 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4874
4875 if (PCC) {
4876 SDLoc dl(N);
4877 SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
4878 N->getOperand(0)};
4879 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4880 return true;
4881 }
4882 return false;
4883}
4884
4885bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4886 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
4887 // value, for example when crbits is disabled. If so, select the
4888 // loop_decrement intrinsics now.
4889 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4890 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
4891
4892 if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) ||
4893 isNullConstant(LHS.getOperand(1)))
4894 return false;
4895
4896 if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4897 LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement)
4898 return false;
4899
4900 if (!isa<ConstantSDNode>(RHS))
4901 return false;
4902
4903 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
4904 "Counter decrement comparison is not EQ or NE");
4905
4906 SDValue OldDecrement = LHS.getOperand(0);
4907 assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
4908
4909 SDLoc DecrementLoc(OldDecrement);
4910 SDValue ChainInput = OldDecrement.getOperand(0);
4911 SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc)
4912 : getI32Imm(1, DecrementLoc)};
4913 unsigned DecrementOpcode =
4914 Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4915 SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
4916 MVT::i1, DecrementOps);
4917
4918 unsigned Val = RHS->getAsZExtVal();
4919 bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val);
4920 unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4921
4922 ReplaceUses(LHS.getValue(0), LHS.getOperand(1));
4923 CurDAG->RemoveDeadNode(LHS.getNode());
4924
4925 // Mark the old loop_decrement intrinsic as dead.
4926 ReplaceUses(OldDecrement.getValue(1), ChainInput);
4927 CurDAG->RemoveDeadNode(OldDecrement.getNode());
4928
4929 SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
4930 ChainInput, N->getOperand(0));
4931
4932 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0),
4933 N->getOperand(4), Chain);
4934 return true;
4935}
4936
4937bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4938 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4939 unsigned Imm;
4940 if (!isInt32Immediate(N->getOperand(1), Imm))
4941 return false;
4942
4943 SDLoc dl(N);
4944 SDValue Val = N->getOperand(0);
4945 unsigned SH, MB, ME;
4946 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4947 // with a mask, emit rlwinm
4948 if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
4949 Val = Val.getOperand(0);
4950 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4951 getI32Imm(ME, dl)};
4952 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4953 return true;
4954 }
4955
4956 // If this is just a masked value where the input is not handled, and
4957 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4958 if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4959 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4960 getI32Imm(ME, dl)};
4961 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4962 return true;
4963 }
4964
4965 // AND X, 0 -> 0, not "rlwinm 32".
4966 if (Imm == 0) {
4967 ReplaceUses(SDValue(N, 0), N->getOperand(1));
4968 return true;
4969 }
4970
4971 return false;
4972}
4973
4974bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
4975 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4977 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4978 return false;
4979
4980 unsigned MB, ME;
4981 if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
4982 // MB ME
4983 // +----------------------+
4984 // |xxxxxxxxxxx00011111000|
4985 // +----------------------+
4986 // 0 32 64
4987 // We can only do it if the MB is larger than 32 and MB <= ME
4988 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
4989 // we didn't rotate it.
4990 SDLoc dl(N);
4991 SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
4992 getI64Imm(ME - 32, dl)};
4993 CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
4994 return true;
4995 }
4996
4997 return false;
4998}
4999
5000bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
5001 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5003 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
5004 return false;
5005
5006 // Do nothing if it is 16-bit imm as the pattern in the .td file handle
5007 // it well with "andi.".
5008 if (isUInt<16>(Imm64))
5009 return false;
5010
5011 SDLoc Loc(N);
5012 SDValue Val = N->getOperand(0);
5013
5014 // Optimized with two rldicl's as follows:
5015 // Add missing bits on left to the mask and check that the mask is a
5016 // wrapped run of ones, i.e.
5017 // Change pattern |0001111100000011111111|
5018 // to |1111111100000011111111|.
5019 unsigned NumOfLeadingZeros = llvm::countl_zero(Imm64);
5020 if (NumOfLeadingZeros != 0)
5021 Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
5022
5023 unsigned MB, ME;
5024 if (!isRunOfOnes64(Imm64, MB, ME))
5025 return false;
5026
5027 // ME MB MB-ME+63
5028 // +----------------------+ +----------------------+
5029 // |1111111100000011111111| -> |0000001111111111111111|
5030 // +----------------------+ +----------------------+
5031 // 0 63 0 63
5032 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
5033 unsigned OnesOnLeft = ME + 1;
5034 unsigned ZerosInBetween = (MB - ME + 63) & 63;
5035 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
5036 // on the left the bits that are already zeros in the mask.
5037 Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
5038 getI64Imm(OnesOnLeft, Loc),
5039 getI64Imm(ZerosInBetween, Loc)),
5040 0);
5041 // MB-ME+63 ME MB
5042 // +----------------------+ +----------------------+
5043 // |0000001111111111111111| -> |0001111100000011111111|
5044 // +----------------------+ +----------------------+
5045 // 0 63 0 63
5046 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
5047 // left the number of ones we previously added.
5048 SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
5049 getI64Imm(NumOfLeadingZeros, Loc)};
5050 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5051 return true;
5052}
5053
5054bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
5055 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5056 unsigned Imm;
5057 if (!isInt32Immediate(N->getOperand(1), Imm))
5058 return false;
5059
5060 SDValue Val = N->getOperand(0);
5061 unsigned Imm2;
5062 // ISD::OR doesn't get all the bitfield insertion fun.
5063 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
5064 // bitfield insert.
5065 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
5066 return false;
5067
5068 // The idea here is to check whether this is equivalent to:
5069 // (c1 & m) | (x & ~m)
5070 // where m is a run-of-ones mask. The logic here is that, for each bit in
5071 // c1 and c2:
5072 // - if both are 1, then the output will be 1.
5073 // - if both are 0, then the output will be 0.
5074 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
5075 // come from x.
5076 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
5077 // be 0.
5078 // If that last condition is never the case, then we can form m from the
5079 // bits that are the same between c1 and c2.
5080 unsigned MB, ME;
5081 if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
5082 SDLoc dl(N);
5083 SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
5084 getI32Imm(MB, dl), getI32Imm(ME, dl)};
5085 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
5086 return true;
5087 }
5088
5089 return false;
5090}
5091
5092bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5093 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5094
5096 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5097 return false;
5098
5099 SDValue Val = N->getOperand(0);
5100
5101 if (Val.getOpcode() != ISD::ROTL)
5102 return false;
5103
5104 // Looking to try to avoid a situation like this one:
5105 // %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5106 // %and1 = and i64 %2, 9223372036854775807
5107 // In this function we are looking to try to match RLDCL. However, the above
5108 // DAG would better match RLDICL instead which is not what we are looking
5109 // for here.
5110 SDValue RotateAmt = Val.getOperand(1);
5111 if (RotateAmt.getOpcode() == ISD::Constant)
5112 return false;
5113
5114 unsigned MB = 64 - llvm::countr_one(Imm64);
5115 SDLoc dl(N);
5116 SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
5117 CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
5118 return true;
5119}
5120
5121bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
5122 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5124 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5125 return false;
5126
5127 // If this is a 64-bit zero-extension mask, emit rldicl.
5128 unsigned MB = 64 - llvm::countr_one(Imm64);
5129 unsigned SH = 0;
5130 unsigned Imm;
5131 SDValue Val = N->getOperand(0);
5132 SDLoc dl(N);
5133
5134 if (Val.getOpcode() == ISD::ANY_EXTEND) {
5135 auto Op0 = Val.getOperand(0);
5136 if (Op0.getOpcode() == ISD::SRL &&
5137 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
5138
5139 auto ResultType = Val.getNode()->getValueType(0);
5140 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
5141 SDValue IDVal(ImDef, 0);
5142
5143 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
5144 IDVal, Op0.getOperand(0),
5145 getI32Imm(1, dl)),
5146 0);
5147 SH = 64 - Imm;
5148 }
5149 }
5150
5151 // If the operand is a logical right shift, we can fold it into this
5152 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
5153 // for n <= mb. The right shift is really a left rotate followed by a
5154 // mask, and this mask is a more-restrictive sub-mask of the mask implied
5155 // by the shift.
5156 if (Val.getOpcode() == ISD::SRL &&
5157 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
5158 assert(Imm < 64 && "Illegal shift amount");
5159 Val = Val.getOperand(0);
5160 SH = 64 - Imm;
5161 }
5162
5163 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
5164 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5165 return true;
5166}
5167
5168bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
5169 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5171 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5172 !isMask_64(~Imm64))
5173 return false;
5174
5175 // If this is a negated 64-bit zero-extension mask,
5176 // i.e. the immediate is a sequence of ones from most significant side
5177 // and all zero for reminder, we should use rldicr.
5178 unsigned MB = 63 - llvm::countr_one(~Imm64);
5179 unsigned SH = 0;
5180 SDLoc dl(N);
5181 SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
5182 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5183 return true;
5184}
5185
5186bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
5187 assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
5189 unsigned MB, ME;
5190 SDValue N0 = N->getOperand(0);
5191
5192 // We won't get fewer instructions if the imm is 32-bit integer.
5193 // rldimi requires the imm to have consecutive ones with both sides zero.
5194 // Also, make sure the first Op has only one use, otherwise this may increase
5195 // register pressure since rldimi is destructive.
5196 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5197 isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
5198 return false;
5199
5200 unsigned SH = 63 - ME;
5201 SDLoc Dl(N);
5202 // Use select64Imm for making LI instr instead of directly putting Imm64
5203 SDValue Ops[] = {
5204 N->getOperand(0),
5205 SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
5206 getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
5207 CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
5208 return true;
5209}
5210
5211// Select - Convert the specified operand from a target-independent to a
5212// target-specific node if it hasn't already been changed.
5213void PPCDAGToDAGISel::Select(SDNode *N) {
5214 SDLoc dl(N);
5215 if (N->isMachineOpcode()) {
5216 N->setNodeId(-1);
5217 return; // Already selected.
5218 }
5219
5220 // In case any misguided DAG-level optimizations form an ADD with a
5221 // TargetConstant operand, crash here instead of miscompiling (by selecting
5222 // an r+r add instead of some kind of r+i add).
5223 if (N->getOpcode() == ISD::ADD &&
5224 N->getOperand(1).getOpcode() == ISD::TargetConstant)
5225 llvm_unreachable("Invalid ADD with TargetConstant operand");
5226
5227 // Try matching complex bit permutations before doing anything else.
5228 if (tryBitPermutation(N))
5229 return;
5230
5231 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
5232 if (tryIntCompareInGPR(N))
5233 return;
5234
5235 switch (N->getOpcode()) {
5236 default: break;
5237
5238 case ISD::Constant:
5239 if (N->getValueType(0) == MVT::i64) {
5240 ReplaceNode(N, selectI64Imm(CurDAG, N));
5241 return;
5242 }
5243 break;
5244
5245 case ISD::INTRINSIC_VOID: {
5246 auto IntrinsicID = N->getConstantOperandVal(1);
5247 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
5248 IntrinsicID != Intrinsic::ppc_trapd &&
5249 IntrinsicID != Intrinsic::ppc_trap)
5250 break;
5251 unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||
5252 IntrinsicID == Intrinsic::ppc_trapd)
5253 ? PPC::TDI
5254 : PPC::TWI;
5255 SmallVector<SDValue, 4> OpsWithMD;
5256 unsigned MDIndex;
5257 if (IntrinsicID == Intrinsic::ppc_tdw ||
5258 IntrinsicID == Intrinsic::ppc_tw) {
5259 SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
5260 int16_t SImmOperand2;
5261 int16_t SImmOperand3;
5262 int16_t SImmOperand4;
5263 bool isOperand2IntS16Immediate =
5264 isIntS16Immediate(N->getOperand(2), SImmOperand2);
5265 bool isOperand3IntS16Immediate =
5266 isIntS16Immediate(N->getOperand(3), SImmOperand3);
5267 // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5268 // reg or imm + imm. The imm + imm form will be optimized to either an
5269 // unconditional trap or a nop in a later pass.
5270 if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5271 Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5272 else if (isOperand3IntS16Immediate)
5273 // The 2nd and 3rd operands are reg + imm.
5274 Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
5275 else {
5276 // The 2nd and 3rd operands are imm + reg.
5277 bool isOperand4IntS16Immediate =
5278 isIntS16Immediate(N->getOperand(4), SImmOperand4);
5279 (void)isOperand4IntS16Immediate;
5280 assert(isOperand4IntS16Immediate &&
5281 "The 4th operand is not an Immediate");
5282 // We need to flip the condition immediate TO.
5283 int16_t TO = int(SImmOperand4) & 0x1F;
5284 // We swap the first and second bit of TO if they are not same.
5285 if ((TO & 0x1) != ((TO & 0x2) >> 1))
5286 TO = (TO & 0x1) ? TO + 1 : TO - 1;
5287 // We swap the fourth and fifth bit of TO if they are not same.
5288 if ((TO & 0x8) != ((TO & 0x10) >> 1))
5289 TO = (TO & 0x8) ? TO + 8 : TO - 8;
5290 Ops[0] = getI32Imm(TO, dl);
5291 Ops[1] = N->getOperand(3);
5292 Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
5293 }
5294 OpsWithMD = {Ops[0], Ops[1], Ops[2]};
5295 MDIndex = 5;
5296 } else {
5297 OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)};
5298 MDIndex = 3;
5299 }
5300
5301 if (N->getNumOperands() > MDIndex) {
5302 SDValue MDV = N->getOperand(MDIndex);
5303 const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
5304 assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!");
5305 assert((isa<MDString>(MD->getOperand(0)) && cast<MDString>(
5306 MD->getOperand(0))->getString().equals("ppc-trap-reason"))
5307 && "Unsupported annotation data type!");
5308 for (unsigned i = 1; i < MD->getNumOperands(); i++) {
5309 assert(isa<MDString>(MD->getOperand(i)) &&
5310 "Invalid data type for annotation ppc-trap-reason!");
5311 OpsWithMD.push_back(
5312 getI32Imm(std::stoi(cast<MDString>(
5313 MD->getOperand(i))->getString().str()), dl));
5314 }
5315 }
5316 OpsWithMD.push_back(N->getOperand(0)); // chain
5317 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);
5318 return;
5319 }
5320
5322 // We emit the PPC::FSELS instruction here because of type conflicts with
5323 // the comparison operand. The FSELS instruction is defined to use an 8-byte
5324 // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5325 // value for the comparison. When selecting through a .td file, a type
5326 // error is raised. Must check this first so we never break on the
5327 // !Subtarget->isISA3_1() check.
5328 auto IntID = N->getConstantOperandVal(0);
5329 if (IntID == Intrinsic::ppc_fsels) {
5330 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
5331 CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
5332 return;
5333 }
5334
5335 if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
5336 auto Pred = N->getConstantOperandVal(1);
5337 unsigned Opcode =
5338 IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5339 unsigned SubReg = 0;
5340 unsigned ShiftVal = 0;
5341 bool Reverse = false;
5342 switch (Pred) {
5343 case 0:
5344 SubReg = PPC::sub_eq;
5345 ShiftVal = 1;
5346 break;
5347 case 1:
5348 SubReg = PPC::sub_eq;
5349 ShiftVal = 1;
5350 Reverse = true;
5351 break;
5352 case 2:
5353 SubReg = PPC::sub_lt;
5354 ShiftVal = 3;
5355 break;
5356 case 3:
5357 SubReg = PPC::sub_lt;
5358 ShiftVal = 3;
5359 Reverse = true;
5360 break;
5361 case 4:
5362 SubReg = PPC::sub_gt;
5363 ShiftVal = 2;
5364 break;
5365 case 5:
5366 SubReg = PPC::sub_gt;
5367 ShiftVal = 2;
5368 Reverse = true;
5369 break;
5370 case 6:
5371 SubReg = PPC::sub_un;
5372 break;
5373 case 7:
5374 SubReg = PPC::sub_un;
5375 Reverse = true;
5376 break;
5377 }
5378
5379 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5380 SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
5381 CurDAG->getTargetConstant(0, dl, MVT::i32)};
5382 SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
5383 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5384 // On Power10, we can use SETBC[R]. On prior architectures, we have to use
5385 // MFOCRF and shift/negate the value.
5386 if (Subtarget->isISA3_1()) {
5387 SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
5388 SDValue CRBit = SDValue(
5389 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5390 CR6Reg, SubRegIdx, BCDOp.getValue(1)),
5391 0);
5392 CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
5393 CRBit);
5394 } else {
5395 SDValue Move =
5396 SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
5397 BCDOp.getValue(1)),
5398 0);
5399 SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
5400 getI32Imm(31, dl), getI32Imm(31, dl)};
5401 if (!Reverse)
5402 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5403 else {
5404 SDValue Shift = SDValue(
5405 CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
5406 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
5407 }
5408 }
5409 return;
5410 }
5411
5412 if (!Subtarget->isISA3_1())
5413 break;
5414 unsigned Opcode = 0;
5415 switch (IntID) {
5416 default:
5417 break;
5418 case Intrinsic::ppc_altivec_vstribr_p:
5419 Opcode = PPC::VSTRIBR_rec;
5420 break;
5421 case Intrinsic::ppc_altivec_vstribl_p:
5422 Opcode = PPC::VSTRIBL_rec;
5423 break;
5424 case Intrinsic::ppc_altivec_vstrihr_p:
5425 Opcode = PPC::VSTRIHR_rec;
5426 break;
5427 case Intrinsic::ppc_altivec_vstrihl_p:
5428 Opcode = PPC::VSTRIHL_rec;
5429 break;
5430 }
5431 if (!Opcode)
5432 break;
5433
5434 // Generate the appropriate vector string isolate intrinsic to match.
5435 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5436 SDValue VecStrOp =
5437 SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
5438 // Vector string isolate instructions update the EQ bit of CR6.
5439 // Generate a SETBC instruction to extract the bit and place it in a GPR.
5440 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
5441 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5442 SDValue CRBit = SDValue(
5443 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5444 CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
5445 0);
5446 CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
5447 return;
5448 }
5449
5450 case ISD::SETCC:
5451 case ISD::STRICT_FSETCC:
5453 if (trySETCC(N))
5454 return;
5455 break;
5456 // These nodes will be transformed into GETtlsADDR32 node, which
5457 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5460 const Module *Mod = MF->getFunction().getParent();
5461 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5462 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
5464 break;
5465 // Attach global base pointer on GETtlsADDR32 node in order to
5466 // generate secure plt code for TLS symbols.
5467 getGlobalBaseReg();
5468 } break;
5469 case PPCISD::CALL: {
5470 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5471 !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
5472 !Subtarget->isTargetELF())
5473 break;
5474
5475 SDValue Op = N->getOperand(1);
5476
5477 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5478 if (GA->getTargetFlags() == PPCII::MO_PLT)
5479 getGlobalBaseReg();
5480 }
5481 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
5482 if (ES->getTargetFlags() == PPCII::MO_PLT)
5483 getGlobalBaseReg();
5484 }
5485 }
5486 break;
5487
5489 ReplaceNode(N, getGlobalBaseReg());
5490 return;
5491
5492 case ISD::FrameIndex:
5493 selectFrameIndex(N, N);
5494 return;
5495
5496 case PPCISD::MFOCRF: {
5497 SDValue InGlue = N->getOperand(1);
5498 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
5499 N->getOperand(0), InGlue));
5500 return;
5501 }
5502
5504 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
5505 MVT::Other, N->getOperand(0)));
5506 return;
5507
5508 case PPCISD::SRA_ADDZE: {
5509 SDValue N0 = N->getOperand(0);
5510 SDValue ShiftAmt =
5511 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
5512 getConstantIntValue(), dl,
5513 N->getValueType(0));
5514 if (N->getValueType(0) == MVT::i64) {
5515 SDNode *Op =
5516 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
5517 N0, ShiftAmt);
5518 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
5519 SDValue(Op, 1));
5520 return;
5521 } else {
5522 assert(N->getValueType(0) == MVT::i32 &&
5523 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5524 SDNode *Op =
5525 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
5526 N0, ShiftAmt);
5527 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
5528 SDValue(Op, 1));
5529 return;
5530 }
5531 }
5532
5533 case ISD::STORE: {
5534 // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
5535 // X-form stores.
5536 StoreSDNode *ST = cast<StoreSDNode>(N);
5537 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
5538 ST->getAddressingMode() != ISD::PRE_INC)
5539 if (tryTLSXFormStore(ST))
5540 return;
5541 break;
5542 }
5543 case ISD::LOAD: {
5544 // Handle preincrement loads.
5545 LoadSDNode *LD = cast<LoadSDNode>(N);
5546 EVT LoadedVT = LD->getMemoryVT();
5547
5548 // Normal loads are handled by code generated from the .td file.
5549 if (LD->getAddressingMode() != ISD::PRE_INC) {
5550 // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
5551 // X-form loads.
5552 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()))
5553 if (tryTLSXFormLoad(LD))
5554 return;
5555 break;
5556 }
5557
5558 SDValue Offset = LD->getOffset();
5559 if (Offset.getOpcode() == ISD::TargetConstant ||
5560 Offset.getOpcode() == ISD::TargetGlobalAddress) {
5561
5562 unsigned Opcode;
5563 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5564 if (LD->getValueType(0) != MVT::i64) {
5565 // Handle PPC32 integer and normal FP loads.
5566 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5567 switch (LoadedVT.getSimpleVT().SimpleTy) {
5568 default: llvm_unreachable("Invalid PPC load type!");
5569 case MVT::f64: Opcode = PPC::LFDU; break;
5570 case MVT::f32: Opcode = PPC::LFSU; break;
5571 case MVT::i32: Opcode = PPC::LWZU; break;
5572 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5573 case MVT::i1:
5574 case MVT::i8: Opcode = PPC::LBZU; break;
5575 }
5576 } else {
5577 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5578 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5579 switch (LoadedVT.getSimpleVT().SimpleTy) {
5580 default: llvm_unreachable("Invalid PPC load type!");
5581 case MVT::i64: Opcode = PPC::LDU; break;
5582 case MVT::i32: Opcode = PPC::LWZU8; break;
5583 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5584 case MVT::i1:
5585 case MVT::i8: Opcode = PPC::LBZU8; break;
5586 }
5587 }
5588
5589 SDValue Chain = LD->getChain();
5590 SDValue Base = LD->getBasePtr();
5591 SDValue Ops[] = { Offset, Base, Chain };
5592 SDNode *MN = CurDAG->getMachineNode(
5593 Opcode, dl, LD->getValueType(0),
5594 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5595 transferMemOperands(N, MN);
5596 ReplaceNode(N, MN);
5597 return;
5598 } else {
5599 unsigned Opcode;
5600 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5601 if (LD->getValueType(0) != MVT::i64) {
5602 // Handle PPC32 integer and normal FP loads.
5603 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5604 switch (LoadedVT.getSimpleVT().SimpleTy) {
5605 default: llvm_unreachable("Invalid PPC load type!");
5606 case MVT::f64: Opcode = PPC::LFDUX; break;
5607 case MVT::f32: Opcode = PPC::LFSUX; break;
5608 case MVT::i32: Opcode = PPC::LWZUX; break;
5609 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5610 case MVT::i1:
5611 case MVT::i8: Opcode = PPC::LBZUX; break;
5612 }
5613 } else {
5614 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5615 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
5616 "Invalid sext update load");
5617 switch (LoadedVT.getSimpleVT().SimpleTy) {
5618 default: llvm_unreachable("Invalid PPC load type!");
5619 case MVT::i64: Opcode = PPC::LDUX; break;
5620 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5621 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5622 case MVT::i1:
5623 case MVT::i8: Opcode = PPC::LBZUX8; break;
5624 }
5625 }
5626
5627 SDValue Chain = LD->getChain();
5628 SDValue Base = LD->getBasePtr();
5629 SDValue Ops[] = { Base, Offset, Chain };
5630 SDNode *MN = CurDAG->getMachineNode(
5631 Opcode, dl, LD->getValueType(0),
5632 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5633 transferMemOperands(N, MN);
5634 ReplaceNode(N, MN);
5635 return;
5636 }
5637 }
5638
5639 case ISD::AND:
5640 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5641 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
5642 tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
5643 tryAsPairOfRLDICL(N))
5644 return;
5645
5646 // Other cases are autogenerated.
5647 break;
5648 case ISD::OR: {
5649 if (N->getValueType(0) == MVT::i32)
5650 if (tryBitfieldInsert(N))
5651 return;
5652
5653 int16_t Imm;
5654 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5655 isIntS16Immediate(N->getOperand(1), Imm)) {
5656 KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
5657
5658 // If this is equivalent to an add, then we can fold it with the
5659 // FrameIndex calculation.
5660 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
5661 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5662 return;
5663 }
5664 }
5665
5666 // If this is 'or' against an imm with consecutive ones and both sides zero,
5667 // try to emit rldimi
5668 if (tryAsSingleRLDIMI(N))
5669 return;
5670
5671 // OR with a 32-bit immediate can be handled by ori + oris
5672 // without creating an immediate in a GPR.
5673 uint64_t Imm64 = 0;
5674 bool IsPPC64 = Subtarget->isPPC64();
5675 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5676 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5677 // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
5678 uint64_t ImmHi = Imm64 >> 16;
5679 uint64_t ImmLo = Imm64 & 0xFFFF;
5680 if (ImmHi != 0 && ImmLo != 0) {
5681 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
5682 N->getOperand(0),
5683 getI16Imm(ImmLo, dl));
5684 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5685 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
5686 return;
5687 }
5688 }
5689
5690 // Other cases are autogenerated.
5691 break;
5692 }
5693 case ISD::XOR: {
5694 // XOR with a 32-bit immediate can be handled by xori + xoris
5695 // without creating an immediate in a GPR.
5696 uint64_t Imm64 = 0;
5697 bool IsPPC64 = Subtarget->isPPC64();
5698 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5699 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5700 // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
5701 uint64_t ImmHi = Imm64 >> 16;
5702 uint64_t ImmLo = Imm64 & 0xFFFF;
5703 if (ImmHi != 0 && ImmLo != 0) {
5704 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
5705 N->getOperand(0),
5706 getI16Imm(ImmLo, dl));
5707 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5708 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
5709 return;
5710 }
5711 }
5712
5713 break;
5714 }
5715 case ISD::ADD: {
5716 int16_t Imm;
5717 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5718 isIntS16Immediate(N->getOperand(1), Imm)) {
5719 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5720 return;
5721 }
5722
5723 break;
5724 }
5725 case ISD::SHL: {
5726 unsigned Imm, SH, MB, ME;
5727 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5728 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5729 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5730 getI32Imm(SH, dl), getI32Imm(MB, dl),
5731 getI32Imm(ME, dl) };
5732 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5733 return;
5734 }
5735
5736 // Other cases are autogenerated.
5737 break;
5738 }
5739 case ISD::SRL: {
5740 unsigned Imm, SH, MB, ME;
5741 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5742 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5743 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5744 getI32Imm(SH, dl), getI32Imm(MB, dl),
5745 getI32Imm(ME, dl) };
5746 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5747 return;
5748 }
5749
5750 // Other cases are autogenerated.
5751 break;
5752 }
5753 case ISD::MUL: {
5754 SDValue Op1 = N->getOperand(1);
5755 if (Op1.getOpcode() != ISD::Constant ||
5756 (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
5757 break;
5758
5759 // If the multiplier fits int16, we can handle it with mulli.
5760 int64_t Imm = Op1->getAsZExtVal();
5761 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
5762 if (isInt<16>(Imm) || !Shift)
5763 break;
5764
5765 // If the shifted value fits int16, we can do this transformation:
5766 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
5767 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5768 uint64_t ImmSh = Imm >> Shift;
5769 if (!isInt<16>(ImmSh))
5770 break;
5771
5772 uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
5773 if (Op1.getValueType() == MVT::i64) {
5774 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
5775 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
5776 N->getOperand(0), SDImm);
5777
5778 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5779 getI32Imm(63 - Shift, dl)};
5780 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5781 return;
5782 } else {
5783 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
5784 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
5785 N->getOperand(0), SDImm);
5786
5787 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5788 getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};
5789 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5790 return;
5791 }
5792 break;
5793 }
5794 // FIXME: Remove this once the ANDI glue bug is fixed:
5797 if (!ANDIGlueBug)
5798 break;
5799
5800 EVT InVT = N->getOperand(0).getValueType();
5801 assert((InVT == MVT::i64 || InVT == MVT::i32) &&
5802 "Invalid input type for ANDI_rec_1_EQ_BIT");
5803
5804 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5805 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
5806 N->getOperand(0),
5807 CurDAG->getTargetConstant(1, dl, InVT)),
5808 0);
5809 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
5810 SDValue SRIdxVal = CurDAG->getTargetConstant(
5811 N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5812 dl, MVT::i32);
5813
5814 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
5815 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
5816 return;
5817 }
5818 case ISD::SELECT_CC: {
5819 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
5820 EVT PtrVT =
5822 bool isPPC64 = (PtrVT == MVT::i64);
5823
5824 // If this is a select of i1 operands, we'll pattern match it.
5825 if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
5826 break;
5827
5828 if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5829 bool NeedSwapOps = false;
5830 bool IsUnCmp = false;
5831 if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
5832 SDValue LHS = N->getOperand(0);
5833 SDValue RHS = N->getOperand(1);
5834 if (NeedSwapOps)
5835 std::swap(LHS, RHS);
5836
5837 // Make use of SelectCC to generate the comparison to set CR bits, for
5838 // equality comparisons having one literal operand, SelectCC probably
5839 // doesn't need to materialize the whole literal and just use xoris to
5840 // check it first, it leads the following comparison result can't
5841 // exactly represent GT/LT relationship. So to avoid this we specify
5842 // SETGT/SETUGT here instead of SETEQ.
5843 SDValue GenCC =
5844 SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5845 CurDAG->SelectNodeTo(
5846 N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5847 N->getValueType(0), GenCC);
5848 NumP9Setb++;
5849 return;
5850 }
5851 }
5852
5853 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5854 if (!isPPC64 && isNullConstant(N->getOperand(1)) &&
5855 isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) &&
5856 CC == ISD::SETNE &&
5857 // FIXME: Implement this optzn for PPC64.
5858 N->getValueType(0) == MVT::i32) {
5859 SDNode *Tmp =
5860 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5861 N->getOperand(0), getI32Imm(~0U, dl));
5862 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
5863 N->getOperand(0), SDValue(Tmp, 1));
5864 return;
5865 }
5866
5867 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
5868
5869 if (N->getValueType(0) == MVT::i1) {
5870 // An i1 select is: (c & t) | (!c & f).
5871 bool Inv;
5872 unsigned Idx = getCRIdxForSetCC(CC, Inv);
5873
5874 unsigned SRI;
5875 switch (Idx) {
5876 default: llvm_unreachable("Invalid CC index");
5877 case 0: SRI = PPC::sub_lt; break;
5878 case 1: SRI = PPC::sub_gt; break;
5879 case 2: SRI = PPC::sub_eq; break;
5880 case 3: SRI = PPC::sub_un; break;
5881 }
5882
5883 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5884
5885 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5886 CCBit, CCBit), 0);
5887 SDValue C = Inv ? NotCCBit : CCBit,
5888 NotC = Inv ? CCBit : NotCCBit;
5889
5890 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5891 C, N->getOperand(2)), 0);
5892 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5893 NotC, N->getOperand(3)), 0);
5894
5895 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
5896 return;
5897 }
5898
5899 unsigned BROpc =
5900 getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
5901
5902 unsigned SelectCCOp;
5903 if (N->getValueType(0) == MVT::i32)
5904 SelectCCOp = PPC::SELECT_CC_I4;
5905 else if (N->getValueType(0) == MVT::i64)
5906 SelectCCOp = PPC::SELECT_CC_I8;
5907 else if (N->getValueType(0) == MVT::f32) {
5908 if (Subtarget->hasP8Vector())
5909 SelectCCOp = PPC::SELECT_CC_VSSRC;
5910 else if (Subtarget->hasSPE())
5911 SelectCCOp = PPC::SELECT_CC_SPE4;
5912 else
5913 SelectCCOp = PPC::SELECT_CC_F4;
5914 } else if (N->getValueType(0) == MVT::f64) {
5915 if (Subtarget->hasVSX())
5916 SelectCCOp = PPC::SELECT_CC_VSFRC;
5917 else if (Subtarget->hasSPE())
5918 SelectCCOp = PPC::SELECT_CC_SPE;
5919 else
5920 SelectCCOp = PPC::SELECT_CC_F8;
5921 } else if (N->getValueType(0) == MVT::f128)
5922 SelectCCOp = PPC::SELECT_CC_F16;
5923 else if (Subtarget->hasSPE())
5924 SelectCCOp = PPC::SELECT_CC_SPE;
5925 else if (N->getValueType(0) == MVT::v2f64 ||
5926 N->getValueType(0) == MVT::v2i64)
5927 SelectCCOp = PPC::SELECT_CC_VSRC;
5928 else
5929 SelectCCOp = PPC::SELECT_CC_VRRC;
5930
5931 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
5932 getI32Imm(BROpc, dl) };
5933 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
5934 return;
5935 }
5937 if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
5938 N->getValueType(0) == MVT::v2i64)) {
5939 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
5940
5941 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
5942 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
5943 unsigned DM[2];
5944
5945 for (int i = 0; i < 2; ++i)
5946 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
5947 DM[i] = 0;
5948 else
5949 DM[i] = 1;
5950
5951 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5953 isa<LoadSDNode>(Op1.getOperand(0))) {
5954 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
5956
5957 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5958 (LD->getMemoryVT() == MVT::f64 ||
5959 LD->getMemoryVT() == MVT::i64) &&
5960 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
5961 SDValue Chain = LD->getChain();
5962 SDValue Ops[] = { Base, Offset, Chain };
5963 MachineMemOperand *MemOp = LD->getMemOperand();
5964 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
5965 N->getValueType(0), Ops);
5966 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
5967 return;
5968 }
5969 }
5970
5971 // For little endian, we must swap the input operands and adjust
5972 // the mask elements (reverse and invert them).
5973 if (Subtarget->isLittleEndian()) {
5974 std::swap(Op1, Op2);
5975 unsigned tmp = DM[0];
5976 DM[0] = 1 - DM[1];
5977 DM[1] = 1 - tmp;
5978 }
5979
5980 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
5981 MVT::i32);
5982 SDValue Ops[] = { Op1, Op2, DMV };
5983 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
5984 return;
5985 }
5986
5987 break;
5988 case PPCISD::BDNZ:
5989 case PPCISD::BDZ: {
5990 bool IsPPC64 = Subtarget->isPPC64();
5991 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
5992 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
5993 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
5994 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
5995 MVT::Other, Ops);
5996 return;
5997 }
5998 case PPCISD::COND_BRANCH: {
5999 // Op #0 is the Chain.
6000 // Op #1 is the PPC::PRED_* number.
6001 // Op #2 is the CR#
6002 // Op #3 is the Dest MBB
6003 // Op #4 is the Flag.
6004 // Prevent PPC::PRED_* from being selected into LI.
6005 unsigned PCC = N->getConstantOperandVal(1);
6006 if (EnableBranchHint)
6007 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
6008
6009 SDValue Pred = getI32Imm(PCC, dl);
6010 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
6011 N->getOperand(0), N->getOperand(4) };
6012 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6013 return;
6014 }
6015 case ISD::BR_CC: {
6016 if (tryFoldSWTestBRCC(N))
6017 return;
6018 if (trySelectLoopCountIntrinsic(N))
6019 return;
6020 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
6021 unsigned PCC =
6022 getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
6023
6024 if (N->getOperand(2).getValueType() == MVT::i1) {
6025 unsigned Opc;
6026 bool Swap;
6027 switch (PCC) {
6028 default: llvm_unreachable("Unexpected Boolean-operand predicate");
6029 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
6030 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
6031 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
6032 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
6033 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
6034 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
6035 }
6036
6037 // A signed comparison of i1 values produces the opposite result to an
6038 // unsigned one if the condition code includes less-than or greater-than.
6039 // This is because 1 is the most negative signed i1 number and the most
6040 // positive unsigned i1 number. The CR-logical operations used for such
6041 // comparisons are non-commutative so for signed comparisons vs. unsigned
6042 // ones, the input operands just need to be swapped.
6044 Swap = !Swap;
6045
6046 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
6047 N->getOperand(Swap ? 3 : 2),
6048 N->getOperand(Swap ? 2 : 3)), 0);
6049 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
6050 N->getOperand(0));
6051 return;
6052 }
6053
6054 if (EnableBranchHint)
6055 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
6056
6057 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
6058 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
6059 N->getOperand(4), N->getOperand(0) };
6060 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6061 return;
6062 }
6063 case ISD::BRIND: {
6064 // FIXME: Should custom lower this.
6065 SDValue Chain = N->getOperand(0);
6066 SDValue Target = N->getOperand(1);
6067 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
6068 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
6069 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
6070 Chain), 0);
6071 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
6072 return;
6073 }
6074 case PPCISD::TOC_ENTRY: {
6075 const bool isPPC64 = Subtarget->isPPC64();
6076 const bool isELFABI = Subtarget->isSVR4ABI();
6077 const bool isAIXABI = Subtarget->isAIXABI();
6078
6079 // PowerPC only support small, medium and large code model.
6080 const CodeModel::Model CModel = getCodeModel(*Subtarget, TM, N);
6081
6082 assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
6083 "PowerPC doesn't support tiny or kernel code models.");
6084
6085 if (isAIXABI && CModel == CodeModel::Medium)
6086 report_fatal_error("Medium code model is not supported on AIX.");
6087
6088 // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
6089 // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
6090 // small code model, we need to check for a toc-data attribute.
6091 if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
6092 break;
6093
6094 auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
6095 EVT OperandTy) {
6096 SDValue GA = TocEntry->getOperand(0);
6097 SDValue TocBase = TocEntry->getOperand(1);
6098 SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
6099 transferMemOperands(TocEntry, MN);
6100 ReplaceNode(TocEntry, MN);
6101 };
6102
6103 // Handle 32-bit small code model.
6104 if (!isPPC64 && CModel == CodeModel::Small) {
6105 // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
6106 // PPC::ADDItoc, or PPC::LWZtoc
6107 if (isELFABI) {
6108 assert(TM.isPositionIndependent() &&
6109 "32-bit ELF can only have TOC entries in position independent"
6110 " code.");
6111 // 32-bit ELF always uses a small code model toc access.
6112 replaceWith(PPC::LWZtoc, N, MVT::i32);
6113 return;
6114 }
6115
6116 assert(isAIXABI && "ELF ABI already handled");
6117
6118 if (hasTocDataAttr(N->getOperand(0))) {
6119 replaceWith(PPC::ADDItoc, N, MVT::i32);
6120 return;
6121 }
6122
6123 replaceWith(PPC::LWZtoc, N, MVT::i32);
6124 return;
6125 }
6126
6127 if (isPPC64 && CModel == CodeModel::Small) {
6128 assert(isAIXABI && "ELF ABI handled in common SelectCode");
6129
6130 if (hasTocDataAttr(N->getOperand(0))) {
6131 replaceWith(PPC::ADDItoc8, N, MVT::i64);
6132 return;
6133 }
6134 // Break if it doesn't have toc data attribute. Proceed with common
6135 // SelectCode.
6136 break;
6137 }
6138
6139 assert(CModel != CodeModel::Small && "All small code models handled.");
6140
6141 assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
6142 " ELF/AIX or 32-bit AIX in the following.");
6143
6144 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
6145 // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code non
6146 // toc-data symbols.
6147 // We generate two instructions as described below. The first source
6148 // operand is a symbol reference. If it must be toc-referenced according to
6149 // Subtarget, we generate:
6150 // [32-bit AIX]
6151 // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
6152 // [64-bit ELF/AIX]
6153 // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
6154 // Otherwise we generate:
6155 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6156
6157 // For large code model toc-data symbols we generate:
6158 // [32-bit AIX]
6159 // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
6160 // [64-bit AIX]
6161 // Currently not supported.
6162
6163 SDValue GA = N->getOperand(0);
6164 SDValue TOCbase = N->getOperand(1);
6165
6166 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
6167 SDNode *Tmp = CurDAG->getMachineNode(
6168 isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
6169
6170 // On AIX if the symbol has the toc-data attribute it will be defined
6171 // in the TOC entry, so we use an ADDItocL similar to the medium code
6172 // model ELF abi.
6173 if (isAIXABI && hasTocDataAttr(GA)) {
6174 if (isPPC64)
6176 "64-bit large code model toc-data not yet supported");
6177
6178 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, VT,
6179 SDValue(Tmp, 0), GA));
6180 return;
6181 }
6182
6183 if (PPCLowering->isAccessedAsGotIndirect(GA)) {
6184 // If it is accessed as got-indirect, we need an extra LWZ/LD to load
6185 // the address.
6186 SDNode *MN = CurDAG->getMachineNode(
6187 isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
6188
6189 transferMemOperands(N, MN);
6190 ReplaceNode(N, MN);
6191 return;
6192 }
6193
6194 // Build the address relative to the TOC-pointer.
6195 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64,
6196 SDValue(Tmp, 0), GA));
6197 return;
6198 }
6200 // Generate a PIC-safe GOT reference.
6201 assert(Subtarget->is32BitELFABI() &&
6202 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
6203 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
6204 PPCLowering->getPointerTy(CurDAG->getDataLayout()),
6205 MVT::i32);
6206 return;
6207
6208 case PPCISD::VADD_SPLAT: {
6209 // This expands into one of three sequences, depending on whether
6210 // the first operand is odd or even, positive or negative.
6211 assert(isa<ConstantSDNode>(N->getOperand(0)) &&
6212 isa<ConstantSDNode>(N->getOperand(1)) &&
6213 "Invalid operand on VADD_SPLAT!");
6214
6215 int Elt = N->getConstantOperandVal(0);
6216 int EltSize = N->getConstantOperandVal(1);
6217 unsigned Opc1, Opc2, Opc3;
6218 EVT VT;
6219
6220 if (EltSize == 1) {
6221 Opc1 = PPC::VSPLTISB;
6222 Opc2 = PPC::VADDUBM;
6223 Opc3 = PPC::VSUBUBM;
6224 VT = MVT::v16i8;
6225 } else if (EltSize == 2) {
6226 Opc1 = PPC::VSPLTISH;
6227 Opc2 = PPC::VADDUHM;
6228 Opc3 = PPC::VSUBUHM;
6229 VT = MVT::v8i16;
6230 } else {
6231 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
6232 Opc1 = PPC::VSPLTISW;
6233 Opc2 = PPC::VADDUWM;
6234 Opc3 = PPC::VSUBUWM;
6235 VT = MVT::v4i32;
6236 }
6237
6238 if ((Elt & 1) == 0) {
6239 // Elt is even, in the range [-32,-18] + [16,30].
6240 //
6241 // Convert: VADD_SPLAT elt, size
6242 // Into: tmp = VSPLTIS[BHW] elt
6243 // VADDU[BHW]M tmp, tmp
6244 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
6245 SDValue EltVal = getI32Imm(Elt >> 1, dl);
6246 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6247 SDValue TmpVal = SDValue(Tmp, 0);
6248 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
6249 return;
6250 } else if (Elt > 0) {
6251 // Elt is odd and positive, in the range [17,31].
6252 //
6253 // Convert: VADD_SPLAT elt, size
6254 // Into: tmp1 = VSPLTIS[BHW] elt-16
6255 // tmp2 = VSPLTIS[BHW] -16
6256 // VSUBU[BHW]M tmp1, tmp2
6257 SDValue EltVal = getI32Imm(Elt - 16, dl);
6258 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6259 EltVal = getI32Imm(-16, dl);
6260 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6261 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
6262 SDValue(Tmp2, 0)));
6263 return;
6264 } else {
6265 // Elt is odd and negative, in the range [-31,-17].
6266 //
6267 // Convert: VADD_SPLAT elt, size
6268 // Into: tmp1 = VSPLTIS[BHW] elt+16
6269 // tmp2 = VSPLTIS[BHW] -16
6270 // VADDU[BHW]M tmp1, tmp2
6271 SDValue EltVal = getI32Imm(Elt + 16, dl);
6272 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6273 EltVal = getI32Imm(-16, dl);
6274 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6275 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
6276 SDValue(Tmp2, 0)));
6277 return;
6278 }
6279 }
6280 case PPCISD::LD_SPLAT: {
6281 // Here we want to handle splat load for type v16i8 and v8i16 when there is
6282 // no direct move, we don't need to use stack for this case. If target has
6283 // direct move, we should be able to get the best selection in the .td file.
6284 if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
6285 break;
6286
6287 EVT Type = N->getValueType(0);
6288 if (Type != MVT::v16i8 && Type != MVT::v8i16)
6289 break;
6290
6291 // If the alignment for the load is 16 or bigger, we don't need the
6292 // permutated mask to get the required value. The value must be the 0
6293 // element in big endian target or 7/15 in little endian target in the
6294 // result vsx register of lvx instruction.
6295 // Select the instruction in the .td file.
6296 if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
6297 isOffsetMultipleOf(N, 16))
6298 break;
6299
6300 SDValue ZeroReg =
6301 CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
6302 Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
6303 unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
6304 // v16i8 LD_SPLAT addr
6305 // ======>
6306 // Mask = LVSR/LVSL 0, addr
6307 // LoadLow = LVX 0, addr
6308 // Perm = VPERM LoadLow, LoadLow, Mask
6309 // Splat = VSPLTB 15/0, Perm
6310 //
6311 // v8i16 LD_SPLAT addr
6312 // ======>
6313 // Mask = LVSR/LVSL 0, addr
6314 // LoadLow = LVX 0, addr
6315 // LoadHigh = LVX (LI, 1), addr
6316 // Perm = VPERM LoadLow, LoadHigh, Mask
6317 // Splat = VSPLTH 7/0, Perm
6318 unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
6319 unsigned SplatElemIndex =
6320 Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
6321
6322 SDNode *Mask = CurDAG->getMachineNode(
6323 Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
6324 N->getOperand(1));
6325
6326 SDNode *LoadLow =
6327 CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
6328 {ZeroReg, N->getOperand(1), N->getOperand(0)});
6329
6330 SDNode *LoadHigh = LoadLow;
6331 if (Type == MVT::v8i16) {
6332 LoadHigh = CurDAG->getMachineNode(
6333 PPC::LVX, dl, MVT::v16i8, MVT::Other,
6334 {SDValue(CurDAG->getMachineNode(
6335 LIOpcode, dl, MVT::i32,
6336 CurDAG->getTargetConstant(1, dl, MVT::i8)),
6337 0),
6338 N->getOperand(1), SDValue(LoadLow, 1)});
6339 }
6340
6341 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
6342 transferMemOperands(N, LoadHigh);
6343
6344 SDNode *Perm =
6345 CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
6346 SDValue(LoadHigh, 0), SDValue(Mask, 0));
6347 CurDAG->SelectNodeTo(N, SplatOp, Type,
6348 CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
6349 SDValue(Perm, 0));
6350 return;
6351 }
6352 }
6353
6354 SelectCode(N);
6355}
6356
6357// If the target supports the cmpb instruction, do the idiom recognition here.
6358// We don't do this as a DAG combine because we don't want to do it as nodes
6359// are being combined (because we might miss part of the eventual idiom). We
6360// don't want to do it during instruction selection because we want to reuse
6361// the logic for lowering the masking operations already part of the
6362// instruction selector.
6363SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
6364 SDLoc dl(N);
6365
6366 assert(N->getOpcode() == ISD::OR &&
6367 "Only OR nodes are supported for CMPB");
6368
6369 SDValue Res;
6370 if (!Subtarget->hasCMPB())
6371 return Res;
6372
6373 if (N->getValueType(0) != MVT::i32 &&
6374 N->getValueType(0) != MVT::i64)
6375 return Res;
6376
6377 EVT VT = N->getValueType(0);
6378
6379 SDValue RHS, LHS;
6380 bool BytesFound[8] = {false, false, false, false, false, false, false, false};
6381 uint64_t Mask = 0, Alt = 0;
6382
6383 auto IsByteSelectCC = [this](SDValue O, unsigned &b,
6384 uint64_t &Mask, uint64_t &Alt,
6385 SDValue &LHS, SDValue &RHS) {
6386 if (O.getOpcode() != ISD::SELECT_CC)
6387 return false;
6388 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
6389
6390 if (!isa<ConstantSDNode>(O.getOperand(2)) ||
6391 !isa<ConstantSDNode>(O.getOperand(3)))
6392 return false;
6393
6394 uint64_t PM = O.getConstantOperandVal(2);
6395 uint64_t PAlt = O.getConstantOperandVal(3);
6396 for (b = 0; b < 8; ++b) {
6397 uint64_t Mask = UINT64_C(0xFF) << (8*b);
6398 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
6399 break;
6400 }
6401
6402 if (b == 8)
6403 return false;
6404 Mask |= PM;
6405 Alt |= PAlt;
6406
6407 if (!isa<ConstantSDNode>(O.getOperand(1)) ||
6408 O.getConstantOperandVal(1) != 0) {
6409 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
6410 if (Op0.getOpcode() == ISD::TRUNCATE)
6411 Op0 = Op0.getOperand(0);
6412 if (Op1.getOpcode() == ISD::TRUNCATE)
6413 Op1 = Op1.getOperand(0);
6414
6415 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
6416 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
6417 isa<ConstantSDNode>(Op0.getOperand(1))) {
6418
6419 unsigned Bits = Op0.getValueSizeInBits();
6420 if (b != Bits/8-1)
6421 return false;
6422 if (Op0.getConstantOperandVal(1) != Bits-8)
6423 return false;
6424
6425 LHS = Op0.getOperand(0);
6426 RHS = Op1.getOperand(0);
6427 return true;
6428 }
6429
6430 // When we have small integers (i16 to be specific), the form present
6431 // post-legalization uses SETULT in the SELECT_CC for the
6432 // higher-order byte, depending on the fact that the
6433 // even-higher-order bytes are known to all be zero, for example:
6434 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
6435 // (so when the second byte is the same, because all higher-order
6436 // bits from bytes 3 and 4 are known to be zero, the result of the
6437 // xor can be at most 255)
6438 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
6439 isa<ConstantSDNode>(O.getOperand(1))) {
6440
6441 uint64_t ULim = O.getConstantOperandVal(1);
6442 if (ULim != (UINT64_C(1) << b*8))
6443 return false;
6444
6445 // Now we need to make sure that the upper bytes are known to be
6446 // zero.
6447 unsigned Bits = Op0.getValueSizeInBits();
6448 if (!CurDAG->MaskedValueIsZero(
6449 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
6450 return false;
6451
6452 LHS = Op0.getOperand(0);
6453 RHS = Op0.getOperand(1);
6454 return true;
6455 }
6456
6457 return false;
6458 }
6459
6460 if (CC != ISD::SETEQ)
6461 return false;
6462
6463 SDValue Op = O.getOperand(0);
6464 if (Op.getOpcode() == ISD::AND) {
6465 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6466 return false;
6467 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
6468 return false;
6469
6470 SDValue XOR = Op.getOperand(0);
6471 if (XOR.getOpcode() == ISD::TRUNCATE)
6472 XOR = XOR.getOperand(0);
6473 if (XOR.getOpcode() != ISD::XOR)
6474 return false;
6475
6476 LHS = XOR.getOperand(0);
6477 RHS = XOR.getOperand(1);
6478 return true;
6479 } else if (Op.getOpcode() == ISD::SRL) {
6480 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6481 return false;
6482 unsigned Bits = Op.getValueSizeInBits();
6483 if (b != Bits/8-1)
6484 return false;
6485 if (Op.getConstantOperandVal(1) != Bits-8)
6486 return false;
6487
6488 SDValue XOR = Op.getOperand(0);
6489 if (XOR.getOpcode() == ISD::TRUNCATE)
6490 XOR = XOR.getOperand(0);
6491 if (XOR.getOpcode() != ISD::XOR)
6492 return false;
6493
6494 LHS = XOR.getOperand(0);
6495 RHS = XOR.getOperand(1);
6496 return true;
6497 }
6498
6499 return false;
6500 };
6501
6503 while (!Queue.empty()) {
6504 SDValue V = Queue.pop_back_val();
6505
6506 for (const SDValue &O : V.getNode()->ops()) {
6507 unsigned b = 0;
6508 uint64_t M = 0, A = 0;
6509 SDValue OLHS, ORHS;
6510 if (O.getOpcode() == ISD::OR) {
6511 Queue.push_back(O);
6512 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
6513 if (!LHS) {
6514 LHS = OLHS;
6515 RHS = ORHS;
6516 BytesFound[b] = true;
6517 Mask |= M;
6518 Alt |= A;
6519 } else if ((LHS == ORHS && RHS == OLHS) ||
6520 (RHS == ORHS && LHS == OLHS)) {
6521 BytesFound[b] = true;
6522 Mask |= M;
6523 Alt |= A;
6524 } else {
6525 return Res;
6526 }
6527 } else {
6528 return Res;
6529 }
6530 }
6531 }
6532
6533 unsigned LastB = 0, BCnt = 0;
6534 for (unsigned i = 0; i < 8; ++i)
6535 if (BytesFound[LastB]) {
6536 ++BCnt;
6537 LastB = i;
6538 }
6539
6540 if (!LastB || BCnt < 2)
6541 return Res;
6542
6543 // Because we'll be zero-extending the output anyway if don't have a specific
6544 // value for each input byte (via the Mask), we can 'anyext' the inputs.
6545 if (LHS.getValueType() != VT) {
6546 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
6547 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
6548 }
6549
6550 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
6551
6552 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
6553 if (NonTrivialMask && !Alt) {
6554 // Res = Mask & CMPB
6555 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6556 CurDAG->getConstant(Mask, dl, VT));
6557 } else if (Alt) {
6558 // Res = (CMPB & Mask) | (~CMPB & Alt)
6559 // Which, as suggested here:
6560 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6561 // can be written as:
6562 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
6563 // useful because the (Alt ^ Mask) can be pre-computed.
6564 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6565 CurDAG->getConstant(Mask ^ Alt, dl, VT));
6566 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
6567 CurDAG->getConstant(Alt, dl, VT));
6568 }
6569
6570 return Res;
6571}
6572
6573// When CR bit registers are enabled, an extension of an i1 variable to a i32
6574// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6575// involves constant materialization of a 0 or a 1 or both. If the result of
6576// the extension is then operated upon by some operator that can be constant
6577// folded with a constant 0 or 1, and that constant can be materialized using
6578// only one instruction (like a zero or one), then we should fold in those
6579// operations with the select.
6580void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
6581 if (!Subtarget->useCRBits())
6582 return;
6583
6584 if (N->getOpcode() != ISD::ZERO_EXTEND &&
6585 N->getOpcode() != ISD::SIGN_EXTEND &&
6586 N->getOpcode() != ISD::ANY_EXTEND)
6587 return;
6588
6589 if (N->getOperand(0).getValueType() != MVT::i1)
6590 return;
6591
6592 if (!N->hasOneUse())
6593 return;
6594
6595 SDLoc dl(N);
6596 EVT VT = N->getValueType(0);
6597 SDValue Cond = N->getOperand(0);
6598 SDValue ConstTrue =
6599 CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
6600 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
6601
6602 do {
6603 SDNode *User = *N->use_begin();
6604 if (User->getNumOperands() != 2)
6605 break;
6606
6607 auto TryFold = [this, N, User, dl](SDValue Val) {
6608 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
6609 SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
6610 SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
6611
6612 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
6613 User->getValueType(0), {O0, O1});
6614 };
6615
6616 // FIXME: When the semantics of the interaction between select and undef
6617 // are clearly defined, it may turn out to be unnecessary to break here.
6618 SDValue TrueRes = TryFold(ConstTrue);
6619 if (!TrueRes || TrueRes.isUndef())
6620 break;
6621 SDValue FalseRes = TryFold(ConstFalse);
6622 if (!FalseRes || FalseRes.isUndef())
6623 break;
6624
6625 // For us to materialize these using one instruction, we must be able to
6626 // represent them as signed 16-bit integers.
6627 uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal();
6628 if (!isInt<16>(True) || !isInt<16>(False))
6629 break;
6630
6631 // We can replace User with a new SELECT node, and try again to see if we
6632 // can fold the select with its user.
6633 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
6634 N = User;
6635 ConstTrue = TrueRes;
6636 ConstFalse = FalseRes;
6637 } while (N->hasOneUse());
6638}
6639
6640void PPCDAGToDAGISel::PreprocessISelDAG() {
6641 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6642
6643 bool MadeChange = false;
6644 while (Position != CurDAG->allnodes_begin()) {
6645 SDNode *N = &*--Position;
6646 if (N->use_empty())
6647 continue;
6648
6649 SDValue Res;
6650 switch (N->getOpcode()) {
6651 default: break;
6652 case ISD::OR:
6653 Res = combineToCMPB(N);
6654 break;
6655 }
6656
6657 if (!Res)
6658 foldBoolExts(Res, N);
6659
6660 if (Res) {
6661 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6662 LLVM_DEBUG(N->dump(CurDAG));
6663 LLVM_DEBUG(dbgs() << "\nNew: ");
6664 LLVM_DEBUG(Res.getNode()->dump(CurDAG));
6665 LLVM_DEBUG(dbgs() << "\n");
6666
6667 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
6668 MadeChange = true;
6669 }
6670 }
6671
6672 if (MadeChange)
6673 CurDAG->RemoveDeadNodes();
6674}
6675
6676/// PostprocessISelDAG - Perform some late peephole optimizations
6677/// on the DAG representation.
6678void PPCDAGToDAGISel::PostprocessISelDAG() {
6679 // Skip peepholes at -O0.
6680 if (TM.getOptLevel() == CodeGenOptLevel::None)
6681 return;
6682
6683 PeepholePPC64();
6684 PeepholeCROps();
6685 PeepholePPC64ZExt();
6686}
6687
6688// Check if all users of this node will become isel where the second operand
6689// is the constant zero. If this is so, and if we can negate the condition,
6690// then we can flip the true and false operands. This will allow the zero to
6691// be folded with the isel so that we don't need to materialize a register
6692// containing zero.
6693bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
6694 for (const SDNode *User : N->uses()) {
6695 if (!User->isMachineOpcode())
6696 return false;
6697 if (User->getMachineOpcode() != PPC::SELECT_I4 &&
6698 User->getMachineOpcode() != PPC::SELECT_I8)
6699 return false;
6700
6701 SDNode *Op1 = User->getOperand(1).getNode();
6702 SDNode *Op2 = User->getOperand(2).getNode();
6703 // If we have a degenerate select with two equal operands, swapping will
6704 // not do anything, and we may run into an infinite loop.
6705 if (Op1 == Op2)
6706 return false;
6707
6708 if (!Op2->isMachineOpcode())
6709 return false;
6710
6711 if (Op2->getMachineOpcode() != PPC::LI &&
6712 Op2->getMachineOpcode() != PPC::LI8)
6713 return false;
6714
6715 if (!isNullConstant(Op2->getOperand(0)))
6716 return false;
6717 }
6718
6719 return true;
6720}
6721
6722void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
6723 SmallVector<SDNode *, 4> ToReplace;
6724 for (SDNode *User : N->uses()) {
6725 assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
6726 User->getMachineOpcode() == PPC::SELECT_I8) &&
6727 "Must have all select users");
6728 ToReplace.push_back(User);
6729 }
6730
6731 for (SDNode *User : ToReplace) {
6732 SDNode *ResNode =
6733 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
6734 User->getValueType(0), User->getOperand(0),
6735 User->getOperand(2),
6736 User->getOperand(1));
6737
6738 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6739 LLVM_DEBUG(User->dump(CurDAG));
6740 LLVM_DEBUG(dbgs() << "\nNew: ");
6741 LLVM_DEBUG(ResNode->dump(CurDAG));
6742 LLVM_DEBUG(dbgs() << "\n");
6743
6744 ReplaceUses(User, ResNode);
6745 }
6746}
6747
6748void PPCDAGToDAGISel::PeepholeCROps() {
6749 bool IsModified;
6750 do {
6751 IsModified = false;
6752 for (SDNode &Node : CurDAG->allnodes()) {
6753 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
6754 if (!MachineNode || MachineNode->use_empty())
6755 continue;
6756 SDNode *ResNode = MachineNode;
6757
6758 bool Op1Set = false, Op1Unset = false,
6759 Op1Not = false,
6760 Op2Set = false, Op2Unset = false,
6761 Op2Not = false;
6762
6763 unsigned Opcode = MachineNode->getMachineOpcode();
6764 switch (Opcode) {
6765 default: break;
6766 case PPC::CRAND:
6767 case PPC::CRNAND:
6768 case PPC::CROR:
6769 case PPC::CRXOR:
6770 case PPC::CRNOR:
6771 case PPC::CREQV:
6772 case PPC::CRANDC:
6773 case PPC::CRORC: {
6774 SDValue Op = MachineNode->getOperand(1);
6775 if (Op.isMachineOpcode()) {
6776 if (Op.getMachineOpcode() == PPC::CRSET)
6777 Op2Set = true;
6778 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6779 Op2Unset = true;
6780 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6781 Op.getOperand(0) == Op.getOperand(1)) ||
6782 Op.getMachineOpcode() == PPC::CRNOT)
6783 Op2Not = true;
6784 }
6785 [[fallthrough]];
6786 }
6787 case PPC::BC:
6788 case PPC::BCn:
6789 case PPC::SELECT_I4:
6790 case PPC::SELECT_I8:
6791 case PPC::SELECT_F4:
6792 case PPC::SELECT_F8:
6793 case PPC::SELECT_SPE:
6794 case PPC::SELECT_SPE4:
6795 case PPC::SELECT_VRRC:
6796 case PPC::SELECT_VSFRC:
6797 case PPC::SELECT_VSSRC:
6798 case PPC::SELECT_VSRC: {
6799 SDValue Op = MachineNode->getOperand(0);
6800 if (Op.isMachineOpcode()) {
6801 if (Op.getMachineOpcode() == PPC::CRSET)
6802 Op1Set = true;
6803 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6804 Op1Unset = true;
6805 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6806 Op.getOperand(0) == Op.getOperand(1)) ||
6807 Op.getMachineOpcode() == PPC::CRNOT)
6808 Op1Not = true;
6809 }
6810 }
6811 break;
6812 }
6813
6814 bool SelectSwap = false;
6815 switch (Opcode) {
6816 default: break;
6817 case PPC::CRAND:
6818 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6819 // x & x = x
6820 ResNode = MachineNode->getOperand(0).getNode();
6821 else if (Op1Set)
6822 // 1 & y = y
6823 ResNode = MachineNode->getOperand(1).getNode();
6824 else if (Op2Set)
6825 // x & 1 = x
6826 ResNode = MachineNode->getOperand(0).getNode();
6827 else if (Op1Unset || Op2Unset)
6828 // x & 0 = 0 & y = 0
6829 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6830 MVT::i1);
6831 else if (Op1Not)
6832 // ~x & y = andc(y, x)
6833 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6834 MVT::i1, MachineNode->getOperand(1),
6835 MachineNode->getOperand(0).
6836 getOperand(0));
6837 else if (Op2Not)
6838 // x & ~y = andc(x, y)
6839 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6840 MVT::i1, MachineNode->getOperand(0),
6841 MachineNode->getOperand(1).
6842 getOperand(0));
6843 else if (AllUsersSelectZero(MachineNode)) {
6844 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
6845 MVT::i1, MachineNode->getOperand(0),
6846 MachineNode->getOperand(1));
6847 SelectSwap = true;
6848 }
6849 break;
6850 case PPC::CRNAND:
6851 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6852 // nand(x, x) -> nor(x, x)
6853 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6854 MVT::i1, MachineNode->getOperand(0),
6855 MachineNode->getOperand(0));
6856 else if (Op1Set)
6857 // nand(1, y) -> nor(y, y)
6858 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6859 MVT::i1, MachineNode->getOperand(1),
6860 MachineNode->getOperand(1));
6861 else if (Op2Set)
6862 // nand(x, 1) -> nor(x, x)
6863 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6864 MVT::i1, MachineNode->getOperand(0),
6865 MachineNode->getOperand(0));
6866 else if (Op1Unset || Op2Unset)
6867 // nand(x, 0) = nand(0, y) = 1
6868 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6869 MVT::i1);
6870 else if (Op1Not)
6871 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
6872 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6873 MVT::i1, MachineNode->getOperand(0).
6874 getOperand(0),
6875 MachineNode->getOperand(1));
6876 else if (Op2Not)
6877 // nand(x, ~y) = ~x | y = orc(y, x)
6878 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6879 MVT::i1, MachineNode->getOperand(1).
6880 getOperand(0),
6881 MachineNode->getOperand(0));
6882 else if (AllUsersSelectZero(MachineNode)) {
6883 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
6884 MVT::i1, MachineNode->getOperand(0),
6885 MachineNode->getOperand(1));
6886 SelectSwap = true;
6887 }
6888 break;
6889 case PPC::CROR:
6890 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6891 // x | x = x
6892 ResNode = MachineNode->getOperand(0).getNode();
6893 else if (Op1Set || Op2Set)
6894 // x | 1 = 1 | y = 1
6895 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6896 MVT::i1);
6897 else if (Op1Unset)
6898 // 0 | y = y
6899 ResNode = MachineNode->getOperand(1).getNode();
6900 else if (Op2Unset)
6901 // x | 0 = x
6902 ResNode = MachineNode->getOperand(0).getNode();
6903 else if (Op1Not)
6904 // ~x | y = orc(y, x)
6905 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6906 MVT::i1, MachineNode->getOperand(1),
6907 MachineNode->getOperand(0).
6908 getOperand(0));
6909 else if (Op2Not)
6910 // x | ~y = orc(x, y)
6911 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6912 MVT::i1, MachineNode->getOperand(0),
6913 MachineNode->getOperand(1).
6914 getOperand(0));
6915 else if (AllUsersSelectZero(MachineNode)) {
6916 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6917 MVT::i1, MachineNode->getOperand(0),
6918 MachineNode->getOperand(1));
6919 SelectSwap = true;
6920 }
6921 break;
6922 case PPC::CRXOR:
6923 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6924 // xor(x, x) = 0
6925 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6926 MVT::i1);
6927 else if (Op1Set)
6928 // xor(1, y) -> nor(y, y)
6929 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6930 MVT::i1, MachineNode->getOperand(1),
6931 MachineNode->getOperand(1));
6932 else if (Op2Set)
6933 // xor(x, 1) -> nor(x, x)
6934 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6935 MVT::i1, MachineNode->getOperand(0),
6936 MachineNode->getOperand(0));
6937 else if (Op1Unset)
6938 // xor(0, y) = y
6939 ResNode = MachineNode->getOperand(1).getNode();
6940 else if (Op2Unset)
6941 // xor(x, 0) = x
6942 ResNode = MachineNode->getOperand(0).getNode();
6943 else if (Op1Not)
6944 // xor(~x, y) = eqv(x, y)
6945 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6946 MVT::i1, MachineNode->getOperand(0).
6947 getOperand(0),
6948 MachineNode->getOperand(1));
6949 else if (Op2Not)
6950 // xor(x, ~y) = eqv(x, y)
6951 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6952 MVT::i1, MachineNode->getOperand(0),
6953 MachineNode->getOperand(1).
6954 getOperand(0));
6955 else if (AllUsersSelectZero(MachineNode)) {
6956 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6957 MVT::i1, MachineNode->getOperand(0),
6958 MachineNode->getOperand(1));
6959 SelectSwap = true;
6960 }
6961 break;
6962 case PPC::CRNOR:
6963 if (Op1Set || Op2Set)
6964 // nor(1, y) -> 0
6965 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6966 MVT::i1);
6967 else if (Op1Unset)
6968 // nor(0, y) = ~y -> nor(y, y)
6969 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6970 MVT::i1, MachineNode->getOperand(1),
6971 MachineNode->getOperand(1));
6972 else if (Op2Unset)
6973 // nor(x, 0) = ~x
6974 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6975 MVT::i1, MachineNode->getOperand(0),
6976 MachineNode->getOperand(0));
6977 else if (Op1Not)
6978 // nor(~x, y) = andc(x, y)
6979 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6980 MVT::i1, MachineNode->getOperand(0).
6981 getOperand(0),
6982 MachineNode->getOperand(1));
6983 else if (Op2Not)
6984 // nor(x, ~y) = andc(y, x)
6985 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6986 MVT::i1, MachineNode->getOperand(1).
6987 getOperand(0),
6988 MachineNode->getOperand(0));
6989 else if (AllUsersSelectZero(MachineNode)) {
6990 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
6991 MVT::i1, MachineNode->getOperand(0),
6992 MachineNode->getOperand(1));
6993 SelectSwap = true;
6994 }
6995 break;
6996 case PPC::CREQV:
6997 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6998 // eqv(x, x) = 1
6999 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7000 MVT::i1);
7001 else if (Op1Set)
7002 // eqv(1, y) = y
7003 ResNode = MachineNode->getOperand(1).getNode();
7004 else if (Op2Set)
7005 // eqv(x, 1) = x
7006 ResNode = MachineNode->getOperand(0).getNode();
7007 else if (Op1Unset)
7008 // eqv(0, y) = ~y -> nor(y, y)
7009 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7010 MVT::i1, MachineNode->getOperand(1),
7011 MachineNode->getOperand(1));
7012 else if (Op2Unset)
7013 // eqv(x, 0) = ~x
7014 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7015 MVT::i1, MachineNode->getOperand(0),
7016 MachineNode->getOperand(0));
7017 else if (Op1Not)
7018 // eqv(~x, y) = xor(x, y)
7019 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7020 MVT::i1, MachineNode->getOperand(0).
7021 getOperand(0),
7022 MachineNode->getOperand(1));
7023 else if (Op2Not)
7024 // eqv(x, ~y) = xor(x, y)
7025 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7026 MVT::i1, MachineNode->getOperand(0),
7027 MachineNode->getOperand(1).
7028 getOperand(0));
7029 else if (AllUsersSelectZero(MachineNode)) {
7030 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7031 MVT::i1, MachineNode->getOperand(0),
7032 MachineNode->getOperand(1));
7033 SelectSwap = true;
7034 }
7035 break;
7036 case PPC::CRANDC:
7037 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7038 // andc(x, x) = 0
7039 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7040 MVT::i1);
7041 else if (Op1Set)
7042 // andc(1, y) = ~y
7043 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7044 MVT::i1, MachineNode->getOperand(1),
7045 MachineNode->getOperand(1));
7046 else if (Op1Unset || Op2Set)
7047 // andc(0, y) = andc(x, 1) = 0
7048 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7049 MVT::i1);
7050 else if (Op2Unset)
7051 // andc(x, 0) = x
7052 ResNode = MachineNode->getOperand(0).getNode();
7053 else if (Op1Not)
7054 // andc(~x, y) = ~(x | y) = nor(x, y)
7055 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7056 MVT::i1, MachineNode->getOperand(0).
7057 getOperand(0),
7058 MachineNode->getOperand(1));
7059 else if (Op2Not)
7060 // andc(x, ~y) = x & y
7061 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
7062 MVT::i1, MachineNode->getOperand(0),
7063 MachineNode->getOperand(1).
7064 getOperand(0));
7065 else if (AllUsersSelectZero(MachineNode)) {
7066 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
7067 MVT::i1, MachineNode->getOperand(1),
7068 MachineNode->getOperand(0));
7069 SelectSwap = true;
7070 }
7071 break;
7072 case PPC::CRORC:
7073 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7074 // orc(x, x) = 1
7075 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7076 MVT::i1);
7077 else if (Op1Set || Op2Unset)
7078 // orc(1, y) = orc(x, 0) = 1
7079 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7080 MVT::i1);
7081 else if (Op2Set)
7082 // orc(x, 1) = x
7083 ResNode = MachineNode->getOperand(0).getNode();
7084 else if (Op1Unset)
7085 // orc(0, y) = ~y
7086 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7087 MVT::i1, MachineNode->getOperand(1),
7088 MachineNode->getOperand(1));
7089 else if (Op1Not)
7090 // orc(~x, y) = ~(x & y) = nand(x, y)
7091 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
7092 MVT::i1, MachineNode->getOperand(0).
7093 getOperand(0),
7094 MachineNode->getOperand(1));
7095 else if (Op2Not)
7096 // orc(x, ~y) = x | y
7097 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7098 MVT::i1, MachineNode->getOperand(0),
7099 MachineNode->getOperand(1).
7100 getOperand(0));
7101 else if (AllUsersSelectZero(MachineNode)) {
7102 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7103 MVT::i1, MachineNode->getOperand(1),
7104 MachineNode->getOperand(0));
7105 SelectSwap = true;
7106 }
7107 break;
7108 case PPC::SELECT_I4:
7109 case PPC::SELECT_I8:
7110 case PPC::SELECT_F4:
7111 case PPC::SELECT_F8:
7112 case PPC::SELECT_SPE:
7113 case PPC::SELECT_SPE4:
7114 case PPC::SELECT_VRRC:
7115 case PPC::SELECT_VSFRC:
7116 case PPC::SELECT_VSSRC:
7117 case PPC::SELECT_VSRC:
7118 if (Op1Set)
7119 ResNode = MachineNode->getOperand(1).getNode();
7120 else if (Op1Unset)
7121 ResNode = MachineNode->getOperand(2).getNode();
7122 else if (Op1Not)
7123 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
7124 SDLoc(MachineNode),
7125 MachineNode->getValueType(0),
7126 MachineNode->getOperand(0).
7127 getOperand(0),
7128 MachineNode->getOperand(2),
7129 MachineNode->getOperand(1));
7130 break;
7131 case PPC::BC:
7132 case PPC::BCn:
7133 if (Op1Not)
7134 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
7135 PPC::BC,
7136 SDLoc(MachineNode),
7137 MVT::Other,
7138 MachineNode->getOperand(0).
7139 getOperand(0),
7140 MachineNode->getOperand(1),
7141 MachineNode->getOperand(2));
7142 // FIXME: Handle Op1Set, Op1Unset here too.
7143 break;
7144 }
7145
7146 // If we're inverting this node because it is used only by selects that
7147 // we'd like to swap, then swap the selects before the node replacement.
7148 if (SelectSwap)
7149 SwapAllSelectUsers(MachineNode);
7150
7151 if (ResNode != MachineNode) {
7152 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
7153 LLVM_DEBUG(MachineNode->dump(CurDAG));
7154 LLVM_DEBUG(dbgs() << "\nNew: ");
7155 LLVM_DEBUG(ResNode->dump(CurDAG));
7156 LLVM_DEBUG(dbgs() << "\n");
7157
7158 ReplaceUses(MachineNode, ResNode);
7159 IsModified = true;
7160 }
7161 }
7162 if (IsModified)
7163 CurDAG->RemoveDeadNodes();
7164 } while (IsModified);
7165}
7166
7167// Gather the set of 32-bit operations that are known to have their
7168// higher-order 32 bits zero, where ToPromote contains all such operations.
7170 SmallPtrSetImpl<SDNode *> &ToPromote) {
7171 if (!Op32.isMachineOpcode())
7172 return false;
7173
7174 // First, check for the "frontier" instructions (those that will clear the
7175 // higher-order 32 bits.
7176
7177 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
7178 // around. If it does not, then these instructions will clear the
7179 // higher-order bits.
7180 if ((Op32.getMachineOpcode() == PPC::RLWINM ||
7181 Op32.getMachineOpcode() == PPC::RLWNM) &&
7182 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
7183 ToPromote.insert(Op32.getNode());
7184 return true;
7185 }
7186
7187 // SLW and SRW always clear the higher-order bits.
7188 if (Op32.getMachineOpcode() == PPC::SLW ||
7189 Op32.getMachineOpcode() == PPC::SRW) {
7190 ToPromote.insert(Op32.getNode());
7191 return true;
7192 }
7193
7194 // For LI and LIS, we need the immediate to be positive (so that it is not
7195 // sign extended).
7196 if (Op32.getMachineOpcode() == PPC::LI ||
7197 Op32.getMachineOpcode() == PPC::LIS) {
7198 if (!isUInt<15>(Op32.getConstantOperandVal(0)))
7199 return false;
7200
7201 ToPromote.insert(Op32.getNode());
7202 return true;
7203 }
7204
7205 // LHBRX and LWBRX always clear the higher-order bits.
7206 if (Op32.getMachineOpcode() == PPC::LHBRX ||
7207 Op32.getMachineOpcode() == PPC::LWBRX) {
7208 ToPromote.insert(Op32.getNode());
7209 return true;
7210 }
7211
7212 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
7213 if (Op32.getMachineOpcode() == PPC::CNTLZW ||
7214 Op32.getMachineOpcode() == PPC::CNTTZW) {
7215 ToPromote.insert(Op32.getNode());
7216 return true;
7217 }
7218
7219 // Next, check for those instructions we can look through.
7220
7221 // Assuming the mask does not wrap around, then the higher-order bits are
7222 // taken directly from the first operand.
7223 if (Op32.getMachineOpcode() == PPC::RLWIMI &&
7224 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
7225 SmallPtrSet<SDNode *, 16> ToPromote1;
7226 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7227 return false;
7228
7229 ToPromote.insert(Op32.getNode());
7230 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7231 return true;
7232 }
7233
7234 // For OR, the higher-order bits are zero if that is true for both operands.
7235 // For SELECT_I4, the same is true (but the relevant operand numbers are
7236 // shifted by 1).
7237 if (Op32.getMachineOpcode() == PPC::OR ||
7238 Op32.getMachineOpcode() == PPC::SELECT_I4) {
7239 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
7240 SmallPtrSet<SDNode *, 16> ToPromote1;
7241 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
7242 return false;
7243 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
7244 return false;
7245
7246 ToPromote.insert(Op32.getNode());
7247 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7248 return true;
7249 }
7250
7251 // For ORI and ORIS, we need the higher-order bits of the first operand to be
7252 // zero, and also for the constant to be positive (so that it is not sign
7253 // extended).
7254 if (Op32.getMachineOpcode() == PPC::ORI ||
7255 Op32.getMachineOpcode() == PPC::ORIS) {
7256 SmallPtrSet<SDNode *, 16> ToPromote1;
7257 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7258 return false;
7259 if (!isUInt<15>(Op32.getConstantOperandVal(1)))
7260 return false;
7261
7262 ToPromote.insert(Op32.getNode());
7263 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7264 return true;
7265 }
7266
7267 // The higher-order bits of AND are zero if that is true for at least one of
7268 // the operands.
7269 if (Op32.getMachineOpcode() == PPC::AND) {
7270 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
7271 bool Op0OK =
7272 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7273 bool Op1OK =
7274 PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
7275 if (!Op0OK && !Op1OK)
7276 return false;
7277
7278 ToPromote.insert(Op32.getNode());
7279
7280 if (Op0OK)
7281 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7282
7283 if (Op1OK)
7284 ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
7285
7286 return true;
7287 }
7288
7289 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
7290 // of the first operand, or if the second operand is positive (so that it is
7291 // not sign extended).
7292 if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
7293 Op32.getMachineOpcode() == PPC::ANDIS_rec) {
7294 SmallPtrSet<SDNode *, 16> ToPromote1;
7295 bool Op0OK =
7296 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7297 bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
7298 if (!Op0OK && !Op1OK)
7299 return false;
7300
7301 ToPromote.insert(Op32.getNode());
7302
7303 if (Op0OK)
7304 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7305
7306 return true;
7307 }
7308
7309 return false;
7310}
7311
7312void PPCDAGToDAGISel::PeepholePPC64ZExt() {
7313 if (!Subtarget->isPPC64())
7314 return;
7315
7316 // When we zero-extend from i32 to i64, we use a pattern like this:
7317 // def : Pat<(i64 (zext i32:$in)),
7318 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
7319 // 0, 32)>;
7320 // There are several 32-bit shift/rotate instructions, however, that will
7321 // clear the higher-order bits of their output, rendering the RLDICL
7322 // unnecessary. When that happens, we remove it here, and redefine the
7323 // relevant 32-bit operation to be a 64-bit operation.
7324
7325 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7326
7327 bool MadeChange = false;
7328 while (Position != CurDAG->allnodes_begin()) {
7329 SDNode *N = &*--Position;
7330 // Skip dead nodes and any non-machine opcodes.
7331 if (N->use_empty() || !N->isMachineOpcode())
7332 continue;
7333
7334 if (N->getMachineOpcode() != PPC::RLDICL)
7335 continue;
7336
7337 if (N->getConstantOperandVal(1) != 0 ||
7338 N->getConstantOperandVal(2) != 32)
7339 continue;
7340
7341 SDValue ISR = N->getOperand(0);
7342 if (!ISR.isMachineOpcode() ||
7343 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
7344 continue;
7345
7346 if (!ISR.hasOneUse())
7347 continue;
7348
7349 if (ISR.getConstantOperandVal(2) != PPC::sub_32)
7350 continue;
7351
7352 SDValue IDef = ISR.getOperand(0);
7353 if (!IDef.isMachineOpcode() ||
7354 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
7355 continue;
7356
7357 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
7358 // can get rid of it.
7359
7360 SDValue Op32 = ISR->getOperand(1);
7361 if (!Op32.isMachineOpcode())
7362 continue;
7363
7364 // There are some 32-bit instructions that always clear the high-order 32
7365 // bits, there are also some instructions (like AND) that we can look
7366 // through.
7367 SmallPtrSet<SDNode *, 16> ToPromote;
7368 if (!PeepholePPC64ZExtGather(Op32, ToPromote))
7369 continue;
7370
7371 // If the ToPromote set contains nodes that have uses outside of the set
7372 // (except for the original INSERT_SUBREG), then abort the transformation.
7373 bool OutsideUse = false;
7374 for (SDNode *PN : ToPromote) {
7375 for (SDNode *UN : PN->uses()) {
7376 if (!ToPromote.count(UN) && UN != ISR.getNode()) {
7377 OutsideUse = true;
7378 break;
7379 }
7380 }
7381
7382 if (OutsideUse)
7383 break;
7384 }
7385 if (OutsideUse)
7386 continue;
7387
7388 MadeChange = true;
7389
7390 // We now know that this zero extension can be removed by promoting to
7391 // nodes in ToPromote to 64-bit operations, where for operations in the
7392 // frontier of the set, we need to insert INSERT_SUBREGs for their
7393 // operands.
7394 for (SDNode *PN : ToPromote) {
7395 unsigned NewOpcode;
7396 switch (PN->getMachineOpcode()) {
7397 default:
7398 llvm_unreachable("Don't know the 64-bit variant of this instruction");
7399 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
7400 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
7401 case PPC::SLW: NewOpcode = PPC::SLW8; break;
7402 case PPC::SRW: NewOpcode = PPC::SRW8; break;
7403 case PPC::LI: NewOpcode = PPC::LI8; break;
7404 case PPC::LIS: NewOpcode = PPC::LIS8; break;
7405 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
7406 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
7407 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
7408 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
7409 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
7410 case PPC::OR: NewOpcode = PPC::OR8; break;
7411 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
7412 case PPC::ORI: NewOpcode = PPC::ORI8; break;
7413 case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
7414 case PPC::AND: NewOpcode = PPC::AND8; break;
7415 case PPC::ANDI_rec:
7416 NewOpcode = PPC::ANDI8_rec;
7417 break;
7418 case PPC::ANDIS_rec:
7419 NewOpcode = PPC::ANDIS8_rec;
7420 break;
7421 }
7422
7423 // Note: During the replacement process, the nodes will be in an
7424 // inconsistent state (some instructions will have operands with values
7425 // of the wrong type). Once done, however, everything should be right
7426 // again.
7427
7429 for (const SDValue &V : PN->ops()) {
7430 if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
7431 !isa<ConstantSDNode>(V)) {
7432 SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
7433 SDNode *ReplOp =
7434 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
7435 ISR.getNode()->getVTList(), ReplOpOps);
7436 Ops.push_back(SDValue(ReplOp, 0));
7437 } else {
7438 Ops.push_back(V);
7439 }
7440 }
7441
7442 // Because all to-be-promoted nodes only have users that are other
7443 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
7444 // the i32 result value type with i64.
7445
7446 SmallVector<EVT, 2> NewVTs;
7447 SDVTList VTs = PN->getVTList();
7448 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
7449 if (VTs.VTs[i] == MVT::i32)
7450 NewVTs.push_back(MVT::i64);
7451 else
7452 NewVTs.push_back(VTs.VTs[i]);
7453
7454 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
7455 LLVM_DEBUG(PN->dump(CurDAG));
7456
7457 CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
7458
7459 LLVM_DEBUG(dbgs() << "\nNew: ");
7460 LLVM_DEBUG(PN->dump(CurDAG));
7461 LLVM_DEBUG(dbgs() << "\n");
7462 }
7463
7464 // Now we replace the original zero extend and its associated INSERT_SUBREG
7465 // with the value feeding the INSERT_SUBREG (which has now been promoted to
7466 // return an i64).
7467
7468 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
7469 LLVM_DEBUG(N->dump(CurDAG));
7470 LLVM_DEBUG(dbgs() << "\nNew: ");
7471 LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
7472 LLVM_DEBUG(dbgs() << "\n");
7473
7474 ReplaceUses(N, Op32.getNode());
7475 }
7476
7477 if (MadeChange)
7478 CurDAG->RemoveDeadNodes();
7479}
7480
7481static bool isVSXSwap(SDValue N) {
7482 if (!N->isMachineOpcode())
7483 return false;
7484 unsigned Opc = N->getMachineOpcode();
7485
7486 // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
7487 // operand is 2.
7488 if (Opc == PPC::XXPERMDIs) {
7489 return isa<ConstantSDNode>(N->getOperand(1)) &&
7490 N->getConstantOperandVal(1) == 2;
7491 } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
7492 return N->getOperand(0) == N->getOperand(1) &&
7493 isa<ConstantSDNode>(N->getOperand(2)) &&
7494 N->getConstantOperandVal(2) == 2;
7495 }
7496
7497 return false;
7498}
7499
7500// TODO: Make this complete and replace with a table-gen bit.
7502 if (!N->isMachineOpcode())
7503 return false;
7504 unsigned Opc = N->getMachineOpcode();
7505
7506 switch (Opc) {
7507 default:
7508 return false;
7509 case PPC::VAVGSB:
7510 case PPC::VAVGUB:
7511 case PPC::VAVGSH:
7512 case PPC::VAVGUH:
7513 case PPC::VAVGSW:
7514 case PPC::VAVGUW:
7515 case PPC::VMAXFP:
7516 case PPC::VMAXSB:
7517 case PPC::VMAXUB:
7518 case PPC::VMAXSH:
7519 case PPC::VMAXUH:
7520 case PPC::VMAXSW:
7521 case PPC::VMAXUW:
7522 case PPC::VMINFP:
7523 case PPC::VMINSB:
7524 case PPC::VMINUB:
7525 case PPC::VMINSH:
7526 case PPC::VMINUH:
7527 case PPC::VMINSW:
7528 case PPC::VMINUW:
7529 case PPC::VADDFP:
7530 case PPC::VADDUBM:
7531 case PPC::VADDUHM:
7532 case PPC::VADDUWM:
7533 case PPC::VSUBFP:
7534 case PPC::VSUBUBM:
7535 case PPC::VSUBUHM:
7536 case PPC::VSUBUWM:
7537 case PPC::VAND:
7538 case PPC::VANDC:
7539 case PPC::VOR:
7540 case PPC::VORC:
7541 case PPC::VXOR:
7542 case PPC::VNOR:
7543 case PPC::VMULUWM:
7544 return true;
7545 }
7546}
7547
7548// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7549// lane-insensitive.
7550static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
7551 // Our desired xxswap might be source of COPY_TO_REGCLASS.
7552 // TODO: Can we put this a common method for DAG?
7553 auto SkipRCCopy = [](SDValue V) {
7554 while (V->isMachineOpcode() &&
7555 V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
7556 // All values in the chain should have single use.
7557 if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))
7558 return SDValue();
7559 V = V->getOperand(0);
7560 }
7561 return V.hasOneUse() ? V : SDValue();
7562 };
7563
7564 SDValue VecOp = SkipRCCopy(N->getOperand(0));
7565 if (!VecOp || !isLaneInsensitive(VecOp))
7566 return;
7567
7568 SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
7569 RHS = SkipRCCopy(VecOp.getOperand(1));
7570 if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
7571 return;
7572
7573 // These swaps may still have chain-uses here, count on dead code elimination
7574 // in following passes to remove them.
7575 DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));
7576 DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));
7577 DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
7578}
7579
7580// Check if an SDValue has the 'aix-small-tls' global variable attribute.
7581static bool hasAIXSmallTLSAttr(SDValue Val) {
7582 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val))
7583 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()))
7584 if (GV->hasAttribute("aix-small-tls"))
7585 return true;
7586
7587 return false;
7588}
7589
7590// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
7592 SDValue ADDIToFold) {
7593 // Check if ADDIToFold (the ADDI that we want to fold into local-exec
7594 // accesses), is truly an ADDI.
7595 if (!ADDIToFold.isMachineOpcode() ||
7596 (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
7597 return false;
7598
7599 // Folding is only allowed for the AIX small-local-exec TLS target attribute
7600 // or when the 'aix-small-tls' global variable attribute is present.
7601 const PPCSubtarget &Subtarget =
7603 SDValue TLSVarNode = ADDIToFold.getOperand(1);
7604 if (!(Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
7605 return false;
7606
7607 // The first operand of the ADDIToFold should be the thread pointer.
7608 // This transformation is only performed if the first operand of the
7609 // addi is the thread pointer.
7610 SDValue TPRegNode = ADDIToFold.getOperand(0);
7611 RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7612 if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7613 return false;
7614
7615 // The second operand of the ADDIToFold should be the global TLS address
7616 // (the local-exec TLS variable). We only perform the folding if the TLS
7617 // variable is the second operand.
7618 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
7619 if (!GA)
7620 return false;
7621
7622 // The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
7623 // so this optimization is not performed otherwise if the flag is not set.
7624 unsigned TargetFlags = GA->getTargetFlags();
7625 if (TargetFlags != PPCII::MO_TPREL_FLAG)
7626 return false;
7627
7628 // If all conditions are satisfied, the ADDI is valid for folding.
7629 return true;
7630}
7631
7632// For non-TOC-based local-exec access where an addi is feeding into another
7633// addi, fold this sequence into a single addi if possible.
7634// Before this optimization, the sequence appears as:
7635// addi rN, r13, sym@le
7636// addi rM, rN, imm
7637// After this optimization, we can fold the two addi into a single one:
7638// addi rM, r13, sym@le + imm
7640 if (N->getMachineOpcode() != PPC::ADDI8)
7641 return;
7642
7643 // InitialADDI is the addi feeding into N (also an addi), and the addi that
7644 // we want optimized out.
7645 SDValue InitialADDI = N->getOperand(0);
7646
7647 if (!isEligibleToFoldADDIForLocalExecAccesses(DAG, InitialADDI))
7648 return;
7649
7650 // At this point, InitialADDI can be folded into a non-TOC-based local-exec
7651 // access. The first operand of InitialADDI should be the thread pointer,
7652 // which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
7653 SDValue TPRegNode = InitialADDI.getOperand(0);
7654 [[maybe_unused]] RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7655 [[maybe_unused]] const PPCSubtarget &Subtarget =
7657 assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
7658 "Expecting the first operand to be a thread pointer for folding addi "
7659 "in local-exec accesses!");
7660
7661 // The second operand of the InitialADDI should be the global TLS address
7662 // (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
7663 // This has been checked in isEligibleToFoldADDIForLocalExecAccesses().
7664 SDValue TLSVarNode = InitialADDI.getOperand(1);
7665 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
7666 assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7667 "local-exec accesses!");
7668 unsigned TargetFlags = GA->getTargetFlags();
7669
7670 // The second operand of the addi that we want to preserve will be an
7671 // immediate. We add this immediate, together with the address of the TLS
7672 // variable found in InitialADDI, in order to preserve the correct TLS address
7673 // information during assembly printing. The offset is likely to be non-zero
7674 // when we end up in this case.
7675 int Offset = N->getConstantOperandVal(1);
7676 TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
7677 Offset, TargetFlags);
7678
7679 (void)DAG->UpdateNodeOperands(N, TPRegNode, TLSVarNode);
7680 if (InitialADDI.getNode()->use_empty())
7681 DAG->RemoveDeadNode(InitialADDI.getNode());
7682}
7683
7684void PPCDAGToDAGISel::PeepholePPC64() {
7685 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7686
7687 while (Position != CurDAG->allnodes_begin()) {
7688 SDNode *N = &*--Position;
7689 // Skip dead nodes and any non-machine opcodes.
7690 if (N->use_empty() || !N->isMachineOpcode())
7691 continue;
7692
7693 if (isVSXSwap(SDValue(N, 0)))
7694 reduceVSXSwap(N, CurDAG);
7695
7696 // This optimization is performed for non-TOC-based local-exec accesses.
7698
7699 unsigned FirstOp;
7700 unsigned StorageOpcode = N->getMachineOpcode();
7701 bool RequiresMod4Offset = false;
7702
7703 switch (StorageOpcode) {
7704 default: continue;
7705
7706 case PPC::LWA:
7707 case PPC::LD:
7708 case PPC::DFLOADf64:
7709 case PPC::DFLOADf32:
7710 RequiresMod4Offset = true;
7711 [[fallthrough]];
7712 case PPC::LBZ:
7713 case PPC::LBZ8:
7714 case PPC::LFD:
7715 case PPC::LFS:
7716 case PPC::LHA:
7717 case PPC::LHA8:
7718 case PPC::LHZ:
7719 case PPC::LHZ8:
7720 case PPC::LWZ:
7721 case PPC::LWZ8:
7722 FirstOp = 0;
7723 break;
7724
7725 case PPC::STD:
7726 case PPC::DFSTOREf64:
7727 case PPC::DFSTOREf32:
7728 RequiresMod4Offset = true;
7729 [[fallthrough]];
7730 case PPC::STB:
7731 case PPC::STB8:
7732 case PPC::STFD:
7733 case PPC::STFS:
7734 case PPC::STH:
7735 case PPC::STH8:
7736 case PPC::STW:
7737 case PPC::STW8:
7738 FirstOp = 1;
7739 break;
7740 }
7741
7742 // If this is a load or store with a zero offset, or within the alignment,
7743 // we may be able to fold an add-immediate into the memory operation.
7744 // The check against alignment is below, as it can't occur until we check
7745 // the arguments to N
7746 if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
7747 continue;
7748
7749 SDValue Base = N->getOperand(FirstOp + 1);
7750 if (!Base.isMachineOpcode())
7751 continue;
7752
7753 unsigned Flags = 0;
7754 bool ReplaceFlags = true;
7755
7756 // When the feeding operation is an add-immediate of some sort,
7757 // determine whether we need to add relocation information to the
7758 // target flags on the immediate operand when we fold it into the
7759 // load instruction.
7760 //
7761 // For something like ADDItocL8, the relocation information is
7762 // inferred from the opcode; when we process it in the AsmPrinter,
7763 // we add the necessary relocation there. A load, though, can receive
7764 // relocation from various flavors of ADDIxxx, so we need to carry
7765 // the relocation information in the target flags.
7766 switch (Base.getMachineOpcode()) {
7767 default: continue;
7768
7769 case PPC::ADDI8:
7770 case PPC::ADDI:
7771 // In some cases (such as TLS) the relocation information
7772 // is already in place on the operand, so copying the operand
7773 // is sufficient.
7774 ReplaceFlags = false;
7775 break;
7776 case PPC::ADDIdtprelL:
7778 break;
7779 case PPC::ADDItlsldL:
7781 break;
7782 case PPC::ADDItocL8:
7784 break;
7785 }
7786
7787 SDValue ImmOpnd = Base.getOperand(1);
7788
7789 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
7790 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
7791 // we might have needed different @ha relocation values for the offset
7792 // pointers).
7793 int MaxDisplacement = 7;
7794 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7795 const GlobalValue *GV = GA->getGlobal();
7796 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7797 MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
7798 }
7799
7800 bool UpdateHBase = false;
7801 SDValue HBase = Base.getOperand(0);
7802
7803 int Offset = N->getConstantOperandVal(FirstOp);
7804 if (ReplaceFlags) {
7805 if (Offset < 0 || Offset > MaxDisplacement) {
7806 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
7807 // one use, then we can do this for any offset, we just need to also
7808 // update the offset (i.e. the symbol addend) on the addis also.
7809 if (Base.getMachineOpcode() != PPC::ADDItocL8)
7810 continue;
7811
7812 if (!HBase.isMachineOpcode() ||
7813 HBase.getMachineOpcode() != PPC::ADDIStocHA8)
7814 continue;
7815
7816 if (!Base.hasOneUse() || !HBase.hasOneUse())
7817 continue;
7818
7819 SDValue HImmOpnd = HBase.getOperand(1);
7820 if (HImmOpnd != ImmOpnd)
7821 continue;
7822
7823 UpdateHBase = true;
7824 }
7825 } else {
7826 // Global addresses can be folded, but only if they are sufficiently
7827 // aligned.
7828 if (RequiresMod4Offset) {
7829 if (GlobalAddressSDNode *GA =
7830 dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7831 const GlobalValue *GV = GA->getGlobal();
7832 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7833 if (Alignment < 4)
7834 continue;
7835 }
7836 }
7837
7838 // If we're directly folding the addend from an addi instruction, then:
7839 // 1. In general, the offset on the memory access must be zero.
7840 // 2. If the addend is a constant, then it can be combined with a
7841 // non-zero offset, but only if the result meets the encoding
7842 // requirements.
7843 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
7844 Offset += C->getSExtValue();
7845
7846 if (RequiresMod4Offset && (Offset % 4) != 0)
7847 continue;
7848
7849 if (!isInt<16>(Offset))
7850 continue;
7851
7852 ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
7853 ImmOpnd.getValueType());
7854 } else if (Offset != 0) {
7855 // This optimization is performed for non-TOC-based local-exec accesses.
7857 // Add the non-zero offset information into the load or store
7858 // instruction to be used for non-TOC-based local-exec accesses.
7859 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
7860 assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7861 "addi into local-exec accesses!");
7862 ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
7863 MVT::i64, Offset,
7864 GA->getTargetFlags());
7865 } else
7866 continue;
7867 }
7868 }
7869
7870 // We found an opportunity. Reverse the operands from the add
7871 // immediate and substitute them into the load or store. If
7872 // needed, update the target flags for the immediate operand to
7873 // reflect the necessary relocation information.
7874 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
7875 LLVM_DEBUG(Base->dump(CurDAG));
7876 LLVM_DEBUG(dbgs() << "\nN: ");
7877 LLVM_DEBUG(N->dump(CurDAG));
7878 LLVM_DEBUG(dbgs() << "\n");
7879
7880 // If the relocation information isn't already present on the
7881 // immediate operand, add it now.
7882 if (ReplaceFlags) {
7883 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7884 SDLoc dl(GA);
7885 const GlobalValue *GV = GA->getGlobal();
7886 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7887 // We can't perform this optimization for data whose alignment
7888 // is insufficient for the instruction encoding.
7889 if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
7890 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
7891 continue;
7892 }
7893 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
7894 } else if (ConstantPoolSDNode *CP =
7895 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
7896 const Constant *C = CP->getConstVal();
7897 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
7898 Offset, Flags);
7899 }
7900 }
7901
7902 if (FirstOp == 1) // Store
7903 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
7904 Base.getOperand(0), N->getOperand(3));
7905 else // Load
7906 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
7907 N->getOperand(2));
7908
7909 if (UpdateHBase)
7910 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
7911 ImmOpnd);
7912
7913 // The add-immediate may now be dead, in which case remove it.
7914 if (Base.getNode()->use_empty())
7915 CurDAG->RemoveDeadNode(Base.getNode());
7916 }
7917}
7918
7919/// createPPCISelDag - This pass converts a legalized DAG into a
7920/// PowerPC-specific DAG, ready for instruction scheduling.
7921///
7923 CodeGenOptLevel OptLevel) {
7924 return new PPCDAGToDAGISel(TM, OptLevel);
7925}
unsigned SubReg
MachineBasicBlock MachineBasicBlock::iterator MBBI
amdgpu AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1291
const HexagonInstrInfo * TII
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:530
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:528
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base)
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
static bool hasTocDataAttr(SDValue Val)
static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG)
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode * > &ToPromote)
static bool isLaneInsensitive(SDValue N)
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N)
static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget, const TargetMachine &TM, const SDNode *Node)
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, const PPCSubtarget *Subtarget)
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG, SDValue ADDIToFold)
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG)
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num)
static unsigned getBranchHint(unsigned PCC, const FunctionLoweringInfo &FuncInfo, const SDValue &DestMBB)
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, bool &NeedSwapOps, bool &IsUnCmp)
static cl::opt< bool > EnableTLSOpt("ppc-tls-opt", cl::init(true), cl::desc("Enable tls optimization peephole"), cl::Hidden)
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
static cl::opt< ICmpInGPRType > CmpInGPR("ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), cl::desc("Specify the types of comparisons to emit GPR-only code for."), cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), clEnumValN(ICGPR_NonExtIn, "nonextin", "Only comparisons where inputs don't need [sz]ext."), clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), clEnumValN(ICGPR_ZextI32, "zexti32", "Only i32 comparisons with zext result."), clEnumValN(ICGPR_ZextI64, "zexti64", "Only i64 comparisons with zext result."), clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), clEnumValN(ICGPR_SextI32, "sexti32", "Only i32 comparisons with sext result."), clEnumValN(ICGPR_SextI64, "sexti64", "Only i64 comparisons with sext result.")))
static SDNode * selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
#define PASS_NAME
#define DEBUG_TYPE
static SDNode * selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
static bool hasAIXSmallTLSAttr(SDValue Val)
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden)
static bool isSWTestOp(SDValue N)
static SDNode * selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned *InstCnt=nullptr)
ICmpInGPRType
@ ICGPR_ZextI32
@ ICGPR_I64
@ ICGPR_All
@ ICGPR_None
@ ICGPR_NonExtIn
@ ICGPR_Sext
@ ICGPR_I32
@ ICGPR_SextI64
@ ICGPR_ZextI64
@ ICGPR_SextI32
@ ICGPR_Zext
static bool isVSXSwap(SDValue N)
Module * Mod
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
APInt rotr(unsigned rotateAmt) const
Rotate right by rotateAmt.
Definition: APInt.cpp:1124
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
MachineBasicBlock * getBasicBlock() const
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
BranchProbabilityInfo * BPI
MachineBasicBlock * MBB
MBB - The current block.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists.
static StringRef getMemConstraintName(ConstraintCode C)
Definition: InlineAsm.h:467
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition: Module.cpp:595
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setROPProtectionHashSaveIndex(int Idx)
static int getRecordFormOpcode(unsigned Opcode)
bool is32BitELFABI() const
Definition: PPCSubtarget.h:219
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:146
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
MCRegister getThreadPointerRegister() const
Definition: PPCSubtarget.h:283
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetELF() const
Definition: PPCSubtarget.h:210
CodeModel::Model getCodeModel(const TargetMachine &TM, const GlobalValue *GV) const
Calculates the effective code model for argument GV.
bool isELFv2ABI() const
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:152
Common code between 32-bit and 64-bit PowerPC targets.
Register getReg() const
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
void dump() const
Dump this node, for debugging.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual void PostprocessISelDAG()
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:721
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:477
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:530
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:531
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:471
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:472
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:772
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:542
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:675
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:468
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:738
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:553
ilist< SDNode >::iterator allnodes_iterator
Definition: SelectionDAG.h:533
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
iterator end() const
Definition: SmallPtrSet.h:385
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
iterator begin() const
Definition: SmallPtrSet.h:380
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5239
An efficient, type-erasing, non-owning reference to a callable.
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1052
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1415
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1556
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1562
@ MO_TLSLD_LO
Definition: PPC.h:184
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_DTPREL_LO
These values identify relocations on immediates folded into memory operations.
Definition: PPC.h:183
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TOC_LO
Definition: PPC.h:185
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ SRL
These nodes represent PPC shifts.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ CALL
CALL - A direct function call.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ FTSQRT
Test instruction for software square root.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ BR_NONTAKEN_HINT
Definition: PPCPredicates.h:64
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:136
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:246
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:141
FunctionPass * createPPCISelDag(PPCTargetMachine &TM, CodeGenOptLevel OL)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
unsigned int NumVTs