LLVM 23.0.0git
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pattern matching instruction selector for PowerPC,
10// converting from a legalized dag to a PPC dag.
11//
12//===----------------------------------------------------------------------===//
13
16#include "PPC.h"
17#include "PPCISelLowering.h"
19#include "PPCSelectionDAGInfo.h"
20#include "PPCSubtarget.h"
21#include "PPCTargetMachine.h"
22#include "llvm/ADT/APInt.h"
23#include "llvm/ADT/APSInt.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/STLExtras.h"
28#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/DebugLoc.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/InlineAsm.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/IntrinsicsPowerPC.h"
51#include "llvm/IR/Module.h"
56#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <new>
68#include <tuple>
69#include <utility>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "ppc-isel"
74#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
75
76STATISTIC(NumSextSetcc,
77 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
78STATISTIC(NumZextSetcc,
79 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
80STATISTIC(SignExtensionsAdded,
81 "Number of sign extensions for compare inputs added.");
82STATISTIC(ZeroExtensionsAdded,
83 "Number of zero extensions for compare inputs added.");
84STATISTIC(NumLogicOpsOnComparison,
85 "Number of logical ops on i1 values calculated in GPR.");
86STATISTIC(OmittedForNonExtendUses,
87 "Number of compares not eliminated as they have non-extending uses.");
88STATISTIC(NumP9Setb,
89 "Number of compares lowered to setb.");
90
91// FIXME: Remove this once the bug has been fixed!
92cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
93cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
94
95static cl::opt<bool>
96 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
97 cl::desc("use aggressive ppc isel for bit permutations"),
100 "ppc-bit-perm-rewriter-stress-rotates",
101 cl::desc("stress rotate selection in aggressive ppc isel for "
102 "bit permutations"),
103 cl::Hidden);
104
106 "ppc-use-branch-hint", cl::init(true),
107 cl::desc("Enable static hinting of branches on ppc"),
108 cl::Hidden);
109
111 "ppc-tls-opt", cl::init(true),
112 cl::desc("Enable tls optimization peephole"),
113 cl::Hidden);
114
118
120 "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
121 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
122 cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
123 clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
124 clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
125 clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
126 clEnumValN(ICGPR_NonExtIn, "nonextin",
127 "Only comparisons where inputs don't need [sz]ext."),
128 clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
129 clEnumValN(ICGPR_ZextI32, "zexti32",
130 "Only i32 comparisons with zext result."),
131 clEnumValN(ICGPR_ZextI64, "zexti64",
132 "Only i64 comparisons with zext result."),
133 clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
134 clEnumValN(ICGPR_SextI32, "sexti32",
135 "Only i32 comparisons with sext result."),
136 clEnumValN(ICGPR_SextI64, "sexti64",
137 "Only i64 comparisons with sext result.")));
138namespace {
139
140 //===--------------------------------------------------------------------===//
141 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
142 /// instructions for SelectionDAG operations.
143 ///
144 class PPCDAGToDAGISel : public SelectionDAGISel {
145 const PPCTargetMachine &TM;
146 const PPCSubtarget *Subtarget = nullptr;
147 const PPCTargetLowering *PPCLowering = nullptr;
148 unsigned GlobalBaseReg = 0;
149
150 public:
151 PPCDAGToDAGISel() = delete;
152
153 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
154 : SelectionDAGISel(tm, OptLevel), TM(tm) {}
155
156 bool runOnMachineFunction(MachineFunction &MF) override {
157 // Make sure we re-emit a set of the global base reg if necessary
158 GlobalBaseReg = 0;
159 Subtarget = &MF.getSubtarget<PPCSubtarget>();
160 PPCLowering = Subtarget->getTargetLowering();
161 if (Subtarget->hasROPProtect()) {
162 // Create a place on the stack for the ROP Protection Hash.
163 // The ROP Protection Hash will always be 8 bytes and aligned to 8
164 // bytes.
165 MachineFrameInfo &MFI = MF.getFrameInfo();
166 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
167 const int Result = MFI.CreateStackObject(8, Align(8), false);
169 }
171
172 return true;
173 }
174
175 void PreprocessISelDAG() override;
176 void PostprocessISelDAG() override;
177
178 /// getI16Imm - Return a target constant with the specified value, of type
179 /// i16.
180 inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
181 return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
182 }
183
184 /// getI32Imm - Return a target constant with the specified value, of type
185 /// i32.
186 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
187 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
188 }
189
190 /// getI64Imm - Return a target constant with the specified value, of type
191 /// i64.
192 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
193 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
194 }
195
196 /// getSmallIPtrImm - Return a target constant of pointer type.
197 inline SDValue getSmallIPtrImm(int64_t Imm, const SDLoc &dl) {
198 return CurDAG->getSignedTargetConstant(
199 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
200 }
201
202 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
203 /// rotate and mask opcode and mask operation.
204 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
205 unsigned &SH, unsigned &MB, unsigned &ME);
206
207 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
208 /// base register. Return the virtual register that holds this value.
209 SDNode *getGlobalBaseReg();
210
211 void selectFrameIndex(SDNode *SN, SDNode *N, int64_t Offset = 0);
212
213 // Select - Convert the specified operand from a target-independent to a
214 // target-specific node if it hasn't already been changed.
215 void Select(SDNode *N) override;
216
217 bool tryBitfieldInsert(SDNode *N);
218 bool tryBitPermutation(SDNode *N);
219 bool tryIntCompareInGPR(SDNode *N);
220
221 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
222 // an X-Form load instruction with the offset being a relocation coming from
223 // the PPCISD::ADD_TLS.
224 bool tryTLSXFormLoad(LoadSDNode *N);
225 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
226 // an X-Form store instruction with the offset being a relocation coming from
227 // the PPCISD::ADD_TLS.
228 bool tryTLSXFormStore(StoreSDNode *N);
229 /// SelectCC - Select a comparison of the specified values with the
230 /// specified condition code, returning the CR# of the expression.
232 const SDLoc &dl, SDValue Chain = SDValue());
233
234 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
235 /// immediate field. Note that the operand at this point is already the
236 /// result of a prior SelectAddressRegImm call.
237 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
238 if (N.getOpcode() == ISD::TargetConstant ||
239 N.getOpcode() == ISD::TargetGlobalAddress) {
240 Out = N;
241 return true;
242 }
243
244 return false;
245 }
246
247 /// SelectDSForm - Returns true if address N can be represented by the
248 /// addressing mode of DSForm instructions (a base register, plus a signed
249 /// 16-bit displacement that is a multiple of 4.
250 bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
251 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
252 Align(4)) == PPC::AM_DSForm;
253 }
254
255 /// SelectDQForm - Returns true if address N can be represented by the
256 /// addressing mode of DQForm instructions (a base register, plus a signed
257 /// 16-bit displacement that is a multiple of 16.
258 bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
259 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
260 Align(16)) == PPC::AM_DQForm;
261 }
262
263 /// SelectDForm - Returns true if address N can be represented by
264 /// the addressing mode of DForm instructions (a base register, plus a
265 /// signed 16-bit immediate.
266 bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
267 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
268 std::nullopt) == PPC::AM_DForm;
269 }
270
271 /// SelectPCRelForm - Returns true if address N can be represented by
272 /// PC-Relative addressing mode.
273 bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
274 SDValue &Base) {
275 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
276 std::nullopt) == PPC::AM_PCRel;
277 }
278
279 /// SelectPDForm - Returns true if address N can be represented by Prefixed
280 /// DForm addressing mode (a base register, plus a signed 34-bit immediate.
281 bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
282 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
283 std::nullopt) ==
285 }
286
287 /// SelectXForm - Returns true if address N can be represented by the
288 /// addressing mode of XForm instructions (an indexed [r+r] operation).
289 bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
290 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
291 std::nullopt) == PPC::AM_XForm;
292 }
293
294 /// SelectForceXForm - Given the specified address, force it to be
295 /// represented as an indexed [r+r] operation (an XForm instruction).
296 bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
297 SDValue &Base) {
298 return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
300 }
301
302 /// SelectAddrIdx - Given the specified address, check to see if it can be
303 /// represented as an indexed [r+r] operation.
304 /// This is for xform instructions whose associated displacement form is D.
305 /// The last parameter \p 0 means associated D form has no requirment for 16
306 /// bit signed displacement.
307 /// Returns false if it can be represented by [r+imm], which are preferred.
308 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
309 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
310 std::nullopt);
311 }
312
313 /// SelectAddrIdx4 - Given the specified address, check to see if it can be
314 /// represented as an indexed [r+r] operation.
315 /// This is for xform instructions whose associated displacement form is DS.
316 /// The last parameter \p 4 means associated DS form 16 bit signed
317 /// displacement must be a multiple of 4.
318 /// Returns false if it can be represented by [r+imm], which are preferred.
319 bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
320 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
321 Align(4));
322 }
323
324 /// SelectAddrIdx16 - Given the specified address, check to see if it can be
325 /// represented as an indexed [r+r] operation.
326 /// This is for xform instructions whose associated displacement form is DQ.
327 /// The last parameter \p 16 means associated DQ form 16 bit signed
328 /// displacement must be a multiple of 16.
329 /// Returns false if it can be represented by [r+imm], which are preferred.
330 bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
331 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
332 Align(16));
333 }
334
335 /// SelectAddrIdxOnly - Given the specified address, force it to be
336 /// represented as an indexed [r+r] operation.
337 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
338 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
339 }
340
341 /// SelectAddrImm - Returns true if the address N can be represented by
342 /// a base register plus a signed 16-bit displacement [r+imm].
343 /// The last parameter \p 0 means D form has no requirment for 16 bit signed
344 /// displacement.
345 bool SelectAddrImm(SDValue N, SDValue &Disp,
346 SDValue &Base) {
347 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
348 std::nullopt);
349 }
350
351 /// SelectAddrImmX4 - Returns true if the address N can be represented by
352 /// a base register plus a signed 16-bit displacement that is a multiple of
353 /// 4 (last parameter). Suitable for use by STD and friends.
354 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
355 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
356 }
357
358 /// SelectAddrImmX16 - Returns true if the address N can be represented by
359 /// a base register plus a signed 16-bit displacement that is a multiple of
360 /// 16(last parameter). Suitable for use by STXV and friends.
361 bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
362 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
363 Align(16));
364 }
365
366 /// SelectAddrImmX34 - Returns true if the address N can be represented by
367 /// a base register plus a signed 34-bit displacement. Suitable for use by
368 /// PSTXVP and friends.
369 bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
370 return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
371 }
372
373 // Select an address into a single register.
374 bool SelectAddr(SDValue N, SDValue &Base) {
375 Base = N;
376 return true;
377 }
378
379 bool SelectAddrPCRel(SDValue N, SDValue &Base) {
380 return PPCLowering->SelectAddressPCRel(N, Base);
381 }
382
383 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
384 /// inline asm expressions. It is always correct to compute the value into
385 /// a register. The case of adding a (possibly relocatable) constant to a
386 /// register can be improved, but it is wrong to substitute Reg+Reg for
387 /// Reg in an asm, because the load or store opcode would have to change.
388 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
389 InlineAsm::ConstraintCode ConstraintID,
390 std::vector<SDValue> &OutOps) override {
391 switch(ConstraintID) {
392 default:
393 errs() << "ConstraintID: "
394 << InlineAsm::getMemConstraintName(ConstraintID) << "\n";
395 llvm_unreachable("Unexpected asm memory constraint");
396 case InlineAsm::ConstraintCode::es:
397 case InlineAsm::ConstraintCode::m:
398 case InlineAsm::ConstraintCode::o:
399 case InlineAsm::ConstraintCode::Q:
400 case InlineAsm::ConstraintCode::Z:
401 case InlineAsm::ConstraintCode::Zy:
402 // We need to make sure that this one operand does not end up in r0
403 // (because we might end up lowering this as 0(%op)).
404 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
405 const TargetRegisterClass *TRC = TRI->getPointerRegClass(/*Kind=*/1);
406 SDLoc dl(Op);
407 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
408 SDValue NewOp =
409 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
410 dl, Op.getValueType(),
411 Op, RC), 0);
412
413 OutOps.push_back(NewOp);
414 return false;
415 }
416 return true;
417 }
418
419// Include the pieces autogenerated from the target description.
420#include "PPCGenDAGISel.inc"
421
422private:
423 bool trySETCC(SDNode *N);
424 bool tryFoldSWTestBRCC(SDNode *N);
425 bool trySelectLoopCountIntrinsic(SDNode *N);
426 bool tryAsSingleRLDICL(SDNode *N);
427 bool tryAsSingleRLDCL(SDNode *N);
428 bool tryAsSingleRLDICR(SDNode *N);
429 bool tryAsSingleRLWINM(SDNode *N);
430 bool tryAsSingleRLWINM8(SDNode *N);
431 bool tryAsSingleRLWIMI(SDNode *N);
432 bool tryAsPairOfRLDICL(SDNode *N);
433 bool tryAsSingleRLDIMI(SDNode *N);
434
435 void PeepholePPC64();
436 void PeepholePPC64ZExt();
437 void PeepholeCROps();
438
439 SDValue combineToCMPB(SDNode *N);
440 void foldBoolExts(SDValue &Res, SDNode *&N);
441
442 bool AllUsersSelectZero(SDNode *N);
443 void SwapAllSelectUsers(SDNode *N);
444
445 bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
446 void transferMemOperands(SDNode *N, SDNode *Result);
447 };
448
449 class PPCDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
450 public:
451 static char ID;
452 explicit PPCDAGToDAGISelLegacy(PPCTargetMachine &tm,
453 CodeGenOptLevel OptLevel)
454 : SelectionDAGISelLegacy(
455 ID, std::make_unique<PPCDAGToDAGISel>(tm, OptLevel)) {}
456 };
457} // end anonymous namespace
458
459char PPCDAGToDAGISelLegacy::ID = 0;
460
461INITIALIZE_PASS(PPCDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
462
463/// getGlobalBaseReg - Output the instructions required to put the
464/// base address to use for accessing globals into a register.
465///
466SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
467 if (!GlobalBaseReg) {
468 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
469 // Insert the set of GlobalBaseReg into the first MBB of the function
470 MachineBasicBlock &FirstMBB = MF->front();
472 const Module *M = MF->getFunction().getParent();
473 DebugLoc dl;
474
475 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
476 if (Subtarget->isTargetELF()) {
477 GlobalBaseReg = PPC::R30;
478 if (!Subtarget->isSecurePlt() &&
479 M->getPICLevel() == PICLevel::SmallPIC) {
480 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
481 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
482 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
483 } else {
484 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
485 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
486 Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
487 BuildMI(FirstMBB, MBBI, dl,
488 TII.get(PPC::UpdateGBR), GlobalBaseReg)
489 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
490 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
491 }
492 } else {
494 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
495 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
496 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
497 }
498 } else {
499 // We must ensure that this sequence is dominated by the prologue.
500 // FIXME: This is a bit of a big hammer since we don't get the benefits
501 // of shrink-wrapping whenever we emit this instruction. Considering
502 // this is used in any function where we emit a jump table, this may be
503 // a significant limitation. We should consider inserting this in the
504 // block where it is used and then commoning this sequence up if it
505 // appears in multiple places.
506 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
507 // MovePCtoLR8.
508 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
509 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
510 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
511 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
512 }
513 }
514 return CurDAG->getRegister(GlobalBaseReg,
515 PPCLowering->getPointerTy(CurDAG->getDataLayout()))
516 .getNode();
517}
518
519// Check if a SDValue has the toc-data attribute.
520static bool hasTocDataAttr(SDValue Val) {
522 if (!GA)
523 return false;
524
526 if (!GV)
527 return false;
528
529 if (!GV->hasAttribute("toc-data"))
530 return false;
531 return true;
532}
533
535 const TargetMachine &TM,
536 const SDNode *Node) {
537 // If there isn't an attribute to override the module code model
538 // this will be the effective code model.
539 CodeModel::Model ModuleModel = TM.getCodeModel();
540
542 if (!GA)
543 return ModuleModel;
544
545 const GlobalValue *GV = GA->getGlobal();
546 if (!GV)
547 return ModuleModel;
548
549 return Subtarget.getCodeModel(TM, GV);
550}
551
552/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
553/// operand. If so Imm will receive the 32-bit value.
554static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
555 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
556 Imm = N->getAsZExtVal();
557 return true;
558 }
559 return false;
560}
561
562/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
563/// operand. If so Imm will receive the 64-bit value.
564static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
565 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
566 Imm = N->getAsZExtVal();
567 return true;
568 }
569 return false;
570}
571
572// isInt32Immediate - This method tests to see if a constant operand.
573// If so Imm will receive the 32 bit value.
574static bool isInt32Immediate(SDValue N, unsigned &Imm) {
575 return isInt32Immediate(N.getNode(), Imm);
576}
577
578/// isInt64Immediate - This method tests to see if the value is a 64-bit
579/// constant operand. If so Imm will receive the 64-bit value.
580static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
581 return isInt64Immediate(N.getNode(), Imm);
582}
583
584static unsigned getBranchHint(unsigned PCC,
585 const FunctionLoweringInfo &FuncInfo,
586 const SDValue &DestMBB) {
588
589 if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
590
591 const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
592 const Instruction *BBTerm = BB->getTerminator();
593
594 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
595
596 const BasicBlock *TBB = BBTerm->getSuccessor(0);
597 const BasicBlock *FBB = BBTerm->getSuccessor(1);
598
599 auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
600 auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
601
602 // We only want to handle cases which are easy to predict at static time, e.g.
603 // C++ throw statement, that is very likely not taken, or calling never
604 // returned function, e.g. stdlib exit(). So we set Threshold to filter
605 // unwanted cases.
606 //
607 // Below is LLVM branch weight table, we only want to handle case 1, 2
608 //
609 // Case Taken:Nontaken Example
610 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
611 // 2. Invoke-terminating 1:1048575
612 // 3. Coldblock 4:64 __builtin_expect
613 // 4. Loop Branch 124:4 For loop
614 // 5. PH/ZH/FPH 20:12
615 const uint32_t Threshold = 10000;
616
617 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
618 return PPC::BR_NO_HINT;
619
620 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
621 << "::" << BB->getName() << "'\n"
622 << " -> " << TBB->getName() << ": " << TProb << "\n"
623 << " -> " << FBB->getName() << ": " << FProb << "\n");
624
625 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
626
627 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
628 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
629 if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
630 std::swap(TProb, FProb);
631
632 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
633}
634
635// isOpcWithIntImmediate - This method tests to see if the node is a specific
636// opcode and that it has a immediate integer right operand.
637// If so Imm will receive the 32 bit value.
638static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
639 return N->getOpcode() == Opc
640 && isInt32Immediate(N->getOperand(1).getNode(), Imm);
641}
642
643void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, int64_t Offset) {
644 SDLoc dl(SN);
645 int FI = cast<FrameIndexSDNode>(N)->getIndex();
646 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
647 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
648 if (SN->hasOneUse())
649 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
650 getSmallIPtrImm(Offset, dl));
651 else
652 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
653 getSmallIPtrImm(Offset, dl)));
654}
655
656bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
657 bool isShiftMask, unsigned &SH,
658 unsigned &MB, unsigned &ME) {
659 // Don't even go down this path for i64, since different logic will be
660 // necessary for rldicl/rldicr/rldimi.
661 if (N->getValueType(0) != MVT::i32)
662 return false;
663
664 unsigned Shift = 32;
665 unsigned Indeterminant = ~0; // bit mask marking indeterminant results
666 unsigned Opcode = N->getOpcode();
667 if (N->getNumOperands() != 2 ||
668 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
669 return false;
670
671 if (Opcode == ISD::SHL) {
672 // apply shift left to mask if it comes first
673 if (isShiftMask) Mask = Mask << Shift;
674 // determine which bits are made indeterminant by shift
675 Indeterminant = ~(0xFFFFFFFFu << Shift);
676 } else if (Opcode == ISD::SRL) {
677 // apply shift right to mask if it comes first
678 if (isShiftMask) Mask = Mask >> Shift;
679 // determine which bits are made indeterminant by shift
680 Indeterminant = ~(0xFFFFFFFFu >> Shift);
681 // adjust for the left rotate
682 Shift = 32 - Shift;
683 } else if (Opcode == ISD::ROTL) {
684 Indeterminant = 0;
685 } else {
686 return false;
687 }
688
689 // if the mask doesn't intersect any Indeterminant bits
690 if (Mask && !(Mask & Indeterminant)) {
691 SH = Shift & 31;
692 // make sure the mask is still a mask (wrap arounds may not be)
693 return isRunOfOnes(Mask, MB, ME);
694 }
695 return false;
696}
697
698// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
699// instruction use the thread pointer.
701 assert(
702 Base.getOpcode() == PPCISD::ADD_TLS &&
703 "Only expecting the ADD_TLS instruction to acquire the thread pointer!");
704 const PPCSubtarget &Subtarget =
706 SDValue ADDTLSOp1 = Base.getOperand(0);
707 unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
708
709 // Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
710 //
711 // Although ADD_TLS does not explicitly use the thread pointer
712 // register when LD_GOT_TPREL_L is one of its operands, the LD_GOT_TPREL_L
713 // instruction will have a relocation specifier, @got@tprel, that is used to
714 // generate a GOT entry. The linker replaces this entry with an offset for
715 // a thread local variable, which will be relative to the thread pointer.
716 if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
717 return true;
718 // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
719 // node is produced instead to represent the aforementioned situation.
720 LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSOp1);
721 if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
722 return true;
723
724 // A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand
725 // to ADD_TLS represents a call to .__get_tpointer to get the thread pointer,
726 // later returning it into R3.
727 if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
728 return true;
729
730 // The ADD_TLS node is explicitly acquiring the thread pointer (X13/R13).
731 RegisterSDNode *AddFirstOpReg =
732 dyn_cast_or_null<RegisterSDNode>(ADDTLSOp1.getNode());
733 if (AddFirstOpReg &&
734 AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
735 return true;
736
737 return false;
738}
739
740// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
741// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
742// operation, can be optimized to use an X-Form load or store, allowing the
743// ADD_TLS node to be removed completely.
745
746 // Do not do this transformation at -O0.
748 return false;
749
750 // In order to perform this optimization inside tryTLSXForm[Load|Store],
751 // Base is expected to be an ADD_TLS node.
752 if (Base.getOpcode() != PPCISD::ADD_TLS)
753 return false;
754 for (auto *ADDTLSUse : Base.getNode()->users()) {
755 // The optimization to convert the D-Form load/store into its X-Form
756 // counterpart should only occur if the source value offset of the load/
757 // store is 0. This also means that the offset should always be undefined.
758 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSUse)) {
759 if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
760 return false;
761 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(ADDTLSUse)) {
762 if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
763 return false;
764 } else // Don't optimize if there are ADD_TLS users that aren't load/stores.
765 return false;
766 }
767
768 if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
769 return false;
770
771 // Does the ADD_TLS node of the load/store use the thread pointer?
772 // If the thread pointer is not used as one of the operands of ADD_TLS,
773 // then this optimization is not valid.
774 return isThreadPointerAcquisitionNode(Base, CurDAG);
775}
776
777bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
778 SDValue Base = ST->getBasePtr();
779 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
780 return false;
781
782 SDLoc dl(ST);
783 EVT MemVT = ST->getMemoryVT();
784 EVT RegVT = ST->getValue().getValueType();
785
786 unsigned Opcode;
787 switch (MemVT.getSimpleVT().SimpleTy) {
788 default:
789 return false;
790 case MVT::i8: {
791 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
792 break;
793 }
794 case MVT::i16: {
795 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
796 break;
797 }
798 case MVT::i32: {
799 Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
800 break;
801 }
802 case MVT::i64: {
803 Opcode = PPC::STDXTLS;
804 break;
805 }
806 case MVT::f32: {
807 Opcode = PPC::STFSXTLS;
808 break;
809 }
810 case MVT::f64: {
811 Opcode = PPC::STFDXTLS;
812 break;
813 }
814 }
815 SDValue Chain = ST->getChain();
816 SDVTList VTs = ST->getVTList();
817 SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
818 Chain};
819 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
820 transferMemOperands(ST, MN);
821 ReplaceNode(ST, MN);
822 return true;
823}
824
825bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
826 SDValue Base = LD->getBasePtr();
827 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
828 return false;
829
830 SDLoc dl(LD);
831 EVT MemVT = LD->getMemoryVT();
832 EVT RegVT = LD->getValueType(0);
833 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
834 unsigned Opcode;
835 switch (MemVT.getSimpleVT().SimpleTy) {
836 default:
837 return false;
838 case MVT::i8: {
839 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
840 break;
841 }
842 case MVT::i16: {
843 if (RegVT == MVT::i32)
844 Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
845 else
846 Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
847 break;
848 }
849 case MVT::i32: {
850 if (RegVT == MVT::i32)
851 Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
852 else
853 Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
854 break;
855 }
856 case MVT::i64: {
857 Opcode = PPC::LDXTLS;
858 break;
859 }
860 case MVT::f32: {
861 Opcode = PPC::LFSXTLS;
862 break;
863 }
864 case MVT::f64: {
865 Opcode = PPC::LFDXTLS;
866 break;
867 }
868 }
869 SDValue Chain = LD->getChain();
870 SDVTList VTs = LD->getVTList();
871 SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
872 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
873 transferMemOperands(LD, MN);
874 ReplaceNode(LD, MN);
875 return true;
876}
877
878/// Turn an or of two masked values into the rotate left word immediate then
879/// mask insert (rlwimi) instruction.
880bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
881 SDValue Op0 = N->getOperand(0);
882 SDValue Op1 = N->getOperand(1);
883 SDLoc dl(N);
884
885 KnownBits LKnown = CurDAG->computeKnownBits(Op0);
886 KnownBits RKnown = CurDAG->computeKnownBits(Op1);
887
888 unsigned TargetMask = LKnown.Zero.getZExtValue();
889 unsigned InsertMask = RKnown.Zero.getZExtValue();
890
891 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
892 unsigned Op0Opc = Op0.getOpcode();
893 unsigned Op1Opc = Op1.getOpcode();
894 unsigned Value, SH = 0;
895 TargetMask = ~TargetMask;
896 InsertMask = ~InsertMask;
897
898 // If the LHS has a foldable shift and the RHS does not, then swap it to the
899 // RHS so that we can fold the shift into the insert.
900 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
901 if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
902 Op0.getOperand(0).getOpcode() == ISD::SRL) {
903 if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
904 Op1.getOperand(0).getOpcode() != ISD::SRL) {
905 std::swap(Op0, Op1);
906 std::swap(Op0Opc, Op1Opc);
907 std::swap(TargetMask, InsertMask);
908 }
909 }
910 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
911 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
912 Op1.getOperand(0).getOpcode() != ISD::SRL) {
913 std::swap(Op0, Op1);
914 std::swap(Op0Opc, Op1Opc);
915 std::swap(TargetMask, InsertMask);
916 }
917 }
918
919 unsigned MB, ME;
920 if (isRunOfOnes(InsertMask, MB, ME)) {
921 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
923 Op1 = Op1.getOperand(0);
924 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
925 }
926 if (Op1Opc == ISD::AND) {
927 // The AND mask might not be a constant, and we need to make sure that
928 // if we're going to fold the masking with the insert, all bits not
929 // know to be zero in the mask are known to be one.
930 KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
931 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
932
933 unsigned SHOpc = Op1.getOperand(0).getOpcode();
934 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
936 // Note that Value must be in range here (less than 32) because
937 // otherwise there would not be any bits set in InsertMask.
938 Op1 = Op1.getOperand(0).getOperand(0);
939 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
940 }
941 }
942
943 SH &= 31;
944 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
945 getI32Imm(ME, dl) };
946 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
947 return true;
948 }
949 }
950 return false;
951}
952
953static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
954 unsigned MaxTruncation = 0;
955 // Cannot use range-based for loop here as we need the actual use (i.e. we
956 // need the operand number corresponding to the use). A range-based for
957 // will unbox the use and provide an SDNode*.
958 for (SDUse &Use : N->uses()) {
959 SDNode *User = Use.getUser();
960 unsigned Opc =
961 User->isMachineOpcode() ? User->getMachineOpcode() : User->getOpcode();
962 switch (Opc) {
963 default: return 0;
964 case ISD::TRUNCATE:
965 if (User->isMachineOpcode())
966 return 0;
967 MaxTruncation = std::max(MaxTruncation,
968 (unsigned)User->getValueType(0).getSizeInBits());
969 continue;
970 case ISD::STORE: {
971 if (User->isMachineOpcode())
972 return 0;
974 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
975 if (MemVTSize == 64 || Use.getOperandNo() != 0)
976 return 0;
977 MaxTruncation = std::max(MaxTruncation, MemVTSize);
978 continue;
979 }
980 case PPC::STW8:
981 case PPC::STWX8:
982 case PPC::STWU8:
983 case PPC::STWUX8:
984 if (Use.getOperandNo() != 0)
985 return 0;
986 MaxTruncation = std::max(MaxTruncation, 32u);
987 continue;
988 case PPC::STH8:
989 case PPC::STHX8:
990 case PPC::STHU8:
991 case PPC::STHUX8:
992 if (Use.getOperandNo() != 0)
993 return 0;
994 MaxTruncation = std::max(MaxTruncation, 16u);
995 continue;
996 case PPC::STB8:
997 case PPC::STBX8:
998 case PPC::STBU8:
999 case PPC::STBUX8:
1000 if (Use.getOperandNo() != 0)
1001 return 0;
1002 MaxTruncation = std::max(MaxTruncation, 8u);
1003 continue;
1004 }
1005 }
1006 return MaxTruncation;
1007}
1008
1009// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
1010// zeros and return the number of bits by the left of these consecutive zeros.
1011static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
1012 unsigned HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
1013 unsigned LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
1014 if ((HiTZ + LoLZ) >= Num)
1015 return (32 + HiTZ);
1016 return 0;
1017}
1018
1019// Direct materialization of 64-bit constants by enumerated patterns.
1020static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
1021 uint64_t Imm, unsigned &InstCnt) {
1022 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1023 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1024 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1025 unsigned LO = llvm::countl_one<uint64_t>(Imm);
1026 unsigned Hi32 = Hi_32(Imm);
1027 unsigned Lo32 = Lo_32(Imm);
1028 SDNode *Result = nullptr;
1029 unsigned Shift = 0;
1030
1031 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1032 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1033 };
1034
1035 // Following patterns use 1 instructions to materialize the Imm.
1036 InstCnt = 1;
1037 // 1-1) Patterns : {zeros}{15-bit valve}
1038 // {ones}{15-bit valve}
1039 if (isInt<16>(Imm)) {
1040 SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1041 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1042 }
1043 // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}
1044 // {ones}{15-bit valve}{16 zeros}
1045 if (TZ > 15 && (LZ > 32 || LO > 32))
1046 return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1047 getI32Imm((Imm >> 16) & 0xffff));
1048
1049 // Following patterns use 2 instructions to materialize the Imm.
1050 InstCnt = 2;
1051 assert(LZ < 64 && "Unexpected leading zeros here.");
1052 // Count of ones follwing the leading zeros.
1053 unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
1054 // 2-1) Patterns : {zeros}{31-bit value}
1055 // {ones}{31-bit value}
1056 if (isInt<32>(Imm)) {
1057 uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
1058 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1059 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1060 return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1061 getI32Imm(Imm & 0xffff));
1062 }
1063 // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
1064 // {zeros}{15-bit value}{zeros}
1065 // {zeros}{ones}{15-bit value}
1066 // {ones}{15-bit value}{zeros}
1067 // We can take advantage of LI's sign-extension semantics to generate leading
1068 // ones, and then use RLDIC to mask off the ones in both sides after rotation.
1069 if ((LZ + FO + TZ) > 48) {
1070 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1071 getI32Imm((Imm >> TZ) & 0xffff));
1072 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1073 getI32Imm(TZ), getI32Imm(LZ));
1074 }
1075 // 2-3) Pattern : {zeros}{15-bit value}{ones}
1076 // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,
1077 // therefore we can take advantage of LI's sign-extension semantics, and then
1078 // mask them off after rotation.
1079 //
1080 // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
1081 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1082 // +------------------------+ +------------------------+
1083 // 63 0 63 0
1084 // Imm (Imm >> (48 - LZ) & 0xffff)
1085 // +----sext-----|--16-bit--+ +clear-|-----------------+
1086 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1087 // +------------------------+ +------------------------+
1088 // 63 0 63 0
1089 // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
1090 if ((LZ + TO) > 48) {
1091 // Since the immediates with (LZ > 32) have been handled by previous
1092 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1093 // the Imm by a negative value.
1094 assert(LZ <= 32 && "Unexpected shift value.");
1095 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1096 getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1097 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1098 getI32Imm(48 - LZ), getI32Imm(LZ));
1099 }
1100 // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1101 // {ones}{15-bit value}{ones}
1102 // We can take advantage of LI's sign-extension semantics to generate leading
1103 // ones, and then use RLDICL to mask off the ones in left sides (if required)
1104 // after rotation.
1105 //
1106 // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
1107 // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
1108 // +------------------------+ +------------------------+
1109 // 63 0 63 0
1110 // Imm (Imm >> TO) & 0xffff
1111 // +----sext-----|--16-bit--+ +LZ|---------------------+
1112 // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
1113 // +------------------------+ +------------------------+
1114 // 63 0 63 0
1115 // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1116 if ((LZ + FO + TO) > 48) {
1117 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1118 getI32Imm((Imm >> TO) & 0xffff));
1119 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1120 getI32Imm(TO), getI32Imm(LZ));
1121 }
1122 // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1123 // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1124 // value, we can use LI for Lo16 without generating leading ones then add the
1125 // Hi16(in Lo32).
1126 if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1127 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1128 getI32Imm(Lo32 & 0xffff));
1129 return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
1130 getI32Imm(Lo32 >> 16));
1131 }
1132 // 2-6) Patterns : {******}{49 zeros}{******}
1133 // {******}{49 ones}{******}
1134 // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1135 // bits remain on both sides. Rotate right the Imm to construct an int<16>
1136 // value, use LI for int<16> value and then use RLDICL without mask to rotate
1137 // it back.
1138 //
1139 // 1) findContiguousZerosAtLeast(Imm, 49)
1140 // +------|--zeros-|------+ +---ones--||---15 bit--+
1141 // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
1142 // +----------------------+ +----------------------+
1143 // 63 0 63 0
1144 //
1145 // 2) findContiguousZerosAtLeast(~Imm, 49)
1146 // +------|--ones--|------+ +---ones--||---15 bit--+
1147 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1148 // +----------------------+ +----------------------+
1149 // 63 0 63 0
1150 if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
1151 (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
1152 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1153 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1154 getI32Imm(RotImm & 0xffff));
1155 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1156 getI32Imm(Shift), getI32Imm(0));
1157 }
1158 // 2-7) Patterns : High word == Low word
1159 // This may require 2 to 3 instructions, depending on whether Lo32 can be
1160 // materialized in 1 instruction.
1161 if (Hi32 == Lo32) {
1162 // Handle the first 32 bits.
1163 uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1164 uint64_t ImmLo16 = Lo32 & 0xffff;
1165 if (isInt<16>(Lo32))
1166 Result =
1167 CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
1168 else if (!ImmLo16)
1169 Result =
1170 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1171 else {
1172 InstCnt = 3;
1173 Result =
1174 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1175 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1176 SDValue(Result, 0), getI32Imm(ImmLo16));
1177 }
1178 // Use rldimi to insert the Low word into High word.
1179 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1180 getI32Imm(0)};
1181 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1182 }
1183
1184 // Following patterns use 3 instructions to materialize the Imm.
1185 InstCnt = 3;
1186 // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1187 // {zeros}{31-bit value}{zeros}
1188 // {zeros}{ones}{31-bit value}
1189 // {ones}{31-bit value}{zeros}
1190 // We can take advantage of LIS's sign-extension semantics to generate leading
1191 // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1192 // ones in both sides after rotation.
1193 if ((LZ + FO + TZ) > 32) {
1194 uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1195 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1196 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1197 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1198 getI32Imm((Imm >> TZ) & 0xffff));
1199 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1200 getI32Imm(TZ), getI32Imm(LZ));
1201 }
1202 // 3-2) Pattern : {zeros}{31-bit value}{ones}
1203 // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
1204 // value, therefore we can take advantage of LIS's sign-extension semantics,
1205 // add the remaining bits with ORI, and then mask them off after rotation.
1206 // This is similar to Pattern 2-3, please refer to the diagram there.
1207 if ((LZ + TO) > 32) {
1208 // Since the immediates with (LZ > 32) have been handled by previous
1209 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1210 // the Imm by a negative value.
1211 assert(LZ <= 32 && "Unexpected shift value.");
1212 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1213 getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1214 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1215 getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1216 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1217 getI32Imm(32 - LZ), getI32Imm(LZ));
1218 }
1219 // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1220 // {ones}{31-bit value}{ones}
1221 // We can take advantage of LIS's sign-extension semantics to generate leading
1222 // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1223 // ones in left sides (if required) after rotation.
1224 // This is similar to Pattern 2-4, please refer to the diagram there.
1225 if ((LZ + FO + TO) > 32) {
1226 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1227 getI32Imm((Imm >> (TO + 16)) & 0xffff));
1228 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1229 getI32Imm((Imm >> TO) & 0xffff));
1230 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1231 getI32Imm(TO), getI32Imm(LZ));
1232 }
1233 // 3-4) Patterns : {******}{33 zeros}{******}
1234 // {******}{33 ones}{******}
1235 // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1236 // bits remain on both sides. Rotate right the Imm to construct an int<32>
1237 // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1238 // rotate it back.
1239 // This is similar to Pattern 2-6, please refer to the diagram there.
1240 if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
1241 (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
1242 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1243 uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1244 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1245 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1246 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1247 getI32Imm(RotImm & 0xffff));
1248 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1249 getI32Imm(Shift), getI32Imm(0));
1250 }
1251
1252 InstCnt = 0;
1253 return nullptr;
1254}
1255
1256// Try to select instructions to generate a 64 bit immediate using prefix as
1257// well as non prefix instructions. The function will return the SDNode
1258// to materialize that constant or it will return nullptr if it does not
1259// find one. The variable InstCnt is set to the number of instructions that
1260// were selected.
1262 uint64_t Imm, unsigned &InstCnt) {
1263 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1264 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1265 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1266 unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1267 unsigned Hi32 = Hi_32(Imm);
1268 unsigned Lo32 = Lo_32(Imm);
1269
1270 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1271 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1272 };
1273
1274 auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1275 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1276 };
1277
1278 // Following patterns use 1 instruction to materialize Imm.
1279 InstCnt = 1;
1280
1281 // The pli instruction can materialize up to 34 bits directly.
1282 // If a constant fits within 34-bits, emit the pli instruction here directly.
1283 if (isInt<34>(Imm))
1284 return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1285 CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1286
1287 // Require at least two instructions.
1288 InstCnt = 2;
1289 SDNode *Result = nullptr;
1290 // Patterns : {zeros}{ones}{33-bit value}{zeros}
1291 // {zeros}{33-bit value}{zeros}
1292 // {zeros}{ones}{33-bit value}
1293 // {ones}{33-bit value}{zeros}
1294 // We can take advantage of PLI's sign-extension semantics to generate leading
1295 // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1296 if ((LZ + FO + TZ) > 30) {
1297 APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1298 APInt Extended = SignedInt34.sext(64);
1299 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1300 getI64Imm(Extended.getZExtValue()));
1301 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1302 getI32Imm(TZ), getI32Imm(LZ));
1303 }
1304 // Pattern : {zeros}{33-bit value}{ones}
1305 // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1306 // therefore we can take advantage of PLI's sign-extension semantics, and then
1307 // mask them off after rotation.
1308 //
1309 // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1310 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1311 // +------------------------+ +------------------------+
1312 // 63 0 63 0
1313 //
1314 // +----sext-----|--34-bit--+ +clear-|-----------------+
1315 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1316 // +------------------------+ +------------------------+
1317 // 63 0 63 0
1318 if ((LZ + TO) > 30) {
1319 APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1320 APInt Extended = SignedInt34.sext(64);
1321 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1322 getI64Imm(Extended.getZExtValue()));
1323 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1324 getI32Imm(30 - LZ), getI32Imm(LZ));
1325 }
1326 // Patterns : {zeros}{ones}{33-bit value}{ones}
1327 // {ones}{33-bit value}{ones}
1328 // Similar to LI we can take advantage of PLI's sign-extension semantics to
1329 // generate leading ones, and then use RLDICL to mask off the ones in left
1330 // sides (if required) after rotation.
1331 if ((LZ + FO + TO) > 30) {
1332 APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1333 APInt Extended = SignedInt34.sext(64);
1334 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1335 getI64Imm(Extended.getZExtValue()));
1336 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1337 getI32Imm(TO), getI32Imm(LZ));
1338 }
1339 // Patterns : {******}{31 zeros}{******}
1340 // : {******}{31 ones}{******}
1341 // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1342 // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
1343 // for the int<33> value and then use RLDICL without a mask to rotate it back.
1344 //
1345 // +------|--ones--|------+ +---ones--||---33 bit--+
1346 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1347 // +----------------------+ +----------------------+
1348 // 63 0 63 0
1349 for (unsigned Shift = 0; Shift < 63; ++Shift) {
1350 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1351 if (isInt<34>(RotImm)) {
1352 Result =
1353 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1354 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1355 SDValue(Result, 0), getI32Imm(Shift),
1356 getI32Imm(0));
1357 }
1358 }
1359
1360 // Patterns : High word == Low word
1361 // This is basically a splat of a 32 bit immediate.
1362 if (Hi32 == Lo32) {
1363 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1364 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1365 getI32Imm(0)};
1366 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1367 }
1368
1369 InstCnt = 3;
1370 // Catch-all
1371 // This pattern can form any 64 bit immediate in 3 instructions.
1372 SDNode *ResultHi =
1373 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1374 SDNode *ResultLo =
1375 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1376 SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1377 getI32Imm(0)};
1378 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1379}
1380
1381static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1382 unsigned *InstCnt = nullptr) {
1383 unsigned InstCntDirect = 0;
1384 // No more than 3 instructions are used if we can select the i64 immediate
1385 // directly.
1386 SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1387
1388 const PPCSubtarget &Subtarget =
1390
1391 // If we have prefixed instructions and there is a chance we can
1392 // materialize the constant with fewer prefixed instructions than
1393 // non-prefixed, try that.
1394 if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1395 unsigned InstCntDirectP = 0;
1396 SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1397 // Use the prefix case in either of two cases:
1398 // 1) We have no result from the non-prefix case to use.
1399 // 2) The non-prefix case uses more instructions than the prefix case.
1400 // If the prefix and non-prefix cases use the same number of instructions
1401 // we will prefer the non-prefix case.
1402 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1403 if (InstCnt)
1404 *InstCnt = InstCntDirectP;
1405 return ResultP;
1406 }
1407 }
1408
1409 if (Result) {
1410 if (InstCnt)
1411 *InstCnt = InstCntDirect;
1412 return Result;
1413 }
1414 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1415 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1416 };
1417
1418 uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff;
1419 uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff;
1420
1421 // Try to use 4 instructions to materialize the immediate which is "almost" a
1422 // splat of a 32 bit immediate.
1423 if (Hi16OfLo32 && Lo16OfLo32) {
1424 uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff;
1425 uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff;
1426 bool IsSelected = false;
1427
1428 auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
1429 SDNode *Result =
1430 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
1431 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1432 SDValue(Result, 0), getI32Imm(Lo16));
1433 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1434 getI32Imm(0)};
1435 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1436 };
1437
1438 if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1439 IsSelected = true;
1440 Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1441 // Modify Hi16OfHi32.
1442 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48),
1443 getI32Imm(0)};
1444 Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1445 } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1446 IsSelected = true;
1447 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1448 // Modify Lo16OfLo32.
1449 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1450 getI32Imm(16), getI32Imm(31)};
1451 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1452 } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1453 IsSelected = true;
1454 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1455 // Modify Hi16OfLo32.
1456 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1457 getI32Imm(0), getI32Imm(15)};
1458 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1459 }
1460 if (IsSelected == true) {
1461 if (InstCnt)
1462 *InstCnt = 4;
1463 return Result;
1464 }
1465 }
1466
1467 // Handle the upper 32 bit value.
1468 Result =
1469 selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1470 // Add in the last bits as required.
1471 if (Hi16OfLo32) {
1472 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1473 SDValue(Result, 0), getI32Imm(Hi16OfLo32));
1474 ++InstCntDirect;
1475 }
1476 if (Lo16OfLo32) {
1477 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1478 getI32Imm(Lo16OfLo32));
1479 ++InstCntDirect;
1480 }
1481 if (InstCnt)
1482 *InstCnt = InstCntDirect;
1483 return Result;
1484}
1485
1486// Select a 64-bit constant.
1488 SDLoc dl(N);
1489
1490 // Get 64 bit value.
1491 int64_t Imm = N->getAsZExtVal();
1492 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1493 uint64_t SextImm = SignExtend64(Imm, MinSize);
1494 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1495 if (isInt<16>(SextImm))
1496 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1497 }
1498 return selectI64Imm(CurDAG, dl, Imm);
1499}
1500
1501namespace {
1502
1503class BitPermutationSelector {
1504 struct ValueBit {
1505 SDValue V;
1506
1507 // The bit number in the value, using a convention where bit 0 is the
1508 // lowest-order bit.
1509 unsigned Idx;
1510
1511 // ConstZero means a bit we need to mask off.
1512 // Variable is a bit comes from an input variable.
1513 // VariableKnownToBeZero is also a bit comes from an input variable,
1514 // but it is known to be already zero. So we do not need to mask them.
1515 enum Kind {
1516 ConstZero,
1517 Variable,
1518 VariableKnownToBeZero
1519 } K;
1520
1521 ValueBit(SDValue V, unsigned I, Kind K = Variable)
1522 : V(V), Idx(I), K(K) {}
1523 ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1524
1525 bool isZero() const {
1526 return K == ConstZero || K == VariableKnownToBeZero;
1527 }
1528
1529 bool hasValue() const {
1530 return K == Variable || K == VariableKnownToBeZero;
1531 }
1532
1533 SDValue getValue() const {
1534 assert(hasValue() && "Cannot get the value of a constant bit");
1535 return V;
1536 }
1537
1538 unsigned getValueBitIndex() const {
1539 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1540 return Idx;
1541 }
1542 };
1543
1544 // A bit group has the same underlying value and the same rotate factor.
1545 struct BitGroup {
1546 SDValue V;
1547 unsigned RLAmt;
1548 unsigned StartIdx, EndIdx;
1549
1550 // This rotation amount assumes that the lower 32 bits of the quantity are
1551 // replicated in the high 32 bits by the rotation operator (which is done
1552 // by rlwinm and friends in 64-bit mode).
1553 bool Repl32;
1554 // Did converting to Repl32 == true change the rotation factor? If it did,
1555 // it decreased it by 32.
1556 bool Repl32CR;
1557 // Was this group coalesced after setting Repl32 to true?
1558 bool Repl32Coalesced;
1559
1560 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1561 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1562 Repl32Coalesced(false) {
1563 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1564 << " [" << S << ", " << E << "]\n");
1565 }
1566 };
1567
1568 // Information on each (Value, RLAmt) pair (like the number of groups
1569 // associated with each) used to choose the lowering method.
1570 struct ValueRotInfo {
1571 SDValue V;
1572 unsigned RLAmt = std::numeric_limits<unsigned>::max();
1573 unsigned NumGroups = 0;
1574 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1575 bool Repl32 = false;
1576
1577 ValueRotInfo() = default;
1578
1579 // For sorting (in reverse order) by NumGroups, and then by
1580 // FirstGroupStartIdx.
1581 bool operator < (const ValueRotInfo &Other) const {
1582 // We need to sort so that the non-Repl32 come first because, when we're
1583 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1584 // masking operation.
1585 if (Repl32 < Other.Repl32)
1586 return true;
1587 else if (Repl32 > Other.Repl32)
1588 return false;
1589 else if (NumGroups > Other.NumGroups)
1590 return true;
1591 else if (NumGroups < Other.NumGroups)
1592 return false;
1593 else if (RLAmt == 0 && Other.RLAmt != 0)
1594 return true;
1595 else if (RLAmt != 0 && Other.RLAmt == 0)
1596 return false;
1597 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1598 return true;
1599 return false;
1600 }
1601 };
1602
  // Cached analysis result for one value: a flag paired with the per-bit
  // description of that value.
  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
  // Cache keyed by SDValue. Entries are heap-allocated and owned by the map
  // through unique_ptr.
  using ValueBitsMemoizer =
    DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
  ValueBitsMemoizer Memoizer;
1607
1608 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1609 // The bool is true if something interesting was deduced, otherwise if we're
1610 // providing only a generic representation of V (or something else likewise
1611 // uninteresting for instruction selection) through the SmallVector.
1612 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1613 unsigned NumBits) {
1614 auto &ValueEntry = Memoizer[V];
1615 if (ValueEntry)
1616 return std::make_pair(ValueEntry->first, &ValueEntry->second);
1617 ValueEntry.reset(new ValueBitsMemoizedValue());
1618 bool &Interesting = ValueEntry->first;
1619 SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1620 Bits.resize(NumBits);
1621
1622 switch (V.getOpcode()) {
1623 default: break;
1624 case ISD::ROTL:
1625 if (isa<ConstantSDNode>(V.getOperand(1))) {
1626 assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1627 unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);
1628
1629 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1630
1631 for (unsigned i = 0; i < NumBits; ++i)
1632 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1633
1634 return std::make_pair(Interesting = true, &Bits);
1635 }
1636 break;
1637 case ISD::SHL:
1638 case PPCISD::SHL:
1639 if (isa<ConstantSDNode>(V.getOperand(1))) {
1640 // sld takes 7 bits, slw takes 6.
1641 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1642
1643 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1644
1645 if (ShiftAmt >= NumBits) {
1646 for (unsigned i = 0; i < NumBits; ++i)
1647 Bits[i] = ValueBit(ValueBit::ConstZero);
1648 } else {
1649 for (unsigned i = ShiftAmt; i < NumBits; ++i)
1650 Bits[i] = LHSBits[i - ShiftAmt];
1651 for (unsigned i = 0; i < ShiftAmt; ++i)
1652 Bits[i] = ValueBit(ValueBit::ConstZero);
1653 }
1654
1655 return std::make_pair(Interesting = true, &Bits);
1656 }
1657 break;
1658 case ISD::SRL:
1659 case PPCISD::SRL:
1660 if (isa<ConstantSDNode>(V.getOperand(1))) {
1661 // srd takes lowest 7 bits, srw takes 6.
1662 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1663
1664 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1665
1666 if (ShiftAmt >= NumBits) {
1667 for (unsigned i = 0; i < NumBits; ++i)
1668 Bits[i] = ValueBit(ValueBit::ConstZero);
1669 } else {
1670 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1671 Bits[i] = LHSBits[i + ShiftAmt];
1672 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1673 Bits[i] = ValueBit(ValueBit::ConstZero);
1674 }
1675
1676 return std::make_pair(Interesting = true, &Bits);
1677 }
1678 break;
1679 case ISD::AND:
1680 if (isa<ConstantSDNode>(V.getOperand(1))) {
1681 uint64_t Mask = V.getConstantOperandVal(1);
1682
1683 const SmallVector<ValueBit, 64> *LHSBits;
1684 // Mark this as interesting, only if the LHS was also interesting. This
1685 // prevents the overall procedure from matching a single immediate 'and'
1686 // (which is non-optimal because such an and might be folded with other
1687 // things if we don't select it here).
1688 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1689
1690 for (unsigned i = 0; i < NumBits; ++i)
1691 if (((Mask >> i) & 1) == 1)
1692 Bits[i] = (*LHSBits)[i];
1693 else {
1694 // AND instruction masks this bit. If the input is already zero,
1695 // we have nothing to do here. Otherwise, make the bit ConstZero.
1696 if ((*LHSBits)[i].isZero())
1697 Bits[i] = (*LHSBits)[i];
1698 else
1699 Bits[i] = ValueBit(ValueBit::ConstZero);
1700 }
1701
1702 return std::make_pair(Interesting, &Bits);
1703 }
1704 break;
1705 case ISD::OR: {
1706 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1707 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1708
1709 bool AllDisjoint = true;
1710 SDValue LastVal = SDValue();
1711 unsigned LastIdx = 0;
1712 for (unsigned i = 0; i < NumBits; ++i) {
1713 if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1714 // If both inputs are known to be zero and one is ConstZero and
1715 // another is VariableKnownToBeZero, we can select whichever
1716 // we like. To minimize the number of bit groups, we select
1717 // VariableKnownToBeZero if this bit is the next bit of the same
1718 // input variable from the previous bit. Otherwise, we select
1719 // ConstZero.
1720 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1721 LHSBits[i].getValueBitIndex() == LastIdx + 1)
1722 Bits[i] = LHSBits[i];
1723 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1724 RHSBits[i].getValueBitIndex() == LastIdx + 1)
1725 Bits[i] = RHSBits[i];
1726 else
1727 Bits[i] = ValueBit(ValueBit::ConstZero);
1728 }
1729 else if (LHSBits[i].isZero())
1730 Bits[i] = RHSBits[i];
1731 else if (RHSBits[i].isZero())
1732 Bits[i] = LHSBits[i];
1733 else {
1734 AllDisjoint = false;
1735 break;
1736 }
1737 // We remember the value and bit index of this bit.
1738 if (Bits[i].hasValue()) {
1739 LastVal = Bits[i].getValue();
1740 LastIdx = Bits[i].getValueBitIndex();
1741 }
1742 else {
1743 if (LastVal) LastVal = SDValue();
1744 LastIdx = 0;
1745 }
1746 }
1747
1748 if (!AllDisjoint)
1749 break;
1750
1751 return std::make_pair(Interesting = true, &Bits);
1752 }
1753 case ISD::ZERO_EXTEND: {
1754 // We support only the case with zero extension from i32 to i64 so far.
1755 if (V.getValueType() != MVT::i64 ||
1756 V.getOperand(0).getValueType() != MVT::i32)
1757 break;
1758
1759 const SmallVector<ValueBit, 64> *LHSBits;
1760 const unsigned NumOperandBits = 32;
1761 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1762 NumOperandBits);
1763
1764 for (unsigned i = 0; i < NumOperandBits; ++i)
1765 Bits[i] = (*LHSBits)[i];
1766
1767 for (unsigned i = NumOperandBits; i < NumBits; ++i)
1768 Bits[i] = ValueBit(ValueBit::ConstZero);
1769
1770 return std::make_pair(Interesting, &Bits);
1771 }
1772 case ISD::TRUNCATE: {
1773 EVT FromType = V.getOperand(0).getValueType();
1774 EVT ToType = V.getValueType();
1775 // We support only the case with truncate from i64 to i32.
1776 if (FromType != MVT::i64 || ToType != MVT::i32)
1777 break;
1778 const unsigned NumAllBits = FromType.getSizeInBits();
1780 std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1781 NumAllBits);
1782 const unsigned NumValidBits = ToType.getSizeInBits();
1783
1784 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1785 // So, we cannot include this truncate.
1786 bool UseUpper32bit = false;
1787 for (unsigned i = 0; i < NumValidBits; ++i)
1788 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1789 UseUpper32bit = true;
1790 break;
1791 }
1792 if (UseUpper32bit)
1793 break;
1794
1795 for (unsigned i = 0; i < NumValidBits; ++i)
1796 Bits[i] = (*InBits)[i];
1797
1798 return std::make_pair(Interesting, &Bits);
1799 }
1800 case ISD::AssertZext: {
1801 // For AssertZext, we look through the operand and
1802 // mark the bits known to be zero.
1803 const SmallVector<ValueBit, 64> *LHSBits;
1804 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1805 NumBits);
1806
1807 EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1808 const unsigned NumValidBits = FromType.getSizeInBits();
1809 for (unsigned i = 0; i < NumValidBits; ++i)
1810 Bits[i] = (*LHSBits)[i];
1811
1812 // These bits are known to be zero but the AssertZext may be from a value
1813 // that already has some constant zero bits (i.e. from a masking and).
1814 for (unsigned i = NumValidBits; i < NumBits; ++i)
1815 Bits[i] = (*LHSBits)[i].hasValue()
1816 ? ValueBit((*LHSBits)[i].getValue(),
1817 (*LHSBits)[i].getValueBitIndex(),
1818 ValueBit::VariableKnownToBeZero)
1819 : ValueBit(ValueBit::ConstZero);
1820
1821 return std::make_pair(Interesting, &Bits);
1822 }
1823 case ISD::LOAD:
1824 LoadSDNode *LD = cast<LoadSDNode>(V);
1825 if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1826 EVT VT = LD->getMemoryVT();
1827 const unsigned NumValidBits = VT.getSizeInBits();
1828
1829 for (unsigned i = 0; i < NumValidBits; ++i)
1830 Bits[i] = ValueBit(V, i);
1831
1832 // These bits are known to be zero.
1833 for (unsigned i = NumValidBits; i < NumBits; ++i)
1834 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1835
1836 // Zero-extending load itself cannot be optimized. So, it is not
1837 // interesting by itself though it gives useful information.
1838 return std::make_pair(Interesting = false, &Bits);
1839 }
1840 break;
1841 }
1842
1843 for (unsigned i = 0; i < NumBits; ++i)
1844 Bits[i] = ValueBit(V, i);
1845
1846 return std::make_pair(Interesting = false, &Bits);
1847 }
1848
1849 // For each value (except the constant ones), compute the left-rotate amount
1850 // to get it from its original to final position.
1851 void computeRotationAmounts() {
1852 NeedMask = false;
1853 RLAmt.resize(Bits.size());
1854 for (unsigned i = 0; i < Bits.size(); ++i)
1855 if (Bits[i].hasValue()) {
1856 unsigned VBI = Bits[i].getValueBitIndex();
1857 if (i >= VBI)
1858 RLAmt[i] = i - VBI;
1859 else
1860 RLAmt[i] = Bits.size() - (VBI - i);
1861 } else if (Bits[i].isZero()) {
1862 NeedMask = true;
1863 RLAmt[i] = UINT32_MAX;
1864 } else {
1865 llvm_unreachable("Unknown value bit type");
1866 }
1867 }
1868
1869 // Collect groups of consecutive bits with the same underlying value and
1870 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1871 // they break up groups.
1872 void collectBitGroups(bool LateMask) {
1873 BitGroups.clear();
1874
1875 unsigned LastRLAmt = RLAmt[0];
1876 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1877 unsigned LastGroupStartIdx = 0;
1878 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1879 for (unsigned i = 1; i < Bits.size(); ++i) {
1880 unsigned ThisRLAmt = RLAmt[i];
1881 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1882 if (LateMask && !ThisValue) {
1883 ThisValue = LastValue;
1884 ThisRLAmt = LastRLAmt;
1885 // If we're doing late masking, then the first bit group always starts
1886 // at zero (even if the first bits were zero).
1887 if (BitGroups.empty())
1888 LastGroupStartIdx = 0;
1889 }
1890
1891 // If this bit is known to be zero and the current group is a bit group
1892 // of zeros, we do not need to terminate the current bit group even the
1893 // Value or RLAmt does not match here. Instead, we terminate this group
1894 // when the first non-zero bit appears later.
1895 if (IsGroupOfZeros && Bits[i].isZero())
1896 continue;
1897
1898 // If this bit has the same underlying value and the same rotate factor as
1899 // the last one, then they're part of the same group.
1900 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1901 // We cannot continue the current group if this bits is not known to
1902 // be zero in a bit group of zeros.
1903 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1904 continue;
1905
1906 if (LastValue.getNode())
1907 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1908 i-1));
1909 LastRLAmt = ThisRLAmt;
1910 LastValue = ThisValue;
1911 LastGroupStartIdx = i;
1912 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1913 }
1914 if (LastValue.getNode())
1915 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1916 Bits.size()-1));
1917
1918 if (BitGroups.empty())
1919 return;
1920
1921 // We might be able to combine the first and last groups.
1922 if (BitGroups.size() > 1) {
1923 // If the first and last groups are the same, then remove the first group
1924 // in favor of the last group, making the ending index of the last group
1925 // equal to the ending index of the to-be-removed first group.
1926 if (BitGroups[0].StartIdx == 0 &&
1927 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1928 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1929 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1930 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1931 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1932 BitGroups.erase(BitGroups.begin());
1933 }
1934 }
1935 }
1936
1937 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1938 // associated with each. If the number of groups are same, we prefer a group
1939 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1940 // instruction. If there is a degeneracy, pick the one that occurs
1941 // first (in the final value).
1942 void collectValueRotInfo() {
1943 ValueRots.clear();
1944
1945 for (auto &BG : BitGroups) {
1946 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1947 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1948 VRI.V = BG.V;
1949 VRI.RLAmt = BG.RLAmt;
1950 VRI.Repl32 = BG.Repl32;
1951 VRI.NumGroups += 1;
1952 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1953 }
1954
1955 // Now that we've collected the various ValueRotInfo instances, we need to
1956 // sort them.
1957 ValueRotsVec.clear();
1958 for (auto &I : ValueRots) {
1959 ValueRotsVec.push_back(I.second);
1960 }
1961 llvm::sort(ValueRotsVec);
1962 }
1963
1964 // In 64-bit mode, rlwinm and friends have a rotation operator that
1965 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1966 // indices of these instructions can only be in the lower 32 bits, so they
1967 // can only represent some 64-bit bit groups. However, when they can be used,
1968 // the 32-bit replication can be used to represent, as a single bit group,
1969 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1970 // groups when possible. Returns true if any of the bit groups were
1971 // converted.
1972 void assignRepl32BitGroups() {
1973 // If we have bits like this:
1974 //
1975 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1976 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1977 // Groups: | RLAmt = 8 | RLAmt = 40 |
1978 //
1979 // But, making use of a 32-bit operation that replicates the low-order 32
1980 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1981 // of 8.
1982
1983 auto IsAllLow32 = [this](BitGroup & BG) {
1984 if (BG.StartIdx <= BG.EndIdx) {
1985 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1986 if (!Bits[i].hasValue())
1987 continue;
1988 if (Bits[i].getValueBitIndex() >= 32)
1989 return false;
1990 }
1991 } else {
1992 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1993 if (!Bits[i].hasValue())
1994 continue;
1995 if (Bits[i].getValueBitIndex() >= 32)
1996 return false;
1997 }
1998 for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1999 if (!Bits[i].hasValue())
2000 continue;
2001 if (Bits[i].getValueBitIndex() >= 32)
2002 return false;
2003 }
2004 }
2005
2006 return true;
2007 };
2008
2009 for (auto &BG : BitGroups) {
2010 // If this bit group has RLAmt of 0 and will not be merged with
2011 // another bit group, we don't benefit from Repl32. We don't mark
2012 // such group to give more freedom for later instruction selection.
2013 if (BG.RLAmt == 0) {
2014 auto PotentiallyMerged = [this](BitGroup & BG) {
2015 for (auto &BG2 : BitGroups)
2016 if (&BG != &BG2 && BG.V == BG2.V &&
2017 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
2018 return true;
2019 return false;
2020 };
2021 if (!PotentiallyMerged(BG))
2022 continue;
2023 }
2024 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
2025 if (IsAllLow32(BG)) {
2026 if (BG.RLAmt >= 32) {
2027 BG.RLAmt -= 32;
2028 BG.Repl32CR = true;
2029 }
2030
2031 BG.Repl32 = true;
2032
2033 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
2034 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
2035 << BG.StartIdx << ", " << BG.EndIdx << "]\n");
2036 }
2037 }
2038 }
2039
2040 // Now walk through the bit groups, consolidating where possible.
2041 for (auto I = BitGroups.begin(); I != BitGroups.end();) {
2042 // We might want to remove this bit group by merging it with the previous
2043 // group (which might be the ending group).
2044 auto IP = (I == BitGroups.begin()) ?
2045 std::prev(BitGroups.end()) : std::prev(I);
2046 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
2047 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
2048
2049 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
2050 << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
2051 << I->StartIdx << ", " << I->EndIdx
2052 << "] with group with range [" << IP->StartIdx << ", "
2053 << IP->EndIdx << "]\n");
2054
2055 IP->EndIdx = I->EndIdx;
2056 IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
2057 IP->Repl32Coalesced = true;
2058 I = BitGroups.erase(I);
2059 continue;
2060 } else {
2061 // There is a special case worth handling: If there is a single group
2062 // covering the entire upper 32 bits, and it can be merged with both
2063 // the next and previous groups (which might be the same group), then
2064 // do so. If it is the same group (so there will be only one group in
2065 // total), then we need to reverse the order of the range so that it
2066 // covers the entire 64 bits.
2067 if (I->StartIdx == 32 && I->EndIdx == 63) {
2068 assert(std::next(I) == BitGroups.end() &&
2069 "bit group ends at index 63 but there is another?");
2070 auto IN = BitGroups.begin();
2071
2072 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
2073 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
2074 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
2075 IsAllLow32(*I)) {
2076
2077 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
2078 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
2079 << ", " << I->EndIdx
2080 << "] with 32-bit replicated groups with ranges ["
2081 << IP->StartIdx << ", " << IP->EndIdx << "] and ["
2082 << IN->StartIdx << ", " << IN->EndIdx << "]\n");
2083
2084 if (IP == IN) {
2085 // There is only one other group; change it to cover the whole
2086 // range (backward, so that it can still be Repl32 but cover the
2087 // whole 64-bit range).
2088 IP->StartIdx = 31;
2089 IP->EndIdx = 30;
2090 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
2091 IP->Repl32Coalesced = true;
2092 I = BitGroups.erase(I);
2093 } else {
2094 // There are two separate groups, one before this group and one
2095 // after us (at the beginning). We're going to remove this group,
2096 // but also the group at the very beginning.
2097 IP->EndIdx = IN->EndIdx;
2098 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
2099 IP->Repl32Coalesced = true;
2100 I = BitGroups.erase(I);
2101 BitGroups.erase(BitGroups.begin());
2102 }
2103
2104 // This must be the last group in the vector (and we might have
2105 // just invalidated the iterator above), so break here.
2106 break;
2107 }
2108 }
2109 }
2110
2111 ++I;
2112 }
2113 }
2114
2115 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
2116 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
2117 }
2118
2119 uint64_t getZerosMask() {
2120 uint64_t Mask = 0;
2121 for (unsigned i = 0; i < Bits.size(); ++i) {
2122 if (Bits[i].hasValue())
2123 continue;
2124 Mask |= (UINT64_C(1) << i);
2125 }
2126
2127 return ~Mask;
2128 }
2129
2130 // This method extends an input value to 64 bit if input is 32-bit integer.
2131 // While selecting instructions in BitPermutationSelector in 64-bit mode,
2132 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
2133 // In such case, we extend it to 64 bit to be consistent with other values.
2134 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
2135 if (V.getValueSizeInBits() == 64)
2136 return V;
2137
2138 assert(V.getValueSizeInBits() == 32);
2139 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2140 SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2141 MVT::i64), 0);
2142 SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2143 MVT::i64, ImDef, V,
2144 SubRegIdx), 0);
2145 return ExtVal;
2146 }
2147
2148 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
2149 if (V.getValueSizeInBits() == 32)
2150 return V;
2151
2152 assert(V.getValueSizeInBits() == 64);
2153 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2154 SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
2155 MVT::i32, V, SubRegIdx), 0);
2156 return SubVal;
2157 }
2158
2159 // Depending on the number of groups for a particular value, it might be
2160 // better to rotate, mask explicitly (using andi/andis), and then or the
2161 // result. Select this part of the result first.
2162 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2164 return;
2165
2166 for (ValueRotInfo &VRI : ValueRotsVec) {
2167 unsigned Mask = 0;
2168 for (unsigned i = 0; i < Bits.size(); ++i) {
2169 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2170 continue;
2171 if (RLAmt[i] != VRI.RLAmt)
2172 continue;
2173 Mask |= (1u << i);
2174 }
2175
2176 // Compute the masks for andi/andis that would be necessary.
2177 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2178 assert((ANDIMask != 0 || ANDISMask != 0) &&
2179 "No set bits in mask for value bit groups");
2180 bool NeedsRotate = VRI.RLAmt != 0;
2181
2182 // We're trying to minimize the number of instructions. If we have one
2183 // group, using one of andi/andis can break even. If we have three
2184 // groups, we can use both andi and andis and break even (to use both
2185 // andi and andis we also need to or the results together). We need four
2186 // groups if we also need to rotate. To use andi/andis we need to do more
2187 // than break even because rotate-and-mask instructions tend to be easier
2188 // to schedule.
2189
2190 // FIXME: We've biased here against using andi/andis, which is right for
2191 // POWER cores, but not optimal everywhere. For example, on the A2,
2192 // andi/andis have single-cycle latency whereas the rotate-and-mask
2193 // instructions take two cycles, and it would be better to bias toward
2194 // andi/andis in break-even cases.
2195
2196 unsigned NumAndInsts = (unsigned) NeedsRotate +
2197 (unsigned) (ANDIMask != 0) +
2198 (unsigned) (ANDISMask != 0) +
2199 (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2200 (unsigned) (bool) Res;
2201
2202 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2203 << " RL: " << VRI.RLAmt << ":"
2204 << "\n\t\t\tisel using masking: " << NumAndInsts
2205 << " using rotates: " << VRI.NumGroups << "\n");
2206
2207 if (NumAndInsts >= VRI.NumGroups)
2208 continue;
2209
2210 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2211
2212 if (InstCnt) *InstCnt += NumAndInsts;
2213
2214 SDValue VRot;
2215 if (VRI.RLAmt) {
2216 SDValue Ops[] =
2217 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2218 getI32Imm(0, dl), getI32Imm(31, dl) };
2219 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2220 Ops), 0);
2221 } else {
2222 VRot = TruncateToInt32(VRI.V, dl);
2223 }
2224
2225 SDValue ANDIVal, ANDISVal;
2226 if (ANDIMask != 0)
2227 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2228 VRot, getI32Imm(ANDIMask, dl)),
2229 0);
2230 if (ANDISMask != 0)
2231 ANDISVal =
2232 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2233 getI32Imm(ANDISMask, dl)),
2234 0);
2235
2236 SDValue TotalVal;
2237 if (!ANDIVal)
2238 TotalVal = ANDISVal;
2239 else if (!ANDISVal)
2240 TotalVal = ANDIVal;
2241 else
2242 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2243 ANDIVal, ANDISVal), 0);
2244
2245 if (!Res)
2246 Res = TotalVal;
2247 else
2248 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2249 Res, TotalVal), 0);
2250
2251 // Now, remove all groups with this underlying value and rotation
2252 // factor.
2253 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2254 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2255 });
2256 }
2257 }
2258
2259 // Instruction selection for the 32-bit case.
2260 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
2261 SDLoc dl(N);
2262 SDValue Res;
2263
2264 if (InstCnt) *InstCnt = 0;
2265
2266 // Take care of cases that should use andi/andis first.
2267 SelectAndParts32(dl, Res, InstCnt);
2268
2269 // If we've not yet selected a 'starting' instruction, and we have no zeros
2270 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2271 // number of groups), and start with this rotated value.
2272 if ((!NeedMask || LateMask) && !Res) {
2273 ValueRotInfo &VRI = ValueRotsVec[0];
2274 if (VRI.RLAmt) {
2275 if (InstCnt) *InstCnt += 1;
2276 SDValue Ops[] =
2277 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2278 getI32Imm(0, dl), getI32Imm(31, dl) };
2279 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2280 0);
2281 } else {
2282 Res = TruncateToInt32(VRI.V, dl);
2283 }
2284
2285 // Now, remove all groups with this underlying value and rotation factor.
2286 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2287 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2288 });
2289 }
2290
2291 if (InstCnt) *InstCnt += BitGroups.size();
2292
2293 // Insert the other groups (one at a time).
2294 for (auto &BG : BitGroups) {
2295 if (!Res) {
2296 SDValue Ops[] =
2297 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2298 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2299 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2300 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2301 } else {
2302 SDValue Ops[] =
2303 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2304 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2305 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2306 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
2307 }
2308 }
2309
2310 if (LateMask) {
2311 unsigned Mask = (unsigned) getZerosMask();
2312
2313 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2314 assert((ANDIMask != 0 || ANDISMask != 0) &&
2315 "No set bits in zeros mask?");
2316
2317 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2318 (unsigned) (ANDISMask != 0) +
2319 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2320
2321 SDValue ANDIVal, ANDISVal;
2322 if (ANDIMask != 0)
2323 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2324 Res, getI32Imm(ANDIMask, dl)),
2325 0);
2326 if (ANDISMask != 0)
2327 ANDISVal =
2328 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2329 getI32Imm(ANDISMask, dl)),
2330 0);
2331
2332 if (!ANDIVal)
2333 Res = ANDISVal;
2334 else if (!ANDISVal)
2335 Res = ANDIVal;
2336 else
2337 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2338 ANDIVal, ANDISVal), 0);
2339 }
2340
2341 return Res.getNode();
2342 }
2343
// Returns how many instructions (1 or 2) are counted for rotating by RLAmt
// and keeping result bits [MaskStart, MaskEnd]. IsIns selects the
// rotate-and-insert flavor, for which the mask-at-either-end forms are not
// considered.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // A 32-bit replicated rotate always counts as a single instruction.
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstMaskStart = 63 - MaskEnd;
  const unsigned InstMaskEnd = 63 - MaskStart;

  // Mask touching either end of the register (non-insert forms only).
  const bool MaskAtEitherEnd =
      !IsIns && (InstMaskEnd == 63 || InstMaskStart == 0);
  // Mask end implied by the rotation amount.
  const bool MaskEndMatchesRotate = InstMaskEnd == 63 - RLAmt;

  return (MaskAtEitherEnd || MaskEndMatchesRotate) ? 1 : 2;
}
2361
2362 // For 64-bit values, not all combinations of rotates and masks are
2363 // available. Produce one if it is available.
2364 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2365 bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2366 unsigned *InstCnt = nullptr) {
2367 // In the notation used by the instructions, 'start' and 'end' are reversed
2368 // because bits are counted from high to low order.
2369 unsigned InstMaskStart = 64 - MaskEnd - 1,
2370 InstMaskEnd = 64 - MaskStart - 1;
2371
2372 if (InstCnt) *InstCnt += 1;
2373
2374 if (Repl32) {
2375 // This rotation amount assumes that the lower 32 bits of the quantity
2376 // are replicated in the high 32 bits by the rotation operator (which is
2377 // done by rlwinm and friends).
2378 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2379 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2380 SDValue Ops[] =
2381 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2382 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2383 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2384 Ops), 0);
2385 }
2386
2387 if (InstMaskEnd == 63) {
2388 SDValue Ops[] =
2389 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2390 getI32Imm(InstMaskStart, dl) };
2391 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
2392 }
2393
2394 if (InstMaskStart == 0) {
2395 SDValue Ops[] =
2396 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2397 getI32Imm(InstMaskEnd, dl) };
2398 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
2399 }
2400
2401 if (InstMaskEnd == 63 - RLAmt) {
2402 SDValue Ops[] =
2403 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2404 getI32Imm(InstMaskStart, dl) };
2405 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
2406 }
2407
2408 // We cannot do this with a single instruction, so we'll use two. The
2409 // problem is that we're not free to choose both a rotation amount and mask
2410 // start and end independently. We can choose an arbitrary mask start and
2411 // end, but then the rotation amount is fixed. Rotation, however, can be
2412 // inverted, and so by applying an "inverse" rotation first, we can get the
2413 // desired result.
2414 if (InstCnt) *InstCnt += 1;
2415
2416 // The rotation mask for the second instruction must be MaskStart.
2417 unsigned RLAmt2 = MaskStart;
2418 // The first instruction must rotate V so that the overall rotation amount
2419 // is RLAmt.
2420 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2421 if (RLAmt1)
2422 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2423 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2424 }
2425
2426 // For 64-bit values, not all combinations of rotates and masks are
2427 // available. Produce a rotate-mask-and-insert if one is available.
2428 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2429 unsigned RLAmt, bool Repl32, unsigned MaskStart,
2430 unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2431 // In the notation used by the instructions, 'start' and 'end' are reversed
2432 // because bits are counted from high to low order.
2433 unsigned InstMaskStart = 64 - MaskEnd - 1,
2434 InstMaskEnd = 64 - MaskStart - 1;
2435
2436 if (InstCnt) *InstCnt += 1;
2437
2438 if (Repl32) {
2439 // This rotation amount assumes that the lower 32 bits of the quantity
2440 // are replicated in the high 32 bits by the rotation operator (which is
2441 // done by rlwinm and friends).
2442 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2443 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2444 SDValue Ops[] =
2445 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2446 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2447 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2448 Ops), 0);
2449 }
2450
2451 if (InstMaskEnd == 63 - RLAmt) {
2452 SDValue Ops[] =
2453 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2454 getI32Imm(InstMaskStart, dl) };
2455 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2456 }
2457
2458 // We cannot do this with a single instruction, so we'll use two. The
2459 // problem is that we're not free to choose both a rotation amount and mask
2460 // start and end independently. We can choose an arbitrary mask start and
2461 // end, but then the rotation amount is fixed. Rotation, however, can be
2462 // inverted, and so by applying an "inverse" rotation first, we can get the
2463 // desired result.
2464 if (InstCnt) *InstCnt += 1;
2465
2466 // The rotation mask for the second instruction must be MaskStart.
2467 unsigned RLAmt2 = MaskStart;
2468 // The first instruction must rotate V so that the overall rotation amount
2469 // is RLAmt.
2470 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2471 if (RLAmt1)
2472 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2473 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2474 }
2475
2476 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2478 return;
2479
2480 // The idea here is the same as in the 32-bit version, but with additional
2481 // complications from the fact that Repl32 might be true. Because we
2482 // aggressively convert bit groups to Repl32 form (which, for small
2483 // rotation factors, involves no other change), and then coalesce, it might
2484 // be the case that a single 64-bit masking operation could handle both
2485 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2486 // form allowed coalescing, then we must use a 32-bit rotation in order to
2487 // completely capture the new combined bit group.
2488
2489 for (ValueRotInfo &VRI : ValueRotsVec) {
2490 uint64_t Mask = 0;
2491
2492 // We need to add to the mask all bits from the associated bit groups.
2493 // If Repl32 is false, we need to add bits from bit groups that have
2494 // Repl32 true, but are trivially convertible to Repl32 false. Such a
2495 // group is trivially convertible if it overlaps only with the lower 32
2496 // bits, and the group has not been coalesced.
2497 auto MatchingBG = [VRI](const BitGroup &BG) {
2498 if (VRI.V != BG.V)
2499 return false;
2500
2501 unsigned EffRLAmt = BG.RLAmt;
2502 if (!VRI.Repl32 && BG.Repl32) {
2503 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2504 !BG.Repl32Coalesced) {
2505 if (BG.Repl32CR)
2506 EffRLAmt += 32;
2507 } else {
2508 return false;
2509 }
2510 } else if (VRI.Repl32 != BG.Repl32) {
2511 return false;
2512 }
2513
2514 return VRI.RLAmt == EffRLAmt;
2515 };
2516
2517 for (auto &BG : BitGroups) {
2518 if (!MatchingBG(BG))
2519 continue;
2520
2521 if (BG.StartIdx <= BG.EndIdx) {
2522 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2523 Mask |= (UINT64_C(1) << i);
2524 } else {
2525 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2526 Mask |= (UINT64_C(1) << i);
2527 for (unsigned i = 0; i <= BG.EndIdx; ++i)
2528 Mask |= (UINT64_C(1) << i);
2529 }
2530 }
2531
2532 // We can use the 32-bit andi/andis technique if the mask does not
2533 // require any higher-order bits. This can save an instruction compared
2534 // to always using the general 64-bit technique.
2535 bool Use32BitInsts = isUInt<32>(Mask);
2536 // Compute the masks for andi/andis that would be necessary.
2537 unsigned ANDIMask = (Mask & UINT16_MAX),
2538 ANDISMask = (Mask >> 16) & UINT16_MAX;
2539
2540 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2541
2542 unsigned NumAndInsts = (unsigned) NeedsRotate +
2543 (unsigned) (bool) Res;
2544 unsigned NumOfSelectInsts = 0;
2545 selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2546 assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2547 if (Use32BitInsts)
2548 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2549 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2550 else
2551 NumAndInsts += NumOfSelectInsts + /* and */ 1;
2552
2553 unsigned NumRLInsts = 0;
2554 bool FirstBG = true;
2555 bool MoreBG = false;
2556 for (auto &BG : BitGroups) {
2557 if (!MatchingBG(BG)) {
2558 MoreBG = true;
2559 continue;
2560 }
2561 NumRLInsts +=
2562 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2563 !FirstBG);
2564 FirstBG = false;
2565 }
2566
2567 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2568 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2569 << "\n\t\t\tisel using masking: " << NumAndInsts
2570 << " using rotates: " << NumRLInsts << "\n");
2571
2572 // When we'd use andi/andis, we bias toward using the rotates (andi only
2573 // has a record form, and is cracked on POWER cores). However, when using
2574 // general 64-bit constant formation, bias toward the constant form,
2575 // because that exposes more opportunities for CSE.
2576 if (NumAndInsts > NumRLInsts)
2577 continue;
2578 // When merging multiple bit groups, instruction or is used.
2579 // But when rotate is used, rldimi can inert the rotated value into any
2580 // register, so instruction or can be avoided.
2581 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2582 continue;
2583
2584 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2585
2586 if (InstCnt) *InstCnt += NumAndInsts;
2587
2588 SDValue VRot;
2589 // We actually need to generate a rotation if we have a non-zero rotation
2590 // factor or, in the Repl32 case, if we care about any of the
2591 // higher-order replicated bits. In the latter case, we generate a mask
2592 // backward so that it actually includes the entire 64 bits.
2593 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2594 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2595 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2596 else
2597 VRot = VRI.V;
2598
2599 SDValue TotalVal;
2600 if (Use32BitInsts) {
2601 assert((ANDIMask != 0 || ANDISMask != 0) &&
2602 "No set bits in mask when using 32-bit ands for 64-bit value");
2603
2604 SDValue ANDIVal, ANDISVal;
2605 if (ANDIMask != 0)
2606 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2607 ExtendToInt64(VRot, dl),
2608 getI32Imm(ANDIMask, dl)),
2609 0);
2610 if (ANDISMask != 0)
2611 ANDISVal =
2612 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2613 ExtendToInt64(VRot, dl),
2614 getI32Imm(ANDISMask, dl)),
2615 0);
2616
2617 if (!ANDIVal)
2618 TotalVal = ANDISVal;
2619 else if (!ANDISVal)
2620 TotalVal = ANDIVal;
2621 else
2622 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2623 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2624 } else {
2625 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2626 TotalVal =
2627 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2628 ExtendToInt64(VRot, dl), TotalVal),
2629 0);
2630 }
2631
2632 if (!Res)
2633 Res = TotalVal;
2634 else
2635 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2636 ExtendToInt64(Res, dl), TotalVal),
2637 0);
2638
2639 // Now, remove all groups with this underlying value and rotation
2640 // factor.
2641 eraseMatchingBitGroups(MatchingBG);
2642 }
2643 }
2644
  /// Instruction selection for the 64-bit case.
  ///
  /// Builds the result in stages: (1) parts that are cheaper with masking
  /// (SelectAndParts64), (2) optionally a plain rotated starting value,
  /// (3) one rotate/insert per remaining bit group, and (4) for late masking,
  /// a final AND with the zeros mask.
  ///
  /// \param N        Node being selected; used here only for its debug
  ///                 location.
  /// \param LateMask If true, zeros are applied with a single AND at the end
  ///                 instead of being respected at every insertion step.
  /// \param InstCnt  If non-null, reset to 0 and then incremented by the
  ///                 estimated instruction count of the selected sequence.
  /// \returns The node producing the final value.
  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts64(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!NeedMask || LateMask) && !Res) {
      // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
      // groups will come first, and so the VRI representing the largest number
      // of groups might not be first (it might be the first Repl32 groups).
      unsigned MaxGroupsIdx = 0;
      if (!ValueRotsVec[0].Repl32) {
        // Only the first Repl32 entry is compared against entry 0.
        for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
          if (ValueRotsVec[i].Repl32) {
            if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
              MaxGroupsIdx = i;
            break;
          }
      }

      ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
      bool NeedsRotate = false;
      if (VRI.RLAmt) {
        NeedsRotate = true;
      } else if (VRI.Repl32) {
        for (auto &BG : BitGroups) {
          if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
              BG.Repl32 != VRI.Repl32)
            continue;

          // We don't need a rotate if the bit group is confined to the lower
          // 32 bits.
          // NOTE(review): this uses StartIdx < EndIdx, whereas the analogous
          // test in SelectAndParts64 uses <=; a single-bit group therefore
          // still requests a rotate here. Confirm whether that is intended.
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
            continue;

          NeedsRotate = true;
          break;
        }
      }

      if (NeedsRotate)
        Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                              VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
                              InstCnt);
      else
        Res = VRI.V;

      // Now, remove all groups with this underlying value and rotation factor.
      if (Res)
        eraseMatchingBitGroups([VRI](const BitGroup &BG) {
          return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
                 BG.Repl32 == VRI.Repl32;
        });
    }

    // Because 64-bit rotates are more flexible than inserts, we might have a
    // preference regarding which one we do first (to save one instruction).
    // Move the first group that is cheaper as a plain rotate than as an insert
    // to the front of BitGroups.
    if (!Res)
      for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
        if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                false) <
            SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                true)) {
          if (I != BitGroups.begin()) {
            // Copy the group out before erasing; I is not used again after
            // the container is modified because of the break below.
            BitGroup BG = *I;
            BitGroups.erase(I);
            BitGroups.insert(BitGroups.begin(), BG);
          }

          break;
        }
      }

    // Insert the other groups (one at a time).
    for (auto &BG : BitGroups) {
      if (!Res)
        Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
                              BG.EndIdx, InstCnt);
      else
        Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
                                 BG.StartIdx, BG.EndIdx, InstCnt);
    }

    if (LateMask) {
      uint64_t Mask = getZerosMask();

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        // andi and/or andis, plus an OR when both halves are needed.
        if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                                 (unsigned) (ANDISMask != 0) +
                                 (unsigned) (ANDIMask != 0 && ANDISMask != 0);

        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
                                                   ExtendToInt64(Res, dl),
                                                   getI32Imm(ANDIMask, dl)),
                            0);
        if (ANDISMask != 0)
          ANDISVal =
              SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
                                             ExtendToInt64(Res, dl),
                                             getI32Imm(ANDISMask, dl)),
                      0);

        if (!ANDIVal)
          Res = ANDISVal;
        else if (!ANDISVal)
          Res = ANDIVal;
        else
          Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                          ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // General case: materialize the 64-bit mask and AND with it.
        unsigned NumOfSelectInsts = 0;
        SDValue MaskVal =
            SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
        Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                             ExtendToInt64(Res, dl), MaskVal),
                      0);
        if (InstCnt)
          *InstCnt += NumOfSelectInsts + /* and */ 1;
      }
    }

    return Res.getNode();
  }
2788
2789 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2790 // Fill in BitGroups.
2791 collectBitGroups(LateMask);
2792 if (BitGroups.empty())
2793 return nullptr;
2794
2795 // For 64-bit values, figure out when we can use 32-bit instructions.
2796 if (Bits.size() == 64)
2797 assignRepl32BitGroups();
2798
2799 // Fill in ValueRotsVec.
2800 collectValueRotInfo();
2801
2802 if (Bits.size() == 32) {
2803 return Select32(N, LateMask, InstCnt);
2804 } else {
2805 assert(Bits.size() == 64 && "Not 64 bits here?");
2806 return Select64(N, LateMask, InstCnt);
2807 }
2808
2809 return nullptr;
2810 }
2811
2812 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2813 erase_if(BitGroups, F);
2814 }
2815
2817
  // Set by computeRotationAmounts() (per the comment at its call site). When
  // false, the public Select() skips the early/late masking comparison.
  bool NeedMask = false;

  // Filled by collectBitGroups(); groups are removed as they are selected
  // (see eraseMatchingBitGroups).
  SmallVector<BitGroup, 16> BitGroups;

  // NOTE(review): ValueRots is not referenced in the visible part of this
  // class; presumably it is the map from which ValueRotsVec is built --
  // confirm against collectValueRotInfo().
  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
  // Filled by collectValueRotInfo() and walked by the selection routines.
  SmallVector<ValueRotInfo, 16> ValueRotsVec;

  // DAG in which all machine nodes are created.
  SelectionDAG *CurDAG = nullptr;
2827
2828public:
2829 BitPermutationSelector(SelectionDAG *DAG)
2830 : CurDAG(DAG) {}
2831
2832 // Here we try to match complex bit permutations into a set of
2833 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2834 // known to produce optimal code for common cases (like i32 byte swapping).
2835 SDNode *Select(SDNode *N) {
2836 Memoizer.clear();
2837 auto Result =
2838 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2839 if (!Result.first)
2840 return nullptr;
2841 Bits = std::move(*Result.second);
2842
2843 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2844 " selection for: ");
2845 LLVM_DEBUG(N->dump(CurDAG));
2846
2847 // Fill it RLAmt and set NeedMask.
2848 computeRotationAmounts();
2849
2850 if (!NeedMask)
2851 return Select(N, false);
2852
2853 // We currently have two techniques for handling results with zeros: early
2854 // masking (the default) and late masking. Late masking is sometimes more
2855 // efficient, but because the structure of the bit groups is different, it
2856 // is hard to tell without generating both and comparing the results. With
2857 // late masking, we ignore zeros in the resulting value when inserting each
2858 // set of bit groups, and then mask in the zeros at the end. With early
2859 // masking, we only insert the non-zero parts of the result at every step.
2860
2861 unsigned InstCnt = 0, InstCntLateMask = 0;
2862 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2863 SDNode *RN = Select(N, false, &InstCnt);
2864 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2865
2866 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2867 SDNode *RNLM = Select(N, true, &InstCntLateMask);
2868 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2869 << " instructions\n");
2870
2871 if (InstCnt <= InstCntLateMask) {
2872 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2873 return RN;
2874 }
2875
2876 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2877 return RNLM;
2878 }
2879};
2880
/// Helper that tries to compute the result of integer comparisons (and of
/// logical operations on such results) in general-purpose registers. Its
/// public entry point, Select(), dispatches zero/sign extensions to
/// tryEXTEND() and AND/OR/XOR to tryLogicOpOfCompares().
class IntegerCompareEliminator {
  // DAG in which replacement nodes are created.
  SelectionDAG *CurDAG;
  // Owning instruction selector; used for its immediate-building helpers.
  PPCDAGToDAGISel *S;
  // Conversion type for interpreting results of a 32-bit instruction as
  // a 64-bit value or vice versa.
  enum ExtOrTruncConversion { Ext, Trunc };

  // Modifiers to guide how an ISD::SETCC node's result is to be computed
  // in a GPR.
  // ZExtOrig - use the original condition code, zero-extend value
  // ZExtInvert - invert the condition code, zero-extend value
  // SExtOrig - use the original condition code, sign-extend value
  // SExtInvert - invert the condition code, sign-extend value
  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };

  // Comparisons against zero to emit GPR code sequences for. Each of these
  // sequences may need to be emitted for two or more equivalent patterns.
  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
  // matters as well as the extension type: sext (-1/0), zext (1/0).
  // GEZExt - (zext (LHS >= 0))
  // GESExt - (sext (LHS >= 0))
  // LEZExt - (zext (LHS <= 0))
  // LESExt - (sext (LHS <= 0))
  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };

  // Top-level rewrites reachable from Select().
  SDNode *tryEXTEND(SDNode *N);
  SDNode *tryLogicOpOfCompares(SDNode *N);
  // Building blocks that produce values in GPRs.
  SDValue computeLogicOpInGPR(SDValue LogicOp);
  SDValue signExtendInputIfNeeded(SDValue Input);
  SDValue zeroExtendInputIfNeeded(SDValue Input);
  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                         ZeroCompare CmpTy);
  // Per-width, per-extension comparison sequences.
  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);

public:
  /// \param DAG DAG in which replacement nodes are created.
  /// \param Sel Instruction selector that owns this helper.
  IntegerCompareEliminator(SelectionDAG *DAG,
                           PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
    // NOTE(review): the opening line of this statement is missing from the
    // listing; judging by the message it is an assertion that the pointer
    // type is 64 bits wide -- confirm against the original source.
           .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
           "Only expecting to use this on 64 bit targets.");
  }
  /// Try to rewrite \p N so that the comparison result stays in a GPR.
  /// \returns The replacement node, or nullptr if \p N is not handled.
  SDNode *Select(SDNode *N) {
    // The transformation is disabled entirely by this setting.
    if (CmpInGPR == ICGPR_None)
      return nullptr;
    switch (N->getOpcode()) {
    default: break;
    case ISD::ZERO_EXTEND:
      // NOTE(review): the condition guarding this return appears to be
      // missing from the listing; as written the return is unconditional and
      // the fallthrough below is unreachable -- confirm.
        return nullptr;
      [[fallthrough]];
    case ISD::SIGN_EXTEND:
      // NOTE(review): likewise, the guard for this return appears to be
      // missing; as written tryEXTEND() is never reached -- confirm.
        return nullptr;
      return tryEXTEND(N);
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      return tryLogicOpOfCompares(N);
    }
    return nullptr;
  }
};
2954
2955// The obvious case for wanting to keep the value in a GPR. Namely, the
2956// result of the comparison is actually needed in a GPR.
2957SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2958 assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2959 N->getOpcode() == ISD::SIGN_EXTEND) &&
2960 "Expecting a zero/sign extend node!");
2961 SDValue WideRes;
2962 // If we are zero-extending the result of a logical operation on i1
2963 // values, we can keep the values in GPRs.
2964 if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) &&
2965 N->getOperand(0).getValueType() == MVT::i1 &&
2966 N->getOpcode() == ISD::ZERO_EXTEND)
2967 WideRes = computeLogicOpInGPR(N->getOperand(0));
2968 else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2969 return nullptr;
2970 else
2971 WideRes =
2972 getSETCCInGPR(N->getOperand(0),
2973 N->getOpcode() == ISD::SIGN_EXTEND ?
2974 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2975
2976 if (!WideRes)
2977 return nullptr;
2978
2979 bool Input32Bit = WideRes.getValueType() == MVT::i32;
2980 bool Output32Bit = N->getValueType(0) == MVT::i32;
2981
2982 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2983 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2984
2985 SDValue ConvOp = WideRes;
2986 if (Input32Bit != Output32Bit)
2987 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2988 ExtOrTruncConversion::Trunc);
2989 return ConvOp.getNode();
2990}
2991
2992// Attempt to perform logical operations on the results of comparisons while
2993// keeping the values in GPRs. Without doing so, these would end up being
2994// lowered to CR-logical operations which suffer from significant latency and
2995// low ILP.
2996SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2997 if (N->getValueType(0) != MVT::i1)
2998 return nullptr;
2999 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
3000 "Expected a logic operation on setcc results.");
3001 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
3002 if (!LoweredLogical)
3003 return nullptr;
3004
3005 SDLoc dl(N);
3006 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
3007 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3008 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
3009 SDValue LHS = LoweredLogical.getOperand(0);
3010 SDValue RHS = LoweredLogical.getOperand(1);
3011 SDValue WideOp;
3012 SDValue OpToConvToRecForm;
3013
3014 // Look through any 32-bit to 64-bit implicit extend nodes to find the
3015 // opcode that is input to the XORI.
3016 if (IsBitwiseNegate &&
3017 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
3018 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
3019 else if (IsBitwiseNegate)
3020 // If the input to the XORI isn't an extension, that's what we're after.
3021 OpToConvToRecForm = LoweredLogical.getOperand(0);
3022 else
3023 // If this is not an XORI, it is a reg-reg logical op and we can convert
3024 // it to record-form.
3025 OpToConvToRecForm = LoweredLogical;
3026
3027 // Get the record-form version of the node we're looking to use to get the
3028 // CR result from.
3029 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
3030 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
3031
3032 // Convert the right node to record-form. This is either the logical we're
3033 // looking at or it is the input node to the negation (if we're looking at
3034 // a bitwise negation).
3035 if (NewOpc != -1 && IsBitwiseNegate) {
3036 // The input to the XORI has a record-form. Use it.
3037 assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
3038 "Expected a PPC::XORI8 only for bitwise negation.");
3039 // Emit the record-form instruction.
3040 std::vector<SDValue> Ops;
3041 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
3042 Ops.push_back(OpToConvToRecForm.getOperand(i));
3043
3044 WideOp =
3045 SDValue(CurDAG->getMachineNode(NewOpc, dl,
3046 OpToConvToRecForm.getValueType(),
3047 MVT::Glue, Ops), 0);
3048 } else {
3049 assert((NewOpc != -1 || !IsBitwiseNegate) &&
3050 "No record form available for AND8/OR8/XOR8?");
3051 WideOp =
3052 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
3053 dl, MVT::i64, MVT::Glue, LHS, RHS),
3054 0);
3055 }
3056
3057 // Select this node to a single bit from CR0 set by the record-form node
3058 // just created. For bitwise negation, use the EQ bit which is the equivalent
3059 // of negating the result (i.e. it is a bit set when the result of the
3060 // operation is zero).
3061 SDValue SRIdxVal =
3062 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
3063 SDValue CRBit =
3064 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
3065 MVT::i1, CR0Reg, SRIdxVal,
3066 WideOp.getValue(1)), 0);
3067 return CRBit.getNode();
3068}
3069
// Lower a logical operation on i1 values into a GPR sequence if possible.
// The result can be kept in a GPR if requested.
// Three types of inputs can be handled:
// - SETCC
// - TRUNCATE
// - Logical operation (AND/OR/XOR)
// There is also a special case that is handled (namely a complement operation
// achieved with xor %a, -1).
// Returns an empty SDValue when a required operand cannot be computed in a
// GPR; otherwise returns an i64 machine node (AND8/OR8/XOR8, or XORI8 with an
// immediate of 1 for the complement case).
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  // NOTE(review): the opening line of this statement is missing from the
  // listing; judging by the message it is an assertion that LogicOp is a
  // bitwise logic operation -- confirm against the original source.
         "Can only handle logic operations here.");
  assert(LogicOp.getValueType() == MVT::i1 &&
         "Can only handle logic operations on i1 values here.");
  SDLoc dl(LogicOp);
  SDValue LHS, RHS;

  // Special case: xor %a, -1
  bool IsBitwiseNegation = isBitwiseNot(LogicOp);

  // Produces a GPR sequence for each operand of the binary logic operation.
  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  // the value in a GPR and for logic operations, it will recursively produce
  // a GPR sequence for the operation. Any other operand yields an empty
  // SDValue.
  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
    unsigned OperandOpcode = Operand.getOpcode();
    if (OperandOpcode == ISD::SETCC)
      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
    else if (OperandOpcode == ISD::TRUNCATE) {
      // The result keeps the width of the truncate's input.
      SDValue InputOp = Operand.getOperand(0);
      EVT InVT = InputOp.getValueType();
      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
                                            PPC::RLDICL, dl, InVT, InputOp,
                                            S->getI64Imm(0, dl),
                                            S->getI64Imm(63, dl)), 0);
    } else if (ISD::isBitwiseLogicOp(OperandOpcode))
      return computeLogicOpInGPR(Operand);
    return SDValue();
  };
  LHS = getLogicOperand(LogicOp.getOperand(0));
  RHS = getLogicOperand(LogicOp.getOperand(1));

  // If a GPR sequence can't be produced for the LHS we can't proceed.
  // Not producing a GPR sequence for the RHS is only a problem if this isn't
  // a bitwise negation operation.
  if (!LHS || (!RHS && !IsBitwiseNegation))
    return SDValue();

  NumLogicOpsOnComparison++;

  // We will use the inputs as 64-bit values.
  if (LHS.getValueType() == MVT::i32)
    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);

  unsigned NewOpc;
  switch (LogicOp.getOpcode()) {
  default: llvm_unreachable("Unknown logic operation.");
  case ISD::AND: NewOpc = PPC::AND8; break;
  case ISD::OR:  NewOpc = PPC::OR8;  break;
  case ISD::XOR: NewOpc = PPC::XOR8; break;
  }

  // A complement is emitted as XORI8 with an immediate of 1; this replaces
  // whatever RHS and NewOpc were computed above.
  if (IsBitwiseNegation) {
    RHS = S->getI64Imm(1, dl);
    NewOpc = PPC::XORI8;
  }

  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);

}
3141
3142/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
3143/// Otherwise just reinterpret it as a 64-bit value.
3144/// Useful when emitting comparison code for 32-bit values without using
3145/// the compare instruction (which only considers the lower 32-bits).
3146SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
3147 assert(Input.getValueType() == MVT::i32 &&
3148 "Can only sign-extend 32-bit values here.");
3149 unsigned Opc = Input.getOpcode();
3150
3151 // The value was sign extended and then truncated to 32-bits. No need to
3152 // sign extend it again.
3153 if (Opc == ISD::TRUNCATE &&
3154 (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
3155 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
3156 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3157
3158 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3159 // The input is a sign-extending load. All ppc sign-extending loads
3160 // sign-extend to the full 64-bits.
3161 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3162 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3163
3164 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3165 // We don't sign-extend constants.
3166 if (InputConst)
3167 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3168
3169 SDLoc dl(Input);
3170 SignExtensionsAdded++;
3171 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3172 MVT::i64, Input), 0);
3173}
3174
3175/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3176/// Otherwise just reinterpret it as a 64-bit value.
3177/// Useful when emitting comparison code for 32-bit values without using
3178/// the compare instruction (which only considers the lower 32-bits).
3179SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3180 assert(Input.getValueType() == MVT::i32 &&
3181 "Can only zero-extend 32-bit values here.");
3182 unsigned Opc = Input.getOpcode();
3183
3184 // The only condition under which we can omit the actual extend instruction:
3185 // - The value is a positive constant
3186 // - The value comes from a load that isn't a sign-extending load
3187 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3188 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3189 (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3190 Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3191 if (IsTruncateOfZExt)
3192 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3193
3194 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3195 if (InputConst && InputConst->getSExtValue() >= 0)
3196 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3197
3198 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3199 // The input is a load that doesn't sign-extend (it will be zero-extended).
3200 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3201 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3202
3203 // None of the above, need to zero-extend.
3204 SDLoc dl(Input);
3205 ZeroExtensionsAdded++;
3206 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3207 S->getI64Imm(0, dl),
3208 S->getI64Imm(32, dl)), 0);
3209}
3210
3211// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3212// course not actual zero/sign extensions that will generate machine code,
3213// they're just a way to reinterpret a 32 bit value in a register as a
3214// 64 bit value and vice-versa.
3215SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3216 ExtOrTruncConversion Conv) {
3217 SDLoc dl(NatWidthRes);
3218
3219 // For reinterpreting 32-bit values as 64 bit values, we generate
3220 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3221 if (Conv == ExtOrTruncConversion::Ext) {
3222 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3223 SDValue SubRegIdx =
3224 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3225 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3226 ImDef, NatWidthRes, SubRegIdx), 0);
3227 }
3228
3229 assert(Conv == ExtOrTruncConversion::Trunc &&
3230 "Unknown convertion between 32 and 64 bit values.");
3231 // For reinterpreting 64-bit values as 32-bit values, we just need to
3232 // EXTRACT_SUBREG (i.e. extract the low word).
3233 SDValue SubRegIdx =
3234 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3235 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3236 NatWidthRes, SubRegIdx), 0);
3237}
3238
3239// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3240// Handle both zero-extensions and sign-extensions.
3241SDValue
3242IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3243 ZeroCompare CmpTy) {
3244 EVT InVT = LHS.getValueType();
3245 bool Is32Bit = InVT == MVT::i32;
3246 SDValue ToExtend;
3247
3248 // Produce the value that needs to be either zero or sign extended.
3249 switch (CmpTy) {
3250 case ZeroCompare::GEZExt:
3251 case ZeroCompare::GESExt:
3252 ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
3253 dl, InVT, LHS, LHS), 0);
3254 break;
3255 case ZeroCompare::LEZExt:
3256 case ZeroCompare::LESExt: {
3257 if (Is32Bit) {
3258 // Upper 32 bits cannot be undefined for this sequence.
3259 LHS = signExtendInputIfNeeded(LHS);
3260 SDValue Neg =
3261 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3262 ToExtend =
3263 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3264 Neg, S->getI64Imm(1, dl),
3265 S->getI64Imm(63, dl)), 0);
3266 } else {
3267 SDValue Addi =
3268 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3269 S->getI64Imm(~0ULL, dl)), 0);
3270 ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
3271 Addi, LHS), 0);
3272 }
3273 break;
3274 }
3275 }
3276
3277 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
3278 if (!Is32Bit &&
3279 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3280 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3281 ToExtend, S->getI64Imm(1, dl),
3282 S->getI64Imm(63, dl)), 0);
3283 if (!Is32Bit &&
3284 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3285 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
3286 S->getI64Imm(63, dl)), 0);
3287
3288 assert(Is32Bit && "Should have handled the 32-bit sequences above.");
3289 // For 32-bit sequences, the extensions differ between GE/LE cases.
3290 switch (CmpTy) {
3291 case ZeroCompare::GEZExt: {
3292 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3293 S->getI32Imm(31, dl) };
3294 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3295 ShiftOps), 0);
3296 }
3297 case ZeroCompare::GESExt:
3298 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
3299 S->getI32Imm(31, dl)), 0);
3300 case ZeroCompare::LEZExt:
3301 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
3302 S->getI32Imm(1, dl)), 0);
3303 case ZeroCompare::LESExt:
3304 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
3305 S->getI32Imm(-1, dl)), 0);
3306 }
3307
3308 // The above case covers all the enumerators so it can't have a default clause
3309 // to avoid compiler warnings.
3310 llvm_unreachable("Unknown zero-comparison type.");
3311}
3312
3313/// Produces a zero-extended result of comparing two 32-bit values according to
3314/// the passed condition code.
3315SDValue
3316IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3317 ISD::CondCode CC,
3318 int64_t RHSValue, SDLoc dl) {
     // NOTE(review): this listing skips lines 3319-3320, 3457 and 3477, so the
     // condition guarding the early return just below and (presumably) the
     // declarations of `Subtract` are not visible in this excerpt.
3321 return SDValue();
     // Flags for the constant right-hand sides that get dedicated sequences.
3322 bool IsRHSZero = RHSValue == 0;
3323 bool IsRHSOne = RHSValue == 1;
3324 bool IsRHSNegOne = RHSValue == -1LL;
3325 switch (CC) {
     // Any condition code not listed below yields an empty SDValue.
3326 default: return SDValue();
3327 case ISD::SETEQ: {
3328 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3329 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3330 SDValue Xor = IsRHSZero ? LHS :
3331 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3332 SDValue Clz =
3333 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3334 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3335 S->getI32Imm(31, dl) };
3336 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3337 ShiftOps), 0);
3338 }
3339 case ISD::SETNE: {
3340 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3341 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3342 SDValue Xor = IsRHSZero ? LHS :
3343 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3344 SDValue Clz =
3345 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3346 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3347 S->getI32Imm(31, dl) };
3348 SDValue Shift =
3349 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3350 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3351 S->getI32Imm(1, dl)), 0);
3352 }
3353 case ISD::SETGE: {
3354 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3355 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3356 if(IsRHSZero)
3357 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3358
3359 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3360 // by swapping inputs and falling through.
3361 std::swap(LHS, RHS);
     // After the swap the zero flag must describe the new RHS.
3362 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3363 IsRHSZero = RHSConst && RHSConst->isZero();
3364 [[fallthrough]];
3365 }
3366 case ISD::SETLE: {
3367 if (CmpInGPR == ICGPR_NonExtIn)
3368 return SDValue();
3369 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3370 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3371 if(IsRHSZero) {
     // NOTE(review): redundant; the same CmpInGPR check at the top of this
     // case has already returned for ICGPR_NonExtIn.
3372 if (CmpInGPR == ICGPR_NonExtIn)
3373 return SDValue();
3374 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3375 }
3376
3377 // The upper 32-bits of the register can't be undefined for this sequence.
3378 LHS = signExtendInputIfNeeded(LHS);
3379 RHS = signExtendInputIfNeeded(RHS);
3380 SDValue Sub =
3381 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3382 SDValue Shift =
3383 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3384 S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3385 0);
3386 return
3387 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3388 MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3389 }
3390 case ISD::SETGT: {
3391 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3392 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3393 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3394 // Handle SETGT -1 (which is equivalent to SETGE 0).
3395 if (IsRHSNegOne)
3396 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3397
3398 if (IsRHSZero) {
3399 if (CmpInGPR == ICGPR_NonExtIn)
3400 return SDValue();
3401 // The upper 32-bits of the register can't be undefined for this sequence.
3402 LHS = signExtendInputIfNeeded(LHS);
     // NOTE(review): RHS is not read again before this branch returns.
3403 RHS = signExtendInputIfNeeded(RHS);
3404 SDValue Neg =
3405 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3406 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3407 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3408 }
3409 // Neither special case applies (RHS == 0 or RHS == -1). Handle (%a > %b) as
3410 // (%b < %a) by swapping inputs and falling through.
3411 std::swap(LHS, RHS);
     // Recompute the constant flags for the new RHS before falling through.
3412 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3413 IsRHSZero = RHSConst && RHSConst->isZero();
3414 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3415 [[fallthrough]];
3416 }
3417 case ISD::SETLT: {
3418 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3419 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3420 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3421 // Handle SETLT 1 (which is equivalent to SETLE 0).
3422 if (IsRHSOne) {
3423 if (CmpInGPR == ICGPR_NonExtIn)
3424 return SDValue();
3425 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3426 }
3427
3428 if (IsRHSZero) {
3429 SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3430 S->getI32Imm(31, dl) };
3431 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3432 ShiftOps), 0);
3433 }
3434
3435 if (CmpInGPR == ICGPR_NonExtIn)
3436 return SDValue();
3437 // The upper 32-bits of the register can't be undefined for this sequence.
3438 LHS = signExtendInputIfNeeded(LHS);
3439 RHS = signExtendInputIfNeeded(RHS);
3440 SDValue SUBFNode =
3441 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3442 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3443 SUBFNode, S->getI64Imm(1, dl),
3444 S->getI64Imm(63, dl)), 0);
3445 }
3446 case ISD::SETUGE:
3447 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %a, %b), 63), 1)
3448 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %b, %a), 63), 1)
3449 std::swap(LHS, RHS);
3450 [[fallthrough]];
3451 case ISD::SETULE: {
3452 if (CmpInGPR == ICGPR_NonExtIn)
3453 return SDValue();
3454 // The upper 32-bits of the register can't be undefined for this sequence.
3455 LHS = zeroExtendInputIfNeeded(LHS);
3456 RHS = zeroExtendInputIfNeeded(RHS);
3458 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3459 SDValue SrdiNode =
3460 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3461 Subtract, S->getI64Imm(1, dl),
3462 S->getI64Imm(63, dl)), 0);
3463 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3464 S->getI32Imm(1, dl)), 0);
3465 }
3466 case ISD::SETUGT:
3467 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3468 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3469 std::swap(LHS, RHS);
3470 [[fallthrough]];
3471 case ISD::SETULT: {
3472 if (CmpInGPR == ICGPR_NonExtIn)
3473 return SDValue();
3474 // The upper 32-bits of the register can't be undefined for this sequence.
3475 LHS = zeroExtendInputIfNeeded(LHS);
3476 RHS = zeroExtendInputIfNeeded(RHS);
3478 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3479 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3480 Subtract, S->getI64Imm(1, dl),
3481 S->getI64Imm(63, dl)), 0);
3482 }
3483 }
3484}
3485
3486/// Produces a sign-extended result of comparing two 32-bit values according to
3487/// the passed condition code.
3488SDValue
3489IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3490 ISD::CondCode CC,
3491 int64_t RHSValue, SDLoc dl) {
     // NOTE(review): this listing skips lines 3492-3493, 3630 and 3650, so the
     // condition guarding the early return just below and (presumably) the
     // declarations of `Subtract` are not visible in this excerpt.
3494 return SDValue();
     // Flags for the constant right-hand sides that get dedicated sequences.
3495 bool IsRHSZero = RHSValue == 0;
3496 bool IsRHSOne = RHSValue == 1;
3497 bool IsRHSNegOne = RHSValue == -1LL;
3498
3499 switch (CC) {
     // Any condition code not listed below yields an empty SDValue.
3500 default: return SDValue();
3501 case ISD::SETEQ: {
3502 // (sext (setcc %a, %b, seteq)) ->
3503 // (neg (lshr (ctlz (xor %a, %b)), 5))
3504 // (sext (setcc %a, 0, seteq)) ->
3505 // (neg (lshr (ctlz %a), 5))
3506 SDValue CountInput = IsRHSZero ? LHS :
3507 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3508 SDValue Cntlzw =
3509 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
     // RLWINM with operands (27, 5, 31): the SETNE case below uses the same
     // operands and describes them as a logical shift right by 5.
3510 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3511 S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3512 SDValue Slwi =
3513 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3514 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3515 }
3516 case ISD::SETNE: {
3517 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3518 // flip the bit, finally take 2's complement.
3519 // (sext (setcc %a, %b, setne)) ->
3520 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3521 // Same as above, but the first xor is not needed.
3522 // (sext (setcc %a, 0, setne)) ->
3523 // (neg (xor (lshr (ctlz %a), 5), 1))
3524 SDValue Xor = IsRHSZero ? LHS :
3525 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3526 SDValue Clz =
3527 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3528 SDValue ShiftOps[] =
3529 { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3530 SDValue Shift =
3531 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3532 SDValue Xori =
3533 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3534 S->getI32Imm(1, dl)), 0);
3535 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3536 }
3537 case ISD::SETGE: {
3538 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3539 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3540 if (IsRHSZero)
3541 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3542
3543 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3544 // by swapping inputs and falling through.
3545 std::swap(LHS, RHS);
     // After the swap the zero flag must describe the new RHS.
3546 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3547 IsRHSZero = RHSConst && RHSConst->isZero();
3548 [[fallthrough]];
3549 }
3550 case ISD::SETLE: {
3551 if (CmpInGPR == ICGPR_NonExtIn)
3552 return SDValue();
3553 // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
3554 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3555 if (IsRHSZero)
3556 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3557
3558 // The upper 32-bits of the register can't be undefined for this sequence.
3559 LHS = signExtendInputIfNeeded(LHS);
3560 RHS = signExtendInputIfNeeded(RHS);
     // NOTE(review): a Glue result type is requested here, but only result 0
     // of this node is used in the code below.
3561 SDValue SUBFNode =
3562 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3563 LHS, RHS), 0);
3564 SDValue Srdi =
3565 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3566 SUBFNode, S->getI64Imm(1, dl),
3567 S->getI64Imm(63, dl)), 0);
3568 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3569 S->getI32Imm(-1, dl)), 0);
3570 }
3571 case ISD::SETGT: {
3572 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3573 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3574 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3575 if (IsRHSNegOne)
3576 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3577 if (IsRHSZero) {
3578 if (CmpInGPR == ICGPR_NonExtIn)
3579 return SDValue();
3580 // The upper 32-bits of the register can't be undefined for this sequence.
3581 LHS = signExtendInputIfNeeded(LHS);
     // NOTE(review): RHS is not read again before this branch returns.
3582 RHS = signExtendInputIfNeeded(RHS);
3583 SDValue Neg =
3584 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3585 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3586 S->getI64Imm(63, dl)), 0);
3587 }
3588 // Neither special case applies (RHS == 0 or RHS == -1). Handle (%a > %b) as
3589 // (%b < %a) by swapping inputs and falling through.
3590 std::swap(LHS, RHS);
     // Recompute the constant flags for the new RHS before falling through.
3591 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3592 IsRHSZero = RHSConst && RHSConst->isZero();
3593 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3594 [[fallthrough]];
3595 }
3596 case ISD::SETLT: {
3597 // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3598 // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3599 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
3600 if (IsRHSOne) {
3601 if (CmpInGPR == ICGPR_NonExtIn)
3602 return SDValue();
3603 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3604 }
3605 if (IsRHSZero)
3606 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3607 S->getI32Imm(31, dl)), 0);
3608
3609 if (CmpInGPR == ICGPR_NonExtIn)
3610 return SDValue();
3611 // The upper 32-bits of the register can't be undefined for this sequence.
3612 LHS = signExtendInputIfNeeded(LHS);
3613 RHS = signExtendInputIfNeeded(RHS);
3614 SDValue SUBFNode =
3615 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3616 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3617 SUBFNode, S->getI64Imm(63, dl)), 0);
3618 }
3619 case ISD::SETUGE:
3620 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3621 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3622 std::swap(LHS, RHS);
3623 [[fallthrough]];
3624 case ISD::SETULE: {
3625 if (CmpInGPR == ICGPR_NonExtIn)
3626 return SDValue();
3627 // The upper 32-bits of the register can't be undefined for this sequence.
3628 LHS = zeroExtendInputIfNeeded(LHS);
3629 RHS = zeroExtendInputIfNeeded(RHS);
3631 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3632 SDValue Shift =
3633 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3634 S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3635 0);
3636 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3637 S->getI32Imm(-1, dl)), 0);
3638 }
3639 case ISD::SETUGT:
3640 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3641 // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3642 std::swap(LHS, RHS);
3643 [[fallthrough]];
3644 case ISD::SETULT: {
3645 if (CmpInGPR == ICGPR_NonExtIn)
3646 return SDValue();
3647 // The upper 32-bits of the register can't be undefined for this sequence.
3648 LHS = zeroExtendInputIfNeeded(LHS);
3649 RHS = zeroExtendInputIfNeeded(RHS);
3651 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3652 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3653 Subtract, S->getI64Imm(63, dl)), 0);
3654 }
3655 }
3656}
3657
3658/// Produces a zero-extended result of comparing two 64-bit values according to
3659/// the passed condition code.
3660SDValue
3661IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3662 ISD::CondCode CC,
3663 int64_t RHSValue, SDLoc dl) {
     // NOTE(review): this listing skips lines 3664-3665, so the condition
     // guarding the early return just below is not visible in this excerpt.
3666 return SDValue();
     // Flags for the constant right-hand sides that get dedicated sequences.
3667 bool IsRHSZero = RHSValue == 0;
3668 bool IsRHSOne = RHSValue == 1;
3669 bool IsRHSNegOne = RHSValue == -1LL;
3670 switch (CC) {
     // Any condition code not listed below yields an empty SDValue.
3671 default: return SDValue();
3672 case ISD::SETEQ: {
3673 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3674 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3675 SDValue Xor = IsRHSZero ? LHS :
3676 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3677 SDValue Clz =
3678 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3679 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3680 S->getI64Imm(58, dl),
3681 S->getI64Imm(63, dl)), 0);
3682 }
3683 case ISD::SETNE: {
3684 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3685 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, (xor %a, %b), addc.CA)
3686 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3687 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, %a, addcz.CA)
3688 SDValue Xor = IsRHSZero ? LHS :
3689 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3690 SDValue AC =
3691 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3692 Xor, S->getI32Imm(~0U, dl)), 0);
     // AC.getValue(1) is the Glue result of the ADDIC8 node created above.
3693 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3694 Xor, AC.getValue(1)), 0);
3695 }
3696 case ISD::SETGE: {
3697 // {subc.reg, subc.CA} = (subcarry %a, %b)
3698 // (zext (setcc %a, %b, setge)) ->
3699 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3700 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3701 if (IsRHSZero)
3702 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
     // Handle (%a >= %b) as (%b <= %a): swap the inputs, refresh the zero
     // flag for the new RHS, and fall through to SETLE.
3703 std::swap(LHS, RHS);
3704 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3705 IsRHSZero = RHSConst && RHSConst->isZero();
3706 [[fallthrough]];
3707 }
3708 case ISD::SETLE: {
3709 // {subc.reg, subc.CA} = (subcarry %b, %a)
3710 // (zext (setcc %a, %b, setle)) ->
3711 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3712 // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
3713 if (IsRHSZero)
3714 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3715 SDValue ShiftL =
3716 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3717 S->getI64Imm(1, dl),
3718 S->getI64Imm(63, dl)), 0);
3719 SDValue ShiftR =
3720 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3721 S->getI64Imm(63, dl)), 0);
     // Result 1 (the Glue value) of SUBFC8 is what gets passed on to ADDE8.
3722 SDValue SubtractCarry =
3723 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3724 LHS, RHS), 1);
3725 return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3726 ShiftR, ShiftL, SubtractCarry), 0);
3727 }
3728 case ISD::SETGT: {
3729 // {subc.reg, subc.CA} = (subcarry %b, %a)
3730 // (zext (setcc %a, %b, setgt)) ->
3731 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3732 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
     // SETGT -1 is handled with the same helper call as SETGE 0 above.
3733 if (IsRHSNegOne)
3734 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3735 if (IsRHSZero) {
3736 SDValue Addi =
3737 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3738 S->getI64Imm(~0ULL, dl)), 0);
3739 SDValue Nor =
3740 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3741 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3742 S->getI64Imm(1, dl),
3743 S->getI64Imm(63, dl)), 0);
3744 }
     // Handle (%a > %b) as (%b < %a): swap the inputs, refresh the constant
     // flags for the new RHS, and fall through to SETLT.
3745 std::swap(LHS, RHS);
3746 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3747 IsRHSZero = RHSConst && RHSConst->isZero();
3748 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3749 [[fallthrough]];
3750 }
3751 case ISD::SETLT: {
3752 // {subc.reg, subc.CA} = (subcarry %a, %b)
3753 // (zext (setcc %a, %b, setlt)) ->
3754 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3755 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
     // SETLT 1 is handled with the same helper call as SETLE 0 above.
3756 if (IsRHSOne)
3757 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3758 if (IsRHSZero)
3759 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3760 S->getI64Imm(1, dl),
3761 S->getI64Imm(63, dl)), 0);
3762 SDValue SRADINode =
3763 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3764 LHS, S->getI64Imm(63, dl)), 0);
3765 SDValue SRDINode =
3766 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3767 RHS, S->getI64Imm(1, dl),
3768 S->getI64Imm(63, dl)), 0);
3769 SDValue SUBFC8Carry =
3770 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3771 RHS, LHS), 1);
3772 SDValue ADDE8Node =
3773 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3774 SRDINode, SRADINode, SUBFC8Carry), 0);
3775 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3776 ADDE8Node, S->getI64Imm(1, dl)), 0);
3777 }
3778 case ISD::SETUGE:
3779 // {subc.reg, subc.CA} = (subcarry %a, %b)
3780 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3781 std::swap(LHS, RHS);
3782 [[fallthrough]];
3783 case ISD::SETULE: {
3784 // {subc.reg, subc.CA} = (subcarry %b, %a)
3785 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3786 SDValue SUBFC8Carry =
3787 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3788 LHS, RHS), 1);
3789 SDValue SUBFE8Node =
3790 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3791 LHS, LHS, SUBFC8Carry), 0);
3792 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3793 SUBFE8Node, S->getI64Imm(1, dl)), 0);
3794 }
3795 case ISD::SETUGT:
3796 // {subc.reg, subc.CA} = (subcarry %b, %a)
3797 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3798 std::swap(LHS, RHS);
3799 [[fallthrough]];
3800 case ISD::SETULT: {
3801 // {subc.reg, subc.CA} = (subcarry %a, %b)
3802 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3803 SDValue SubtractCarry =
3804 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3805 RHS, LHS), 1);
3806 SDValue ExtSub =
3807 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3808 LHS, LHS, SubtractCarry), 0);
3809 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3810 ExtSub), 0);
3811 }
3812 }
3813}
3814
3815/// Produces a sign-extended result of comparing two 64-bit values according to
3816/// the passed condition code.
3817SDValue
3818IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3819 ISD::CondCode CC,
3820 int64_t RHSValue, SDLoc dl) {
     // NOTE(review): this listing skips lines 3821-3822, so the condition
     // guarding the early return just below is not visible in this excerpt.
3823 return SDValue();
     // Flags for the constant right-hand sides that get dedicated sequences.
3824 bool IsRHSZero = RHSValue == 0;
3825 bool IsRHSOne = RHSValue == 1;
3826 bool IsRHSNegOne = RHSValue == -1LL;
3827 switch (CC) {
     // Any condition code not listed below yields an empty SDValue.
3828 default: return SDValue();
3829 case ISD::SETEQ: {
3830 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3831 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3832 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3833 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3834 SDValue AddInput = IsRHSZero ? LHS :
3835 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3836 SDValue Addic =
3837 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3838 AddInput, S->getI32Imm(~0U, dl)), 0);
     // Addic.getValue(1) is the Glue result of the ADDIC8 node created above.
3839 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3840 Addic, Addic.getValue(1)), 0);
3841 }
3842 case ISD::SETNE: {
3843 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3844 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3845 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3846 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3847 SDValue Xor = IsRHSZero ? LHS :
3848 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3849 SDValue SC =
3850 SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3851 Xor, S->getI32Imm(0, dl)), 0);
     // SC.getValue(1) is the Glue result of the SUBFIC8 node created above.
3852 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3853 SC, SC.getValue(1)), 0);
3854 }
3855 case ISD::SETGE: {
3856 // {subc.reg, subc.CA} = (subcarry %a, %b)
3857 // (sext (setcc %a, %b, setge)) ->
3858 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3859 // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3860 if (IsRHSZero)
3861 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
     // Handle (%a >= %b) as (%b <= %a): swap the inputs, refresh the zero
     // flag for the new RHS, and fall through to SETLE.
3862 std::swap(LHS, RHS);
3863 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3864 IsRHSZero = RHSConst && RHSConst->isZero();
3865 [[fallthrough]];
3866 }
3867 case ISD::SETLE: {
3868 // {subc.reg, subc.CA} = (subcarry %b, %a)
3869 // (sext (setcc %a, %b, setle)) ->
3870 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3871 // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3872 if (IsRHSZero)
3873 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3874 SDValue ShiftR =
3875 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3876 S->getI64Imm(63, dl)), 0);
3877 SDValue ShiftL =
3878 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3879 S->getI64Imm(1, dl),
3880 S->getI64Imm(63, dl)), 0);
     // Result 1 (the Glue value) of SUBFC8 is what gets passed on to ADDE8.
3881 SDValue SubtractCarry =
3882 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3883 LHS, RHS), 1);
3884 SDValue Adde =
3885 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3886 ShiftR, ShiftL, SubtractCarry), 0);
3887 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3888 }
3889 case ISD::SETGT: {
3890 // {subc.reg, subc.CA} = (subcarry %b, %a)
3891 // (sext (setcc %a, %b, setgt)) ->
3892 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3893 // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
     // SETGT -1 is handled with the same helper call as SETGE 0 above.
3894 if (IsRHSNegOne)
3895 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3896 if (IsRHSZero) {
3897 SDValue Add =
3898 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3899 S->getI64Imm(-1, dl)), 0);
3900 SDValue Nor =
3901 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3902 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3903 S->getI64Imm(63, dl)), 0);
3904 }
     // Handle (%a > %b) as (%b < %a): swap the inputs, refresh the constant
     // flags for the new RHS, and fall through to SETLT.
3905 std::swap(LHS, RHS);
3906 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3907 IsRHSZero = RHSConst && RHSConst->isZero();
3908 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3909 [[fallthrough]];
3910 }
3911 case ISD::SETLT: {
3912 // {subc.reg, subc.CA} = (subcarry %a, %b)
3913 // (sext (setcc %a, %b, setlt)) ->
3914 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3915 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
     // SETLT 1 is handled with the same helper call as SETLE 0 above.
3916 if (IsRHSOne)
3917 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3918 if (IsRHSZero) {
3919 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3920 S->getI64Imm(63, dl)), 0);
3921 }
3922 SDValue SRADINode =
3923 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3924 LHS, S->getI64Imm(63, dl)), 0);
3925 SDValue SRDINode =
3926 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3927 RHS, S->getI64Imm(1, dl),
3928 S->getI64Imm(63, dl)), 0);
3929 SDValue SUBFC8Carry =
3930 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3931 RHS, LHS), 1);
3932 SDValue ADDE8Node =
3933 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3934 SRDINode, SRADINode, SUBFC8Carry), 0);
3935 SDValue XORI8Node =
3936 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3937 ADDE8Node, S->getI64Imm(1, dl)), 0);
3938 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3939 XORI8Node), 0);
3940 }
3941 case ISD::SETUGE:
3942 // {subc.reg, subc.CA} = (subcarry %a, %b)
3943 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3944 std::swap(LHS, RHS);
3945 [[fallthrough]];
3946 case ISD::SETULE: {
3947 // {subc.reg, subc.CA} = (subcarry %b, %a)
3948 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3949 SDValue SubtractCarry =
3950 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3951 LHS, RHS), 1);
3952 SDValue ExtSub =
3953 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3954 LHS, SubtractCarry), 0);
     // NOR of a value with itself is its bitwise complement.
3955 return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3956 ExtSub, ExtSub), 0);
3957 }
3958 case ISD::SETUGT:
3959 // {subc.reg, subc.CA} = (subcarry %b, %a)
3960 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3961 std::swap(LHS, RHS);
3962 [[fallthrough]];
3963 case ISD::SETULT: {
3964 // {subc.reg, subc.CA} = (subcarry %a, %b)
3965 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3966 SDValue SubCarry =
3967 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3968 RHS, LHS), 1);
3969 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3970 LHS, LHS, SubCarry), 0);
3971 }
3972 }
3973}
3974
3975/// Do all uses of this SDValue need the result in a GPR?
3976/// This is meant to be used on values that have type i1 since
3977/// it is somewhat meaningless to ask if values of other types
3978/// should be kept in GPR's.
3979static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3980 assert(Compare.getOpcode() == ISD::SETCC &&
3981 "An ISD::SETCC node required here.");
3982
3983 // For values that have a single use, the caller should obviously already have
3984 // checked if that use is an extending use. We check the other uses here.
3985 if (Compare.hasOneUse())
3986 return true;
3987 // We want the value in a GPR if it is being extended, used for a select, or
3988 // used in logical operations.
3989 for (auto *CompareUse : Compare.getNode()->users())
3990 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3991 CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3992 CompareUse->getOpcode() != ISD::SELECT &&
3993 !ISD::isBitwiseLogicOp(CompareUse->getOpcode())) {
3994 OmittedForNonExtendUses++;
3995 return false;
3996 }
3997 return true;
3998}
3999
4000/// Returns an equivalent of a SETCC node but with the result the same width as
4001/// the inputs. This can also be used for SELECT_CC if either the true or false
4002/// values is a power of two while the other is zero.
4003SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
4004 SetccInGPROpts ConvOpts) {
4005 assert((Compare.getOpcode() == ISD::SETCC ||
4006 Compare.getOpcode() == ISD::SELECT_CC) &&
4007 "An ISD::SETCC node required here.");
4008
4009 // Don't convert this comparison to a GPR sequence because there are uses
4010 // of the i1 result (i.e. uses that require the result in the CR).
4011 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
4012 return SDValue();
4013
4014 SDValue LHS = Compare.getOperand(0);
4015 SDValue RHS = Compare.getOperand(1);
4016
4017 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
4018 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
4019 ISD::CondCode CC =
4020 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
4021 EVT InputVT = LHS.getValueType();
4022 if (InputVT != MVT::i32 && InputVT != MVT::i64)
4023 return SDValue();
4024
4025 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
4026 ConvOpts == SetccInGPROpts::SExtInvert)
4027 CC = ISD::getSetCCInverse(CC, InputVT);
4028
4029 bool Inputs32Bit = InputVT == MVT::i32;
4030
4031 SDLoc dl(Compare);
4032 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
4033 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
4034 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
4035 ConvOpts == SetccInGPROpts::SExtInvert;
4036
4037 if (IsSext && Inputs32Bit)
4038 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4039 else if (Inputs32Bit)
4040 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4041 else if (IsSext)
4042 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4043 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4044}
4045
4046} // end anonymous namespace
4047
4048bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
4049 if (N->getValueType(0) != MVT::i32 &&
4050 N->getValueType(0) != MVT::i64)
4051 return false;
4052
4053 // This optimization will emit code that assumes 64-bit registers
4054 // so we don't want to run it in 32-bit mode. Also don't run it
4055 // on functions that are not to be optimized.
4056 if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64())
4057 return false;
4058
4059 // For POWER10, it is more profitable to use the set boolean extension
4060 // instructions rather than the integer compare elimination codegen.
4061 // Users can override this via the command line option, `--ppc-gpr-icmps`.
4062 if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
4063 return false;
4064
4065 switch (N->getOpcode()) {
4066 default: break;
4067 case ISD::ZERO_EXTEND:
4068 case ISD::SIGN_EXTEND:
4069 case ISD::AND:
4070 case ISD::OR:
4071 case ISD::XOR: {
4072 IntegerCompareEliminator ICmpElim(CurDAG, this);
4073 if (SDNode *New = ICmpElim.Select(N)) {
4074 ReplaceNode(N, New);
4075 return true;
4076 }
4077 }
4078 }
4079 return false;
4080}
4081
4082bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
4083 if (N->getValueType(0) != MVT::i32 &&
4084 N->getValueType(0) != MVT::i64)
4085 return false;
4086
4087 if (!UseBitPermRewriter)
4088 return false;
4089
4090 switch (N->getOpcode()) {
4091 default: break;
4092 case ISD::SRL:
4093 // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
4094 // uses the BRH instruction.
4095 if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
4096 N->getOperand(0).getOpcode() == ISD::BSWAP) {
4097 auto &OpRight = N->getOperand(1);
4098 ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
4099 if (SRLConst && SRLConst->getSExtValue() == 16)
4100 return false;
4101 }
4102 [[fallthrough]];
4103 case ISD::ROTL:
4104 case ISD::SHL:
4105 case ISD::AND:
4106 case ISD::OR: {
4107 BitPermutationSelector BPS(CurDAG);
4108 if (SDNode *New = BPS.Select(N)) {
4109 ReplaceNode(N, New);
4110 return true;
4111 }
4112 return false;
4113 }
4114 }
4115
4116 return false;
4117}
4118
4119/// SelectCC - Select a comparison of the specified values with the specified
4120/// condition code, returning the CR# of the expression.
4121SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4122 const SDLoc &dl, SDValue Chain) {
4123 // Always select the LHS.
4124 unsigned Opc;
4125
4126 if (LHS.getValueType() == MVT::i32) {
4127 unsigned Imm;
4128 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4129 if (isInt32Immediate(RHS, Imm)) {
4130 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4131 if (isUInt<16>(Imm))
4132 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4133 getI32Imm(Imm & 0xFFFF, dl)),
4134 0);
4135 // If this is a 16-bit signed immediate, fold it.
4136 if (isInt<16>((int)Imm))
4137 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4138 getI32Imm(Imm & 0xFFFF, dl)),
4139 0);
4140
4141 // For non-equality comparisons, the default code would materialize the
4142 // constant, then compare against it, like this:
4143 // lis r2, 4660
4144 // ori r2, r2, 22136
4145 // cmpw cr0, r3, r2
4146 // Since we are just comparing for equality, we can emit this instead:
4147 // xoris r0,r3,0x1234
4148 // cmplwi cr0,r0,0x5678
4149 // beq cr0,L6
4150 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
4151 getI32Imm(Imm >> 16, dl)), 0);
4152 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
4153 getI32Imm(Imm & 0xFFFF, dl)), 0);
4154 }
4155 Opc = PPC::CMPLW;
4156 } else if (ISD::isUnsignedIntSetCC(CC)) {
4157 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
4158 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4159 getI32Imm(Imm & 0xFFFF, dl)), 0);
4160 Opc = PPC::CMPLW;
4161 } else {
4162 int16_t SImm;
4163 if (isIntS16Immediate(RHS, SImm))
4164 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4165 getI32Imm((int)SImm & 0xFFFF,
4166 dl)),
4167 0);
4168 Opc = PPC::CMPW;
4169 }
4170 } else if (LHS.getValueType() == MVT::i64) {
4171 uint64_t Imm;
4172 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4173 if (isInt64Immediate(RHS.getNode(), Imm)) {
4174 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4175 if (isUInt<16>(Imm))
4176 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4177 getI32Imm(Imm & 0xFFFF, dl)),
4178 0);
4179 // If this is a 16-bit signed immediate, fold it.
4180 if (isInt<16>(Imm))
4181 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4182 getI32Imm(Imm & 0xFFFF, dl)),
4183 0);
4184
4185 // For non-equality comparisons, the default code would materialize the
4186 // constant, then compare against it, like this:
4187 // lis r2, 4660
4188 // ori r2, r2, 22136
4189 // cmpd cr0, r3, r2
4190 // Since we are just comparing for equality, we can emit this instead:
4191 // xoris r0,r3,0x1234
4192 // cmpldi cr0,r0,0x5678
4193 // beq cr0,L6
4194 if (isUInt<32>(Imm)) {
4195 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4196 getI64Imm(Imm >> 16, dl)), 0);
4197 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4198 getI64Imm(Imm & 0xFFFF, dl)),
4199 0);
4200 }
4201 }
4202 Opc = PPC::CMPLD;
4203 } else if (ISD::isUnsignedIntSetCC(CC)) {
4204 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4205 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4206 getI64Imm(Imm & 0xFFFF, dl)), 0);
4207 Opc = PPC::CMPLD;
4208 } else {
4209 int16_t SImm;
4210 if (isIntS16Immediate(RHS, SImm))
4211 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4212 getI64Imm(SImm & 0xFFFF, dl)),
4213 0);
4214 Opc = PPC::CMPD;
4215 }
4216 } else if (LHS.getValueType() == MVT::f32) {
4217 if (Subtarget->hasSPE()) {
4218 switch (CC) {
4219 default:
4220 case ISD::SETEQ:
4221 case ISD::SETNE:
4222 Opc = PPC::EFSCMPEQ;
4223 break;
4224 case ISD::SETLT:
4225 case ISD::SETGE:
4226 case ISD::SETOLT:
4227 case ISD::SETOGE:
4228 case ISD::SETULT:
4229 case ISD::SETUGE:
4230 Opc = PPC::EFSCMPLT;
4231 break;
4232 case ISD::SETGT:
4233 case ISD::SETLE:
4234 case ISD::SETOGT:
4235 case ISD::SETOLE:
4236 case ISD::SETUGT:
4237 case ISD::SETULE:
4238 Opc = PPC::EFSCMPGT;
4239 break;
4240 }
4241 } else
4242 Opc = PPC::FCMPUS;
4243 } else if (LHS.getValueType() == MVT::f64) {
4244 if (Subtarget->hasSPE()) {
4245 switch (CC) {
4246 default:
4247 case ISD::SETEQ:
4248 case ISD::SETNE:
4249 Opc = PPC::EFDCMPEQ;
4250 break;
4251 case ISD::SETLT:
4252 case ISD::SETGE:
4253 case ISD::SETOLT:
4254 case ISD::SETOGE:
4255 case ISD::SETULT:
4256 case ISD::SETUGE:
4257 Opc = PPC::EFDCMPLT;
4258 break;
4259 case ISD::SETGT:
4260 case ISD::SETLE:
4261 case ISD::SETOGT:
4262 case ISD::SETOLE:
4263 case ISD::SETUGT:
4264 case ISD::SETULE:
4265 Opc = PPC::EFDCMPGT;
4266 break;
4267 }
4268 } else
4269 Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4270 } else {
4271 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4272 assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4273 Opc = PPC::XSCMPUQP;
4274 }
4275 if (Chain)
4276 return SDValue(
4277 CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4278 0);
4279 else
4280 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4281}
4282
// NOTE(review): the first line of this function's signature (embedded line
// 4283) is absent from this listing; the return type, the function name and
// the declarations of the CC and VT parameters used below presumably live on
// it - confirm against the original file.
//
// Maps the condition code CC to a PPC::PRED_* value. When the subtarget has
// SPE and VT is a floating-point type, the eq/ne/lt/ge conditions are mapped
// to PRED_GT or PRED_LE instead of their usual predicate.
4284 const PPCSubtarget *Subtarget) {
4285 // For SPE instructions, the result is in GT bit of the CR
4286 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4287
4288 switch (CC) {
// These four conditions are expected to be gone before this point.
4289 case ISD::SETUEQ:
4290 case ISD::SETONE:
4291 case ISD::SETOLE:
4292 case ISD::SETOGE:
4293 llvm_unreachable("Should be lowered by legalize!");
4294 default: llvm_unreachable("Unknown condition!");
4295 case ISD::SETOEQ:
4296 case ISD::SETEQ:
4297 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4298 case ISD::SETUNE:
4299 case ISD::SETNE:
4300 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4301 case ISD::SETOLT:
4302 case ISD::SETLT:
4303 return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
// le and gt map to the same predicate with or without SPE.
4304 case ISD::SETULE:
4305 case ISD::SETLE:
4306 return PPC::PRED_LE;
4307 case ISD::SETOGT:
4308 case ISD::SETGT:
4309 return PPC::PRED_GT;
4310 case ISD::SETUGE:
4311 case ISD::SETGE:
4312 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4313 case ISD::SETO: return PPC::PRED_NU;
4314 case ISD::SETUO: return PPC::PRED_UN;
4315 // These two are invalid for floating point. Assume we have int.
4316 case ISD::SETULT: return PPC::PRED_LT;
4317 case ISD::SETUGT: return PPC::PRED_GT;
4318 }
4319}
4320
4321/// getCRIdxForSetCC - Return the index of the condition register field
4322/// associated with the SetCC condition, and whether or not the field is
4323/// treated as inverted. That is, lt = 0; ge = 0 inverted.
4324static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4325 Invert = false;
4326 switch (CC) {
4327 default: llvm_unreachable("Unknown condition!");
4328 case ISD::SETOLT:
4329 case ISD::SETLT: return 0; // Bit #0 = SETOLT
4330 case ISD::SETOGT:
4331 case ISD::SETGT: return 1; // Bit #1 = SETOGT
4332 case ISD::SETOEQ:
4333 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4334 case ISD::SETUO: return 3; // Bit #3 = SETUO
4335 case ISD::SETUGE:
4336 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4337 case ISD::SETULE:
4338 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4339 case ISD::SETUNE:
4340 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4341 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4342 case ISD::SETUEQ:
4343 case ISD::SETOGE:
4344 case ISD::SETOLE:
4345 case ISD::SETONE:
4346 llvm_unreachable("Invalid branch code: should be expanded by legalize");
4347 // These are invalid for floating point. Assume integer.
4348 case ISD::SETULT: return 0;
4349 case ISD::SETUGT: return 1;
4350 }
4351}
4352
4353// getVCmpInst: return the vector compare instruction for the specified
4354// vector type and condition code. Since this is for altivec specific code,
4355// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4356// and v4f32).
4357static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4358 bool HasVSX, bool &Swap, bool &Negate) {
4359 Swap = false;
4360 Negate = false;
4361
4362 if (VecVT.isFloatingPoint()) {
4363 /* Handle some cases by swapping input operands. */
4364 switch (CC) {
4365 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4366 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4367 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4368 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4369 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4370 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4371 default: break;
4372 }
4373 /* Handle some cases by negating the result. */
4374 switch (CC) {
4375 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4376 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4377 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4378 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4379 default: break;
4380 }
4381 /* We have instructions implementing the remaining cases. */
4382 switch (CC) {
4383 case ISD::SETEQ:
4384 case ISD::SETOEQ:
4385 if (VecVT == MVT::v4f32)
4386 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4387 else if (VecVT == MVT::v2f64)
4388 return PPC::XVCMPEQDP;
4389 break;
4390 case ISD::SETGT:
4391 case ISD::SETOGT:
4392 if (VecVT == MVT::v4f32)
4393 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4394 else if (VecVT == MVT::v2f64)
4395 return PPC::XVCMPGTDP;
4396 break;
4397 case ISD::SETGE:
4398 case ISD::SETOGE:
4399 if (VecVT == MVT::v4f32)
4400 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4401 else if (VecVT == MVT::v2f64)
4402 return PPC::XVCMPGEDP;
4403 break;
4404 default:
4405 break;
4406 }
4407 llvm_unreachable("Invalid floating-point vector compare condition");
4408 } else {
4409 /* Handle some cases by swapping input operands. */
4410 switch (CC) {
4411 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4412 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4413 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4414 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4415 default: break;
4416 }
4417 /* Handle some cases by negating the result. */
4418 switch (CC) {
4419 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4420 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4421 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4422 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4423 default: break;
4424 }
4425 /* We have instructions implementing the remaining cases. */
4426 switch (CC) {
4427 case ISD::SETEQ:
4428 case ISD::SETUEQ:
4429 if (VecVT == MVT::v16i8)
4430 return PPC::VCMPEQUB;
4431 else if (VecVT == MVT::v8i16)
4432 return PPC::VCMPEQUH;
4433 else if (VecVT == MVT::v4i32)
4434 return PPC::VCMPEQUW;
4435 else if (VecVT == MVT::v2i64)
4436 return PPC::VCMPEQUD;
4437 else if (VecVT == MVT::v1i128)
4438 return PPC::VCMPEQUQ;
4439 break;
4440 case ISD::SETGT:
4441 if (VecVT == MVT::v16i8)
4442 return PPC::VCMPGTSB;
4443 else if (VecVT == MVT::v8i16)
4444 return PPC::VCMPGTSH;
4445 else if (VecVT == MVT::v4i32)
4446 return PPC::VCMPGTSW;
4447 else if (VecVT == MVT::v2i64)
4448 return PPC::VCMPGTSD;
4449 else if (VecVT == MVT::v1i128)
4450 return PPC::VCMPGTSQ;
4451 break;
4452 case ISD::SETUGT:
4453 if (VecVT == MVT::v16i8)
4454 return PPC::VCMPGTUB;
4455 else if (VecVT == MVT::v8i16)
4456 return PPC::VCMPGTUH;
4457 else if (VecVT == MVT::v4i32)
4458 return PPC::VCMPGTUW;
4459 else if (VecVT == MVT::v2i64)
4460 return PPC::VCMPGTUD;
4461 else if (VecVT == MVT::v1i128)
4462 return PPC::VCMPGTUQ;
4463 break;
4464 default:
4465 break;
4466 }
4467 llvm_unreachable("Invalid integer vector compare condition");
4468 }
4469}
4470
/// Try to select the SETCC node \p N (or its strict-FP form, detected through
/// isStrictFPOpcode()) directly.
///
/// Three strategies are attempted in order:
///  1. Non-strict compare against an immediate 0 or -1 accepted by
///     isInt32Immediate, when CR bits are not in use: short GPR sequences.
///  2. Non-strict vector compare: a vector compare instruction, with the
///     operands swapped and/or the result complemented as getVCmpInst asks.
///  3. Otherwise, and only when CR bits are not in use: a compare built by
///     SelectCC whose result is copied into CR7, read back with MFOCRF and
///     reduced to the single requested bit.
///
/// \returns true if \p N was selected here, false if it was left untouched.
4471bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
4472 SDLoc dl(N);
4473 unsigned Imm;
4474 bool IsStrict = N->isStrictFPOpcode();
// A strict node carries its chain in operand 0, which shifts the LHS, RHS and
// condition-code operands up by one.
4475 ISD::CondCode CC =
4476 cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
// NOTE(review): the initializer of PtrVT (embedded line 4478) is missing from
// this listing - restore it from the original file.
4477 EVT PtrVT =
4479 bool isPPC64 = (PtrVT == MVT::i64);
4480 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
4481
4482 SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
4483 SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
4484
4485 if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
4486 // We can codegen setcc op, imm very efficiently compared to a brcond.
4487 // Check for those cases here.
4488 // setcc op, 0
4489 if (Imm == 0) {
4490 SDValue Op = LHS;
4491 switch (CC) {
4492 default: break;
4493 case ISD::SETEQ: {
// CNTLZW followed by RLWINM with rotate 27 and mask bits 5..31.
4494 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
4495 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
4496 getI32Imm(31, dl) };
4497 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4498 return true;
4499 }
4500 case ISD::SETNE: {
// Not used when pointers are 64-bit; falls through to the generic path below.
4501 if (isPPC64) break;
// ADDIC also produces a glue result (value 1) that SUBFE consumes.
4502 SDValue AD =
4503 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4504 Op, getI32Imm(~0U, dl)), 0);
4505 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
4506 return true;
4507 }
4508 case ISD::SETLT: {
// RLWINM with rotate 1 and mask bits 31..31 applied to the operand itself.
4509 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4510 getI32Imm(31, dl) };
4511 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4512 return true;
4513 }
4514 case ISD::SETGT: {
// NEG, then ANDC with the original operand, then the same RLWINM as SETLT.
4515 SDValue T =
4516 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
4517 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
4518 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
4519 getI32Imm(31, dl) };
4520 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4521 return true;
4522 }
4523 }
4524 } else if (Imm == ~0U) { // setcc op, -1
4525 SDValue Op = LHS;
4526 switch (CC) {
4527 default: break;
4528 case ISD::SETEQ:
4529 if (isPPC64) break;
// ADDIC of 1 produces a glue result that ADDZE adds to an LI 0.
4530 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4531 Op, getI32Imm(1, dl)), 0);
4532 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
4533 SDValue(CurDAG->getMachineNode(PPC::LI, dl,
4534 MVT::i32,
4535 getI32Imm(0, dl)),
4536 0), Op.getValue(1));
4537 return true;
4538 case ISD::SETNE: {
4539 if (isPPC64) break;
// NOR of the operand with itself, then the same ADDIC/SUBFE pair as the
// "setcc op, 0" SETNE case.
4540 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
4541 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4542 Op, getI32Imm(~0U, dl));
4543 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
4544 SDValue(AD, 1));
4545 return true;
4546 }
4547 case ISD::SETLT: {
// (Op + 1) AND Op, then RLWINM with rotate 1 and mask bits 31..31.
4548 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
4549 getI32Imm(1, dl)), 0);
4550 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
4551 Op), 0);
4552 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4553 getI32Imm(31, dl) };
4554 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4555 return true;
4556 }
4557 case ISD::SETGT: {
// RLWINM with rotate 1 and mask bits 31..31, then XORI with 1.
4558 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4559 getI32Imm(31, dl) };
4560 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4561 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
4562 return true;
4563 }
4564 }
4565 }
4566 }
4567
4568 // Altivec Vector compare instructions do not set any CR register by default and
4569 // vector compare operations return the same type as the operands.
4570 if (!IsStrict && LHS.getValueType().isVector()) {
4571 if (Subtarget->hasSPE())
4572 return false;
4573
4574 EVT VecVT = LHS.getValueType();
4575 // Optimize 'Not equal to zero-vector' comparisons to 'Greater than or
4576 // less than' operators.
4577 // Example: Consider k to be any non-zero positive value.
4578 // * for k != 0, change SETNE to SETUGT (k > 0)
4579 // * for 0 != k, change SETNE to SETULT (0 < k)
4580 if (CC == ISD::SETNE) {
4581 // Only optimize for integer types (avoid FP completely)
4582 if (VecVT.getVectorElementType().isInteger()) {
4583 if (ISD::isBuildVectorAllZeros(RHS.getNode()))
4584 CC = ISD::SETUGT;
4585 else if (ISD::isBuildVectorAllZeros(LHS.getNode()))
4586 CC = ISD::SETULT;
4587 }
4588 }
4589 bool Swap, Negate;
4590 unsigned int VCmpInst =
4591 getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
4592 if (Swap)
4593 std::swap(LHS, RHS);
4594
// The result is the integer vector type matching the operand vector type.
4595 EVT ResVT = VecVT.changeVectorElementTypeToInteger();
4596 if (Negate) {
// Complement the compare result by NOR-ing it with itself.
4597 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
4598 CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4599 ResVT, VCmp, VCmp);
4600 return true;
4601 }
4602
4603 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
4604 return true;
4605 }
4606
4607 if (Subtarget->useCRBits())
4608 return false;
4609
4610 bool Inv;
4611 unsigned Idx = getCRIdxForSetCC(CC, Inv);
4612 SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
// For a strict node, users of N's chain result (value 1) are redirected to the
// chain result of the new compare.
4613 if (IsStrict)
4614 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
4615 SDValue IntCR;
4616
4617 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4618 // The correct compare instruction is already set by SelectCC()
4619 if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4620 Idx = 1;
4621 }
4622
4623 // Force the ccreg into CR7.
4624 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
4625
4626 SDValue InGlue; // Null incoming flag value.
// Value 1 of the CopyToReg is kept in CCReg and passed to MFOCRF below.
4627 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
4628 InGlue).getValue(1);
4629
4630 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
4631 CCReg), 0);
4632
// Rotate amount is derived from the CR bit index; mask keeps bit 31 only.
4633 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
4634 getI32Imm(31, dl), getI32Imm(31, dl) };
4635 if (!Inv) {
4636 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4637 return true;
4638 }
4639
4640 // Get the specified bit.
4641 SDValue Tmp =
4642 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
// Inverted condition: flip the extracted bit.
4643 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
4644 return true;
4645}
4646
4647/// Does this node represent a load/store node whose address can be represented
4648/// with a register plus an immediate that's a multiple of \p Val:
4649bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4650 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4651 StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4652 MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
4653 SDValue AddrOp;
4654 if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4655 AddrOp = N->getOperand(1);
4656 else if (STN)
4657 AddrOp = STN->getOperand(2);
4658
4659 // If the address points a frame object or a frame object with an offset,
4660 // we need to check the object alignment.
4661 short Imm = 0;
4662 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4663 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4664 AddrOp)) {
4665 // If op0 is a frame index that is under aligned, we can't do it either,
4666 // because it is translated to r31 or r1 + slot + offset. We won't know the
4667 // slot number until the stack frame is finalized.
4668 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4669 unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4670 if ((SlotAlign % Val) != 0)
4671 return false;
4672
4673 // If we have an offset, we need further check on the offset.
4674 if (AddrOp.getOpcode() != ISD::ADD)
4675 return true;
4676 }
4677
4678 if (AddrOp.getOpcode() == ISD::ADD)
4679 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4680
4681 // If the address comes from the outside, the offset will be zero.
4682 return AddrOp.getOpcode() == ISD::CopyFromReg;
4683}
4684
4685void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4686 // Transfer memoperands.
4687 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4688 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4689}
4690
4691static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
4692 bool &NeedSwapOps, bool &IsUnCmp) {
4693
4694 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4695
4696 SDValue LHS = N->getOperand(0);
4697 SDValue RHS = N->getOperand(1);
4698 SDValue TrueRes = N->getOperand(2);
4699 SDValue FalseRes = N->getOperand(3);
4700 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4701 if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4702 N->getSimpleValueType(0) != MVT::i32))
4703 return false;
4704
4705 // We are looking for any of:
4706 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4707 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4708 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4709 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4710 int64_t TrueResVal = TrueConst->getSExtValue();
4711 if ((TrueResVal < -1 || TrueResVal > 1) ||
4712 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4713 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4714 (TrueResVal == 0 &&
4715 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4716 return false;
4717
4718 SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4719 ? FalseRes
4720 : FalseRes.getOperand(0);
4721 bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4722 if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4723 SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4724 return false;
4725
4726 // Without this setb optimization, the outer SELECT_CC will be manually
4727 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4728 // transforms pseudo instruction to isel instruction. When there are more than
4729 // one use for result like zext/sext, with current optimization we only see
4730 // isel is replaced by setb but can't see any significant gain. Since
4731 // setb has longer latency than original isel, we should avoid this. Another
4732 // point is that setb requires comparison always kept, it can break the
4733 // opportunity to get the comparison away if we have in future.
4734 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4735 return false;
4736
4737 SDValue InnerLHS = SetOrSelCC.getOperand(0);
4738 SDValue InnerRHS = SetOrSelCC.getOperand(1);
4739 ISD::CondCode InnerCC =
4740 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4741 // If the inner comparison is a select_cc, make sure the true/false values are
4742 // 1/-1 and canonicalize it if needed.
4743 if (InnerIsSel) {
4744 ConstantSDNode *SelCCTrueConst =
4745 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4746 ConstantSDNode *SelCCFalseConst =
4747 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4748 if (!SelCCTrueConst || !SelCCFalseConst)
4749 return false;
4750 int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4751 int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4752 // The values must be -1/1 (requiring a swap) or 1/-1.
4753 if (SelCCTVal == -1 && SelCCFVal == 1) {
4754 std::swap(InnerLHS, InnerRHS);
4755 } else if (SelCCTVal != 1 || SelCCFVal != -1)
4756 return false;
4757 }
4758
4759 // Canonicalize unsigned case
4760 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4761 IsUnCmp = true;
4762 InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4763 }
4764
4765 bool InnerSwapped = false;
4766 if (LHS == InnerRHS && RHS == InnerLHS)
4767 InnerSwapped = true;
4768 else if (LHS != InnerLHS || RHS != InnerRHS)
4769 return false;
4770
4771 switch (CC) {
4772 // (select_cc lhs, rhs, 0, \
4773 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4774 case ISD::SETEQ:
4775 if (!InnerIsSel)
4776 return false;
4777 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4778 return false;
4779 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4780 break;
4781
4782 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4783 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4784 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4785 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4786 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4787 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4788 case ISD::SETULT:
4789 if (!IsUnCmp && InnerCC != ISD::SETNE)
4790 return false;
4791 IsUnCmp = true;
4792 [[fallthrough]];
4793 case ISD::SETLT:
4794 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4795 (InnerCC == ISD::SETLT && InnerSwapped))
4796 NeedSwapOps = (TrueResVal == 1);
4797 else
4798 return false;
4799 break;
4800
4801 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4802 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4803 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4804 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4805 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4806 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4807 case ISD::SETUGT:
4808 if (!IsUnCmp && InnerCC != ISD::SETNE)
4809 return false;
4810 IsUnCmp = true;
4811 [[fallthrough]];
4812 case ISD::SETGT:
4813 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4814 (InnerCC == ISD::SETGT && InnerSwapped))
4815 NeedSwapOps = (TrueResVal == -1);
4816 else
4817 return false;
4818 break;
4819
4820 default:
4821 return false;
4822 }
4823
4824 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4825 LLVM_DEBUG(N->dump());
4826
4827 return true;
4828}
4829
4830// Return true if it's a software square-root/divide operand.
4831static bool isSWTestOp(SDValue N) {
4832 if (N.getOpcode() == PPCISD::FTSQRT)
4833 return true;
4834 if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
4835 N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4836 return false;
4837 switch (N.getConstantOperandVal(0)) {
4838 case Intrinsic::ppc_vsx_xvtdivdp:
4839 case Intrinsic::ppc_vsx_xvtdivsp:
4840 case Intrinsic::ppc_vsx_xvtsqrtdp:
4841 case Intrinsic::ppc_vsx_xvtsqrtsp:
4842 return true;
4843 }
4844 return false;
4845}
4846
/// Try to fold a BR_CC that tests one bit of a software test operation (see
/// isSWTestOp) into a single BCC that uses the test result directly.
///
/// Operand use, as read by the code below: operand 1 is the condition code,
/// operands 2 and 3 are the compared values, and operands 4 and 0 are passed
/// through unchanged as the trailing BCC operands.
///
/// \returns true if \p N was selected to a BCC, false if no pattern matched.
4847bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4848 assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4849 // We are looking for following patterns, where `truncate to i1` actually has
4850 // the same semantic with `and 1`.
4851 // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4852 // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4853 // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4854 // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4855 // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4856 // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4857 // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4858 // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4859 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4860 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4861 return false;
4862
// Only comparisons against the constant zero are handled.
4863 SDValue CmpRHS = N->getOperand(3);
4864 if (!isNullConstant(CmpRHS))
4865 return false;
4866
// The compared value must wrap a software test operation as its operand 0.
4867 SDValue CmpLHS = N->getOperand(2);
4868 if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
4869 return false;
4870
// PCC stays 0 when neither the AND nor the truncate form matches; 0 is then
// treated as "no predicate found" further down.
4871 unsigned PCC = 0;
4872 bool IsCCNE = CC == ISD::SETNE;
// NOTE(review): the rest of this condition (embedded line 4874) is missing
// from this listing - restore it from the original file.
4873 if (CmpLHS.getOpcode() == ISD::AND &&
// The AND mask selects the tested bit; setne and seteq get opposite
// predicates for the same bit.
4875 switch (CmpLHS.getConstantOperandVal(1)) {
4876 case 1:
4877 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4878 break;
4879 case 2:
4880 PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4881 break;
4882 case 4:
4883 PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4884 break;
4885 case 8:
4886 PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4887 break;
4888 default:
4889 return false;
4890 }
// A truncate to i1 is handled like an AND with 1.
4891 else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4892 CmpLHS.getValueType() == MVT::i1)
4893 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4894
4895 if (PCC) {
4896 SDLoc dl(N);
// BCC operands: predicate, the software test node, then N's operands 4 and 0.
4897 SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
4898 N->getOperand(0)};
4899 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4900 return true;
4901 }
4902 return false;
4903}
4904
/// Try to select the branch node \p N when its compared value is
/// (and (loop_decrement intrinsic), C) with a non-zero constant C.
///
/// On a match the intrinsic is replaced by a DecreaseCTRloop /
/// DecreaseCTR8loop machine node producing an i1, the AND and the intrinsic
/// are removed, and \p N becomes a BC or BCn on that i1.
///
/// Operand use, as read by the code below: operand 1 is the condition code,
/// operands 2 and 3 are the compared values, operand 4 is passed through to
/// the new branch, and operand 0 is merged into the new chain.
/// NOTE(review): presumably \p N is a BR_CC - confirm against the caller.
///
/// \returns true if \p N was selected here, false otherwise.
4905bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4906 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
4907 // value, for example when crbits is disabled. If so, select the
4908 // loop_decrement intrinsics now.
4909 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4910 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
4911
4912 if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) ||
4913 isNullConstant(LHS.getOperand(1)))
4914 return false;
4915
4916 if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4917 LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement)
4918 return false;
4919
// NOTE(review): the condition guarding this return (embedded line 4920) is
// missing from this listing - restore it from the original file.
4921 return false;
4922
4923 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
4924 "Counter decrement comparison is not EQ or NE");
4925
4926 SDValue OldDecrement = LHS.getOperand(0);
4927 assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
4928
4929 SDLoc DecrementLoc(OldDecrement);
4930 SDValue ChainInput = OldDecrement.getOperand(0);
// The new decrement node takes the constant 1, typed to match the subtarget's
// register width.
4931 SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc)
4932 : getI32Imm(1, DecrementLoc)};
4933 unsigned DecrementOpcode =
4934 Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4935 SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
4936 MVT::i1, DecrementOps);
4937
// "== non-zero" and "!= zero" select BC; the other two combinations select BCn.
4938 unsigned Val = RHS->getAsZExtVal();
4939 bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val);
4940 unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4941
// Retire the AND node: its uses are redirected to its constant operand before
// the node is removed.
4942 ReplaceUses(LHS.getValue(0), LHS.getOperand(1));
4943 CurDAG->RemoveDeadNode(LHS.getNode());
4944
4945 // Mark the old loop_decrement intrinsic as dead.
4946 ReplaceUses(OldDecrement.getValue(1), ChainInput);
4947 CurDAG->RemoveDeadNode(OldDecrement.getNode());
4948
// The new branch is chained on both the intrinsic's former input chain and
// N's own operand 0.
4949 SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
4950 ChainInput, N->getOperand(0));
4951
4952 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0),
4953 N->getOperand(4), Chain);
4954 return true;
4955}
4956
4957bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4958 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4959 unsigned Imm;
4960 if (!isInt32Immediate(N->getOperand(1), Imm))
4961 return false;
4962
4963 SDLoc dl(N);
4964 SDValue Val = N->getOperand(0);
4965 unsigned SH, MB, ME;
4966 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4967 // with a mask, emit rlwinm
4968 if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
4969 Val = Val.getOperand(0);
4970 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4971 getI32Imm(ME, dl)};
4972 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4973 return true;
4974 }
4975
4976 // If this is just a masked value where the input is not handled, and
4977 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4978 if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4979 // The result of LBARX/LHARX do not need to be cleared as the instructions
4980 // implicitly clear the upper bits.
4981 unsigned AlreadyCleared = 0;
4982 if (Val.getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4983 auto IntrinsicID = Val.getConstantOperandVal(1);
4984 if (IntrinsicID == Intrinsic::ppc_lbarx)
4985 AlreadyCleared = 24;
4986 else if (IntrinsicID == Intrinsic::ppc_lharx)
4987 AlreadyCleared = 16;
4988 if (AlreadyCleared != 0 && AlreadyCleared == MB && ME == 31) {
4989 ReplaceUses(SDValue(N, 0), N->getOperand(0));
4990 return true;
4991 }
4992 }
4993
4994 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4995 getI32Imm(ME, dl)};
4996 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4997 return true;
4998 }
4999
5000 // AND X, 0 -> 0, not "rlwinm 32".
5001 if (Imm == 0) {
5002 ReplaceUses(SDValue(N, 0), N->getOperand(1));
5003 return true;
5004 }
5005
5006 return false;
5007}
5008
5009bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
5010 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5011 uint64_t Imm64;
5012 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
5013 return false;
5014
5015 unsigned MB, ME;
5016 if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
5017 // MB ME
5018 // +----------------------+
5019 // |xxxxxxxxxxx00011111000|
5020 // +----------------------+
5021 // 0 32 64
5022 // We can only do it if the MB is larger than 32 and MB <= ME
5023 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
5024 // we didn't rotate it.
5025 SDLoc dl(N);
5026 SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
5027 getI64Imm(ME - 32, dl)};
5028 CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
5029 return true;
5030 }
5031
5032 return false;
5033}
5034
5035bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
5036 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5037 uint64_t Imm64;
5038 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
5039 return false;
5040
5041 // Do nothing if it is 16-bit imm as the pattern in the .td file handle
5042 // it well with "andi.".
5043 if (isUInt<16>(Imm64))
5044 return false;
5045
5046 SDLoc Loc(N);
5047 SDValue Val = N->getOperand(0);
5048
5049 // Optimized with two rldicl's as follows:
5050 // Add missing bits on left to the mask and check that the mask is a
5051 // wrapped run of ones, i.e.
5052 // Change pattern |0001111100000011111111|
5053 // to |1111111100000011111111|.
5054 unsigned NumOfLeadingZeros = llvm::countl_zero(Imm64);
5055 if (NumOfLeadingZeros != 0)
5056 Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
5057
5058 unsigned MB, ME;
5059 if (!isRunOfOnes64(Imm64, MB, ME))
5060 return false;
5061
5062 // ME MB MB-ME+63
5063 // +----------------------+ +----------------------+
5064 // |1111111100000011111111| -> |0000001111111111111111|
5065 // +----------------------+ +----------------------+
5066 // 0 63 0 63
5067 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
5068 unsigned OnesOnLeft = ME + 1;
5069 unsigned ZerosInBetween = (MB - ME + 63) & 63;
5070 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
5071 // on the left the bits that are already zeros in the mask.
5072 Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
5073 getI64Imm(OnesOnLeft, Loc),
5074 getI64Imm(ZerosInBetween, Loc)),
5075 0);
5076 // MB-ME+63 ME MB
5077 // +----------------------+ +----------------------+
5078 // |0000001111111111111111| -> |0001111100000011111111|
5079 // +----------------------+ +----------------------+
5080 // 0 63 0 63
5081 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
5082 // left the number of ones we previously added.
5083 SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
5084 getI64Imm(NumOfLeadingZeros, Loc)};
5085 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5086 return true;
5087}
5088
5089bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
5090 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5091 unsigned Imm;
5092 if (!isInt32Immediate(N->getOperand(1), Imm))
5093 return false;
5094
5095 SDValue Val = N->getOperand(0);
5096 unsigned Imm2;
5097 // ISD::OR doesn't get all the bitfield insertion fun.
5098 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
5099 // bitfield insert.
5100 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
5101 return false;
5102
5103 // The idea here is to check whether this is equivalent to:
5104 // (c1 & m) | (x & ~m)
5105 // where m is a run-of-ones mask. The logic here is that, for each bit in
5106 // c1 and c2:
5107 // - if both are 1, then the output will be 1.
5108 // - if both are 0, then the output will be 0.
5109 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
5110 // come from x.
5111 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
5112 // be 0.
5113 // If that last condition is never the case, then we can form m from the
5114 // bits that are the same between c1 and c2.
5115 unsigned MB, ME;
5116 if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
5117 SDLoc dl(N);
5118 SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
5119 getI32Imm(MB, dl), getI32Imm(ME, dl)};
5120 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
5121 return true;
5122 }
5123
5124 return false;
5125}
5126
5127bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5128 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5129
5130 uint64_t Imm64;
5131 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5132 return false;
5133
5134 SDValue Val = N->getOperand(0);
5135
5136 if (Val.getOpcode() != ISD::ROTL)
5137 return false;
5138
5139 // Looking to try to avoid a situation like this one:
5140 // %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5141 // %and1 = and i64 %2, 9223372036854775807
5142 // In this function we are looking to try to match RLDCL. However, the above
5143 // DAG would better match RLDICL instead which is not what we are looking
5144 // for here.
5145 SDValue RotateAmt = Val.getOperand(1);
5146 if (RotateAmt.getOpcode() == ISD::Constant)
5147 return false;
5148
5149 unsigned MB = 64 - llvm::countr_one(Imm64);
5150 SDLoc dl(N);
5151 SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
5152 CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
5153 return true;
5154}
5155
5156bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
5157 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5158 uint64_t Imm64;
5159 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5160 return false;
5161
5162 // If this is a 64-bit zero-extension mask, emit rldicl.
5163 unsigned MB = 64 - llvm::countr_one(Imm64);
5164 unsigned SH = 0;
5165 unsigned Imm;
5166 SDValue Val = N->getOperand(0);
5167 SDLoc dl(N);
5168
5169 if (Val.getOpcode() == ISD::ANY_EXTEND) {
5170 auto Op0 = Val.getOperand(0);
5171 if (Op0.getOpcode() == ISD::SRL &&
5172 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
5173
5174 auto ResultType = Val.getNode()->getValueType(0);
5175 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
5176 SDValue IDVal(ImDef, 0);
5177
5178 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
5179 IDVal, Op0.getOperand(0),
5180 getI32Imm(1, dl)),
5181 0);
5182 SH = 64 - Imm;
5183 }
5184 }
5185
5186 // If the operand is a logical right shift, we can fold it into this
5187 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
5188 // for n <= mb. The right shift is really a left rotate followed by a
5189 // mask, and this mask is a more-restrictive sub-mask of the mask implied
5190 // by the shift.
5191 if (Val.getOpcode() == ISD::SRL &&
5192 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
5193 assert(Imm < 64 && "Illegal shift amount");
5194 Val = Val.getOperand(0);
5195 SH = 64 - Imm;
5196 }
5197
5198 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
5199 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5200 return true;
5201}
5202
5203bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
5204 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5205 uint64_t Imm64;
5206 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5207 !isMask_64(~Imm64))
5208 return false;
5209
5210 // If this is a negated 64-bit zero-extension mask,
5211 // i.e. the immediate is a sequence of ones from most significant side
5212 // and all zero for reminder, we should use rldicr.
5213 unsigned MB = 63 - llvm::countr_one(~Imm64);
5214 unsigned SH = 0;
5215 SDLoc dl(N);
5216 SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
5217 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5218 return true;
5219}
5220
5221bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
5222 assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
5223 uint64_t Imm64;
5224 unsigned MB, ME;
5225 SDValue N0 = N->getOperand(0);
5226
5227 // We won't get fewer instructions if the imm is 32-bit integer.
5228 // rldimi requires the imm to have consecutive ones with both sides zero.
5229 // Also, make sure the first Op has only one use, otherwise this may increase
5230 // register pressure since rldimi is destructive.
5231 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5232 isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
5233 return false;
5234
5235 unsigned SH = 63 - ME;
5236 SDLoc Dl(N);
5237 // Use select64Imm for making LI instr instead of directly putting Imm64
5238 SDValue Ops[] = {
5239 N->getOperand(0),
5240 SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
5241 getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
5242 CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
5243 return true;
5244}
5245
5246// Select - Convert the specified operand from a target-independent to a
5247// target-specific node if it hasn't already been changed.
5248void PPCDAGToDAGISel::Select(SDNode *N) {
5249 SDLoc dl(N);
5250 if (N->isMachineOpcode()) {
5251 N->setNodeId(-1);
5252 return; // Already selected.
5253 }
5254
5255 // In case any misguided DAG-level optimizations form an ADD with a
5256 // TargetConstant operand, crash here instead of miscompiling (by selecting
5257 // an r+r add instead of some kind of r+i add).
5258 if (N->getOpcode() == ISD::ADD &&
5259 N->getOperand(1).getOpcode() == ISD::TargetConstant)
5260 llvm_unreachable("Invalid ADD with TargetConstant operand");
5261
5262 // Try matching complex bit permutations before doing anything else.
5263 if (tryBitPermutation(N))
5264 return;
5265
5266 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
5267 if (tryIntCompareInGPR(N))
5268 return;
5269
5270 switch (N->getOpcode()) {
5271 default: break;
5272
5273 case ISD::Constant:
5274 if (N->getValueType(0) == MVT::i64) {
5275 ReplaceNode(N, selectI64Imm(CurDAG, N));
5276 return;
5277 }
5278 break;
5279
5280 case ISD::INTRINSIC_VOID: {
5281 auto IntrinsicID = N->getConstantOperandVal(1);
5282 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
5283 IntrinsicID != Intrinsic::ppc_trapd &&
5284 IntrinsicID != Intrinsic::ppc_trap)
5285 break;
5286 unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||
5287 IntrinsicID == Intrinsic::ppc_trapd)
5288 ? PPC::TDI
5289 : PPC::TWI;
5290 SmallVector<SDValue, 4> OpsWithMD;
5291 unsigned MDIndex;
5292 if (IntrinsicID == Intrinsic::ppc_tdw ||
5293 IntrinsicID == Intrinsic::ppc_tw) {
5294 SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
5295 int16_t SImmOperand2;
5296 int16_t SImmOperand3;
5297 int16_t SImmOperand4;
5298 bool isOperand2IntS16Immediate =
5299 isIntS16Immediate(N->getOperand(2), SImmOperand2);
5300 bool isOperand3IntS16Immediate =
5301 isIntS16Immediate(N->getOperand(3), SImmOperand3);
5302 // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5303 // reg or imm + imm. The imm + imm form will be optimized to either an
5304 // unconditional trap or a nop in a later pass.
5305 if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5306 Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5307 else if (isOperand3IntS16Immediate)
5308 // The 2nd and 3rd operands are reg + imm.
5309 Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
5310 else {
5311 // The 2nd and 3rd operands are imm + reg.
5312 bool isOperand4IntS16Immediate =
5313 isIntS16Immediate(N->getOperand(4), SImmOperand4);
5314 (void)isOperand4IntS16Immediate;
5315 assert(isOperand4IntS16Immediate &&
5316 "The 4th operand is not an Immediate");
5317 // We need to flip the condition immediate TO.
5318 int16_t TO = int(SImmOperand4) & 0x1F;
5319 // We swap the first and second bit of TO if they are not same.
5320 if ((TO & 0x1) != ((TO & 0x2) >> 1))
5321 TO = (TO & 0x1) ? TO + 1 : TO - 1;
5322 // We swap the fourth and fifth bit of TO if they are not same.
5323 if ((TO & 0x8) != ((TO & 0x10) >> 1))
5324 TO = (TO & 0x8) ? TO + 8 : TO - 8;
5325 Ops[0] = getI32Imm(TO, dl);
5326 Ops[1] = N->getOperand(3);
5327 Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
5328 }
5329 OpsWithMD = {Ops[0], Ops[1], Ops[2]};
5330 MDIndex = 5;
5331 } else {
5332 OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)};
5333 MDIndex = 3;
5334 }
5335
5336 if (N->getNumOperands() > MDIndex) {
5337 SDValue MDV = N->getOperand(MDIndex);
5338 const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
5339 assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!");
5340 assert((isa<MDString>(MD->getOperand(0)) &&
5341 cast<MDString>(MD->getOperand(0))->getString() ==
5342 "ppc-trap-reason") &&
5343 "Unsupported annotation data type!");
5344 for (unsigned i = 1; i < MD->getNumOperands(); i++) {
5345 assert(isa<MDString>(MD->getOperand(i)) &&
5346 "Invalid data type for annotation ppc-trap-reason!");
5347 OpsWithMD.push_back(
5348 getI32Imm(std::stoi(cast<MDString>(
5349 MD->getOperand(i))->getString().str()), dl));
5350 }
5351 }
5352 OpsWithMD.push_back(N->getOperand(0)); // chain
5353 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);
5354 return;
5355 }
5356
5358 // We emit the PPC::FSELS instruction here because of type conflicts with
5359 // the comparison operand. The FSELS instruction is defined to use an 8-byte
5360 // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5361 // value for the comparison. When selecting through a .td file, a type
5362 // error is raised. Must check this first so we never break on the
5363 // !Subtarget->isISA3_1() check.
5364 auto IntID = N->getConstantOperandVal(0);
5365 if (IntID == Intrinsic::ppc_fsels) {
5366 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
5367 CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
5368 return;
5369 }
5370
5371 if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
5372 auto Pred = N->getConstantOperandVal(1);
5373 unsigned Opcode =
5374 IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5375 unsigned SubReg = 0;
5376 unsigned ShiftVal = 0;
5377 bool Reverse = false;
5378 switch (Pred) {
5379 case 0:
5380 SubReg = PPC::sub_eq;
5381 ShiftVal = 1;
5382 break;
5383 case 1:
5384 SubReg = PPC::sub_eq;
5385 ShiftVal = 1;
5386 Reverse = true;
5387 break;
5388 case 2:
5389 SubReg = PPC::sub_lt;
5390 ShiftVal = 3;
5391 break;
5392 case 3:
5393 SubReg = PPC::sub_lt;
5394 ShiftVal = 3;
5395 Reverse = true;
5396 break;
5397 case 4:
5398 SubReg = PPC::sub_gt;
5399 ShiftVal = 2;
5400 break;
5401 case 5:
5402 SubReg = PPC::sub_gt;
5403 ShiftVal = 2;
5404 Reverse = true;
5405 break;
5406 case 6:
5407 SubReg = PPC::sub_un;
5408 break;
5409 case 7:
5410 SubReg = PPC::sub_un;
5411 Reverse = true;
5412 break;
5413 }
5414
5415 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5416 SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
5417 CurDAG->getTargetConstant(0, dl, MVT::i32)};
5418 SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
5419 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5420 // On Power10, we can use SETBC[R]. On prior architectures, we have to use
5421 // MFOCRF and shift/negate the value.
5422 if (Subtarget->isISA3_1()) {
5423 SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
5424 SDValue CRBit = SDValue(
5425 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5426 CR6Reg, SubRegIdx, BCDOp.getValue(1)),
5427 0);
5428 CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
5429 CRBit);
5430 } else {
5431 SDValue Move =
5432 SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
5433 BCDOp.getValue(1)),
5434 0);
5435 SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
5436 getI32Imm(31, dl), getI32Imm(31, dl)};
5437 if (!Reverse)
5438 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5439 else {
5440 SDValue Shift = SDValue(
5441 CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
5442 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
5443 }
5444 }
5445 return;
5446 }
5447
5448 if (!Subtarget->isISA3_1())
5449 break;
5450 unsigned Opcode = 0;
5451 switch (IntID) {
5452 default:
5453 break;
5454 case Intrinsic::ppc_altivec_vstribr_p:
5455 Opcode = PPC::VSTRIBR_rec;
5456 break;
5457 case Intrinsic::ppc_altivec_vstribl_p:
5458 Opcode = PPC::VSTRIBL_rec;
5459 break;
5460 case Intrinsic::ppc_altivec_vstrihr_p:
5461 Opcode = PPC::VSTRIHR_rec;
5462 break;
5463 case Intrinsic::ppc_altivec_vstrihl_p:
5464 Opcode = PPC::VSTRIHL_rec;
5465 break;
5466 }
5467 if (!Opcode)
5468 break;
5469
5470 // Generate the appropriate vector string isolate intrinsic to match.
5471 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5472 SDValue VecStrOp =
5473 SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
5474 // Vector string isolate instructions update the EQ bit of CR6.
5475 // Generate a SETBC instruction to extract the bit and place it in a GPR.
5476 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
5477 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5478 SDValue CRBit = SDValue(
5479 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5480 CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
5481 0);
5482 CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
5483 return;
5484 }
5485
5486 case ISD::SETCC:
5487 case ISD::STRICT_FSETCC:
5489 if (trySETCC(N))
5490 return;
5491 break;
5492 // These nodes will be transformed into GETtlsADDR32 node, which
5493 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5494 case PPCISD::ADDI_TLSLD_L_ADDR:
5495 case PPCISD::ADDI_TLSGD_L_ADDR: {
5496 const Module *Mod = MF->getFunction().getParent();
5497 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5498 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
5499 Mod->getPICLevel() == PICLevel::SmallPIC)
5500 break;
5501 // Attach global base pointer on GETtlsADDR32 node in order to
5502 // generate secure plt code for TLS symbols.
5503 getGlobalBaseReg();
5504 } break;
5505 case PPCISD::CALL:
5506 case PPCISD::CALL_RM: {
5507 if (Subtarget->isPPC64() || !TM.isPositionIndependent() ||
5508 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF())
5509 break;
5510
5511 SDValue Op = N->getOperand(1);
5512
5513 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5514 if (GA->getTargetFlags() == PPCII::MO_PLT)
5515 getGlobalBaseReg();
5516 }
5517 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
5518 if (ES->getTargetFlags() == PPCII::MO_PLT)
5519 getGlobalBaseReg();
5520 }
5521 } break;
5522
5524 ReplaceNode(N, getGlobalBaseReg());
5525 return;
5526
5527 case ISD::FrameIndex:
5528 selectFrameIndex(N, N);
5529 return;
5530
5531 case PPCISD::MFOCRF: {
5532 SDValue InGlue = N->getOperand(1);
5533 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
5534 N->getOperand(0), InGlue));
5535 return;
5536 }
5537
5539 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
5540 MVT::Other, N->getOperand(0)));
5541 return;
5542
5543 case PPCISD::SRA_ADDZE: {
5544 SDValue N0 = N->getOperand(0);
5545 SDValue ShiftAmt =
5546 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
5547 getConstantIntValue(), dl,
5548 N->getValueType(0));
5549 if (N->getValueType(0) == MVT::i64) {
5550 SDNode *Op =
5551 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
5552 N0, ShiftAmt);
5553 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
5554 SDValue(Op, 1));
5555 return;
5556 } else {
5557 assert(N->getValueType(0) == MVT::i32 &&
5558 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5559 SDNode *Op =
5560 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
5561 N0, ShiftAmt);
5562 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
5563 SDValue(Op, 1));
5564 return;
5565 }
5566 }
5567
5568 case ISD::STORE: {
5569 // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
5570 // X-form stores.
5571 StoreSDNode *ST = cast<StoreSDNode>(N);
5572 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
5573 ST->getAddressingMode() != ISD::PRE_INC)
5574 if (tryTLSXFormStore(ST))
5575 return;
5576 break;
5577 }
5578 case ISD::LOAD: {
5579 // Handle preincrement loads.
5580 LoadSDNode *LD = cast<LoadSDNode>(N);
5581 EVT LoadedVT = LD->getMemoryVT();
5582
5583 // Normal loads are handled by code generated from the .td file.
5584 if (LD->getAddressingMode() != ISD::PRE_INC) {
5585 // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
5586 // X-form loads.
5587 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()))
5588 if (tryTLSXFormLoad(LD))
5589 return;
5590 break;
5591 }
5592
5593 SDValue Offset = LD->getOffset();
5594 if (Offset.getOpcode() == ISD::TargetConstant ||
5595 Offset.getOpcode() == ISD::TargetGlobalAddress) {
5596
5597 unsigned Opcode;
5598 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5599 if (LD->getValueType(0) != MVT::i64) {
5600 // Handle PPC32 integer and normal FP loads.
5601 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5602 switch (LoadedVT.getSimpleVT().SimpleTy) {
5603 default: llvm_unreachable("Invalid PPC load type!");
5604 case MVT::f64: Opcode = PPC::LFDU; break;
5605 case MVT::f32: Opcode = PPC::LFSU; break;
5606 case MVT::i32: Opcode = PPC::LWZU; break;
5607 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5608 case MVT::i1:
5609 case MVT::i8: Opcode = PPC::LBZU; break;
5610 }
5611 } else {
5612 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5613 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5614 switch (LoadedVT.getSimpleVT().SimpleTy) {
5615 default: llvm_unreachable("Invalid PPC load type!");
5616 case MVT::i64: Opcode = PPC::LDU; break;
5617 case MVT::i32: Opcode = PPC::LWZU8; break;
5618 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5619 case MVT::i1:
5620 case MVT::i8: Opcode = PPC::LBZU8; break;
5621 }
5622 }
5623
5624 SDValue Chain = LD->getChain();
5625 SDValue Base = LD->getBasePtr();
5626 SDValue Ops[] = { Offset, Base, Chain };
5627 SDNode *MN = CurDAG->getMachineNode(
5628 Opcode, dl, LD->getValueType(0),
5629 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5630 transferMemOperands(N, MN);
5631 ReplaceNode(N, MN);
5632 return;
5633 } else {
5634 unsigned Opcode;
5635 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5636 if (LD->getValueType(0) != MVT::i64) {
5637 // Handle PPC32 integer and normal FP loads.
5638 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5639 switch (LoadedVT.getSimpleVT().SimpleTy) {
5640 default: llvm_unreachable("Invalid PPC load type!");
5641 case MVT::f64: Opcode = PPC::LFDUX; break;
5642 case MVT::f32: Opcode = PPC::LFSUX; break;
5643 case MVT::i32: Opcode = PPC::LWZUX; break;
5644 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5645 case MVT::i1:
5646 case MVT::i8: Opcode = PPC::LBZUX; break;
5647 }
5648 } else {
5649 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5650 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
5651 "Invalid sext update load");
5652 switch (LoadedVT.getSimpleVT().SimpleTy) {
5653 default: llvm_unreachable("Invalid PPC load type!");
5654 case MVT::i64: Opcode = PPC::LDUX; break;
5655 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5656 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5657 case MVT::i1:
5658 case MVT::i8: Opcode = PPC::LBZUX8; break;
5659 }
5660 }
5661
5662 SDValue Chain = LD->getChain();
5663 SDValue Base = LD->getBasePtr();
5664 SDValue Ops[] = { Base, Offset, Chain };
5665 SDNode *MN = CurDAG->getMachineNode(
5666 Opcode, dl, LD->getValueType(0),
5667 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5668 transferMemOperands(N, MN);
5669 ReplaceNode(N, MN);
5670 return;
5671 }
5672 }
5673
5674 case ISD::AND:
5675 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5676 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
5677 tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
5678 tryAsPairOfRLDICL(N))
5679 return;
5680
5681 // Other cases are autogenerated.
5682 break;
5683 case ISD::OR: {
5684 if (N->getValueType(0) == MVT::i32)
5685 if (tryBitfieldInsert(N))
5686 return;
5687
5688 int16_t Imm;
5689 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5690 isIntS16Immediate(N->getOperand(1), Imm)) {
5691 KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
5692
5693 // If this is equivalent to an add, then we can fold it with the
5694 // FrameIndex calculation.
5695 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
5696 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5697 return;
5698 }
5699 }
5700
5701 // If this is 'or' against an imm with consecutive ones and both sides zero,
5702 // try to emit rldimi
5703 if (tryAsSingleRLDIMI(N))
5704 return;
5705
5706 // OR with a 32-bit immediate can be handled by ori + oris
5707 // without creating an immediate in a GPR.
5708 uint64_t Imm64 = 0;
5709 bool IsPPC64 = Subtarget->isPPC64();
5710 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5711 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5712 // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later.
5713 uint64_t ImmHi = Imm64 >> 16;
5714 uint64_t ImmLo = Imm64 & 0xFFFF;
5715 if (ImmHi != 0 && ImmLo != 0) {
5716 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
5717 N->getOperand(0),
5718 getI16Imm(ImmLo, dl));
5719 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5720 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
5721 return;
5722 }
5723 }
5724
5725 // Other cases are autogenerated.
5726 break;
5727 }
5728 case ISD::XOR: {
5729 // XOR with a 32-bit immediate can be handled by xori + xoris
5730 // without creating an immediate in a GPR.
5731 uint64_t Imm64 = 0;
5732 bool IsPPC64 = Subtarget->isPPC64();
5733 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5734 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5735 // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later.
5736 uint64_t ImmHi = Imm64 >> 16;
5737 uint64_t ImmLo = Imm64 & 0xFFFF;
5738 if (ImmHi != 0 && ImmLo != 0) {
5739 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
5740 N->getOperand(0),
5741 getI16Imm(ImmLo, dl));
5742 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5743 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
5744 return;
5745 }
5746 }
5747
5748 break;
5749 }
5750 case ISD::ADD: {
5751 int16_t Imm;
5752 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5753 isIntS16Immediate(N->getOperand(1), Imm)) {
5754 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5755 return;
5756 }
5757
5758 break;
5759 }
5760 case ISD::SHL: {
5761 unsigned Imm, SH, MB, ME;
5762 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5763 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5764 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5765 getI32Imm(SH, dl), getI32Imm(MB, dl),
5766 getI32Imm(ME, dl) };
5767 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5768 return;
5769 }
5770
5771 // Other cases are autogenerated.
5772 break;
5773 }
5774 case ISD::SRL: {
5775 unsigned Imm, SH, MB, ME;
5776 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5777 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5778 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5779 getI32Imm(SH, dl), getI32Imm(MB, dl),
5780 getI32Imm(ME, dl) };
5781 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5782 return;
5783 }
5784
5785 // Other cases are autogenerated.
5786 break;
5787 }
5788 case ISD::MUL: {
5789 SDValue Op1 = N->getOperand(1);
5790 if (Op1.getOpcode() != ISD::Constant ||
5791 (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
5792 break;
5793
5794 // If the multiplier fits int16, we can handle it with mulli.
5795 int64_t Imm = Op1->getAsZExtVal();
5796 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
5797 if (isInt<16>(Imm) || !Shift)
5798 break;
5799
5800 // If the shifted value fits int16, we can do this transformation:
5801 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
5802 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5803 uint64_t ImmSh = Imm >> Shift;
5804 if (!isInt<16>(ImmSh))
5805 break;
5806
5807 uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
5808 if (Op1.getValueType() == MVT::i64) {
5809 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
5810 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
5811 N->getOperand(0), SDImm);
5812
5813 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5814 getI32Imm(63 - Shift, dl)};
5815 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5816 return;
5817 } else {
5818 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
5819 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
5820 N->getOperand(0), SDImm);
5821
5822 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5823 getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};
5824 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5825 return;
5826 }
5827 break;
5828 }
5829 // FIXME: Remove this once the ANDI glue bug is fixed:
5832 if (!ANDIGlueBug)
5833 break;
5834
5835 EVT InVT = N->getOperand(0).getValueType();
5836 assert((InVT == MVT::i64 || InVT == MVT::i32) &&
5837 "Invalid input type for ANDI_rec_1_EQ_BIT");
5838
5839 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5840 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
5841 N->getOperand(0),
5842 CurDAG->getTargetConstant(1, dl, InVT)),
5843 0);
5844 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
5845 SDValue SRIdxVal = CurDAG->getTargetConstant(
5846 N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5847 dl, MVT::i32);
5848
5849 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
5850 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
5851 return;
5852 }
5853 case ISD::SELECT_CC: {
5854 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
5855 EVT PtrVT =
5857 bool isPPC64 = (PtrVT == MVT::i64);
5858
5859 // If this is a select of i1 operands, we'll pattern match it.
5860 if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
5861 break;
5862
5863 if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5864 bool NeedSwapOps = false;
5865 bool IsUnCmp = false;
5866 if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
5867 SDValue LHS = N->getOperand(0);
5868 SDValue RHS = N->getOperand(1);
5869 if (NeedSwapOps)
5870 std::swap(LHS, RHS);
5871
5872 // Make use of SelectCC to generate the comparison to set CR bits, for
5873 // equality comparisons having one literal operand, SelectCC probably
5874 // doesn't need to materialize the whole literal and just use xoris to
5875 // check it first, it leads the following comparison result can't
5876 // exactly represent GT/LT relationship. So to avoid this we specify
5877 // SETGT/SETUGT here instead of SETEQ.
5878 SDValue GenCC =
5879 SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5880 CurDAG->SelectNodeTo(
5881 N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5882 N->getValueType(0), GenCC);
5883 NumP9Setb++;
5884 return;
5885 }
5886 }
5887
5888 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5889 if (!isPPC64 && isNullConstant(N->getOperand(1)) &&
5890 isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) &&
5891 CC == ISD::SETNE &&
5892 // FIXME: Implement this optzn for PPC64.
5893 N->getValueType(0) == MVT::i32) {
5894 SDNode *Tmp =
5895 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5896 N->getOperand(0), getI32Imm(~0U, dl));
5897 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
5898 N->getOperand(0), SDValue(Tmp, 1));
5899 return;
5900 }
5901
5902 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
5903
5904 if (N->getValueType(0) == MVT::i1) {
5905 // An i1 select is: (c & t) | (!c & f).
5906 bool Inv;
5907 unsigned Idx = getCRIdxForSetCC(CC, Inv);
5908
5909 unsigned SRI;
5910 switch (Idx) {
5911 default: llvm_unreachable("Invalid CC index");
5912 case 0: SRI = PPC::sub_lt; break;
5913 case 1: SRI = PPC::sub_gt; break;
5914 case 2: SRI = PPC::sub_eq; break;
5915 case 3: SRI = PPC::sub_un; break;
5916 }
5917
5918 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5919
5920 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5921 CCBit, CCBit), 0);
5922 SDValue C = Inv ? NotCCBit : CCBit,
5923 NotC = Inv ? CCBit : NotCCBit;
5924
5925 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5926 C, N->getOperand(2)), 0);
5927 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5928 NotC, N->getOperand(3)), 0);
5929
5930 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
5931 return;
5932 }
5933
5934 unsigned BROpc =
5935 getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
5936
5937 unsigned SelectCCOp;
5938 if (N->getValueType(0) == MVT::i32)
5939 SelectCCOp = PPC::SELECT_CC_I4;
5940 else if (N->getValueType(0) == MVT::i64)
5941 SelectCCOp = PPC::SELECT_CC_I8;
5942 else if (N->getValueType(0) == MVT::f32) {
5943 if (Subtarget->hasP8Vector())
5944 SelectCCOp = PPC::SELECT_CC_VSSRC;
5945 else if (Subtarget->hasSPE())
5946 SelectCCOp = PPC::SELECT_CC_SPE4;
5947 else
5948 SelectCCOp = PPC::SELECT_CC_F4;
5949 } else if (N->getValueType(0) == MVT::f64) {
5950 if (Subtarget->hasVSX())
5951 SelectCCOp = PPC::SELECT_CC_VSFRC;
5952 else if (Subtarget->hasSPE())
5953 SelectCCOp = PPC::SELECT_CC_SPE;
5954 else
5955 SelectCCOp = PPC::SELECT_CC_F8;
5956 } else if (N->getValueType(0) == MVT::f128)
5957 SelectCCOp = PPC::SELECT_CC_F16;
5958 else if (Subtarget->hasSPE())
5959 SelectCCOp = PPC::SELECT_CC_SPE;
5960 else if (N->getValueType(0) == MVT::v2f64 ||
5961 N->getValueType(0) == MVT::v2i64)
5962 SelectCCOp = PPC::SELECT_CC_VSRC;
5963 else
5964 SelectCCOp = PPC::SELECT_CC_VRRC;
5965
5966 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
5967 getI32Imm(BROpc, dl) };
5968 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
5969 return;
5970 }
5972 if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
5973 N->getValueType(0) == MVT::v2i64)) {
5974 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
5975
5976 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
5977 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
5978 unsigned DM[2];
5979
5980 for (int i = 0; i < 2; ++i)
5981 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
5982 DM[i] = 0;
5983 else
5984 DM[i] = 1;
5985
5986 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5988 isa<LoadSDNode>(Op1.getOperand(0))) {
5989 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
5991
5992 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5993 (LD->getMemoryVT() == MVT::f64 ||
5994 LD->getMemoryVT() == MVT::i64) &&
5995 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
5996 SDValue Chain = LD->getChain();
5997 SDValue Ops[] = { Base, Offset, Chain };
5998 MachineMemOperand *MemOp = LD->getMemOperand();
5999 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
6000 N->getValueType(0), Ops);
6001 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
6002 return;
6003 }
6004 }
6005
6006 // For little endian, we must swap the input operands and adjust
6007 // the mask elements (reverse and invert them).
6008 if (Subtarget->isLittleEndian()) {
6009 std::swap(Op1, Op2);
6010 unsigned tmp = DM[0];
6011 DM[0] = 1 - DM[1];
6012 DM[1] = 1 - tmp;
6013 }
6014
6015 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
6016 MVT::i32);
6017 SDValue Ops[] = { Op1, Op2, DMV };
6018 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
6019 return;
6020 }
6021
6022 break;
6023 case PPCISD::BDNZ:
6024 case PPCISD::BDZ: {
6025 bool IsPPC64 = Subtarget->isPPC64();
6026 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
6027 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
6028 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
6029 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
6030 MVT::Other, Ops);
6031 return;
6032 }
6033 case PPCISD::COND_BRANCH: {
6034 // Op #0 is the Chain.
6035 // Op #1 is the PPC::PRED_* number.
6036 // Op #2 is the CR#
6037 // Op #3 is the Dest MBB
6038 // Op #4 is the Flag.
6039 // Prevent PPC::PRED_* from being selected into LI.
6040 unsigned PCC = N->getConstantOperandVal(1);
6041 if (EnableBranchHint)
6042 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
6043
6044 SDValue Pred = getI32Imm(PCC, dl);
6045 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
6046 N->getOperand(0), N->getOperand(4) };
6047 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6048 return;
6049 }
6050 case ISD::BR_CC: {
6051 if (tryFoldSWTestBRCC(N))
6052 return;
6053 if (trySelectLoopCountIntrinsic(N))
6054 return;
6055 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
6056 unsigned PCC =
6057 getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
6058
6059 if (N->getOperand(2).getValueType() == MVT::i1) {
6060 unsigned Opc;
6061 bool Swap;
6062 switch (PCC) {
6063 default: llvm_unreachable("Unexpected Boolean-operand predicate");
6064 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
6065 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
6066 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
6067 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
6068 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
6069 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
6070 }
6071
6072 // A signed comparison of i1 values produces the opposite result to an
6073 // unsigned one if the condition code includes less-than or greater-than.
6074 // This is because 1 is the most negative signed i1 number and the most
6075 // positive unsigned i1 number. The CR-logical operations used for such
6076 // comparisons are non-commutative so for signed comparisons vs. unsigned
6077 // ones, the input operands just need to be swapped.
6078 if (ISD::isSignedIntSetCC(CC))
6079 Swap = !Swap;
6080
6081 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
6082 N->getOperand(Swap ? 3 : 2),
6083 N->getOperand(Swap ? 2 : 3)), 0);
6084 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
6085 N->getOperand(0));
6086 return;
6087 }
6088
6089 if (EnableBranchHint)
6090 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
6091
6092 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
6093 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
6094 N->getOperand(4), N->getOperand(0) };
6095 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6096 return;
6097 }
6098 case ISD::BRIND: {
6099 // FIXME: Should custom lower this.
6100 SDValue Chain = N->getOperand(0);
6101 SDValue Target = N->getOperand(1);
6102 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
6103 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
6104 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
6105 Chain), 0);
6106 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
6107 return;
6108 }
6109 case PPCISD::TOC_ENTRY: {
6110 const bool isPPC64 = Subtarget->isPPC64();
6111 const bool isELFABI = Subtarget->isSVR4ABI();
6112 const bool isAIXABI = Subtarget->isAIXABI();
6113
6114 // PowerPC only support small, medium and large code model.
6115 const CodeModel::Model CModel = getCodeModel(*Subtarget, TM, N);
6116
6117 assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
6118 "PowerPC doesn't support tiny or kernel code models.");
6119
6120 if (isAIXABI && CModel == CodeModel::Medium)
6121 report_fatal_error("Medium code model is not supported on AIX.");
6122
6123 // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
6124 // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
6125 // small code model, we need to check for a toc-data attribute.
6126 if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
6127 break;
6128
6129 auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
6130 EVT OperandTy) {
6131 SDValue GA = TocEntry->getOperand(0);
6132 SDValue TocBase = TocEntry->getOperand(1);
6133 SDNode *MN = nullptr;
6134 if (OpCode == PPC::ADDItoc || OpCode == PPC::ADDItoc8)
6135 // toc-data access doesn't involve in loading from got, no need to
6136 // keep memory operands.
6137 MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, TocBase, GA);
6138 else {
6139 MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
6140 transferMemOperands(TocEntry, MN);
6141 }
6142 ReplaceNode(TocEntry, MN);
6143 };
6144
6145 // Handle 32-bit small code model.
6146 if (!isPPC64 && CModel == CodeModel::Small) {
6147 // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
6148 // PPC::ADDItoc, or PPC::LWZtoc
6149 if (isELFABI) {
6151 "32-bit ELF can only have TOC entries in position independent"
6152 " code.");
6153 // 32-bit ELF always uses a small code model toc access.
6154 replaceWith(PPC::LWZtoc, N, MVT::i32);
6155 return;
6156 }
6157
6158 assert(isAIXABI && "ELF ABI already handled");
6159
6160 if (hasTocDataAttr(N->getOperand(0))) {
6161 replaceWith(PPC::ADDItoc, N, MVT::i32);
6162 return;
6163 }
6164
6165 replaceWith(PPC::LWZtoc, N, MVT::i32);
6166 return;
6167 }
6168
6169 if (isPPC64 && CModel == CodeModel::Small) {
6170 assert(isAIXABI && "ELF ABI handled in common SelectCode");
6171
6172 if (hasTocDataAttr(N->getOperand(0))) {
6173 replaceWith(PPC::ADDItoc8, N, MVT::i64);
6174 return;
6175 }
6176 // Break if it doesn't have toc data attribute. Proceed with common
6177 // SelectCode.
6178 break;
6179 }
6180
6181 assert(CModel != CodeModel::Small && "All small code models handled.");
6182
6183 assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
6184 " ELF/AIX or 32-bit AIX in the following.");
6185
6186 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode,
6187 // 64-bit medium (ELF-only), or 64-bit large (ELF and AIX) code model code
6188 // that does not contain TOC data symbols. We generate two instructions as
6189 // described below. The first source operand is a symbol reference. If it
6190 // must be referenced via the TOC according to Subtarget, we generate:
6191 // [32-bit AIX]
6192 // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
6193 // [64-bit ELF/AIX]
6194 // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
6195 // Otherwise for medium code model ELF we generate:
6196 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6197
6198 // And finally for AIX with toc-data we generate:
6199 // [32-bit AIX]
6200 // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
6201 // [64-bit AIX]
6202 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6203
6204 SDValue GA = N->getOperand(0);
6205 SDValue TOCbase = N->getOperand(1);
6206
6207 EVT VT = Subtarget->getScalarIntVT();
6208 SDNode *Tmp = CurDAG->getMachineNode(
6209 isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
6210
6211 // On AIX, if the symbol has the toc-data attribute it will be defined
6212 // in the TOC entry, so we use an ADDItocL/ADDItocL8.
6213 if (isAIXABI && hasTocDataAttr(GA)) {
6214 ReplaceNode(
6215 N, CurDAG->getMachineNode(isPPC64 ? PPC::ADDItocL8 : PPC::ADDItocL,
6216 dl, VT, SDValue(Tmp, 0), GA));
6217 return;
6218 }
6219
6220 if (PPCLowering->isAccessedAsGotIndirect(GA)) {
6221 // If it is accessed as got-indirect, we need an extra LWZ/LD to load
6222 // the address.
6223 SDNode *MN = CurDAG->getMachineNode(
6224 isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
6225
6226 transferMemOperands(N, MN);
6227 ReplaceNode(N, MN);
6228 return;
6229 }
6230
6231 assert(isPPC64 && "TOC_ENTRY already handled for 32-bit.");
6232 // Build the address relative to the TOC-pointer.
6233 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64,
6234 SDValue(Tmp, 0), GA));
6235 return;
6236 }
6238 // Generate a PIC-safe GOT reference.
6239 assert(Subtarget->is32BitELFABI() &&
6240 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
6241 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
6242 PPCLowering->getPointerTy(CurDAG->getDataLayout()),
6243 MVT::i32);
6244 return;
6245
6246 case PPCISD::VADD_SPLAT: {
6247 // This expands into one of three sequences, depending on whether
6248 // the first operand is odd or even, positive or negative.
6249 assert(isa<ConstantSDNode>(N->getOperand(0)) &&
6250 isa<ConstantSDNode>(N->getOperand(1)) &&
6251 "Invalid operand on VADD_SPLAT!");
6252
6253 int Elt = N->getConstantOperandVal(0);
6254 int EltSize = N->getConstantOperandVal(1);
6255 unsigned Opc1, Opc2, Opc3;
6256 EVT VT;
6257
6258 if (EltSize == 1) {
6259 Opc1 = PPC::VSPLTISB;
6260 Opc2 = PPC::VADDUBM;
6261 Opc3 = PPC::VSUBUBM;
6262 VT = MVT::v16i8;
6263 } else if (EltSize == 2) {
6264 Opc1 = PPC::VSPLTISH;
6265 Opc2 = PPC::VADDUHM;
6266 Opc3 = PPC::VSUBUHM;
6267 VT = MVT::v8i16;
6268 } else {
6269 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
6270 Opc1 = PPC::VSPLTISW;
6271 Opc2 = PPC::VADDUWM;
6272 Opc3 = PPC::VSUBUWM;
6273 VT = MVT::v4i32;
6274 }
6275
6276 if ((Elt & 1) == 0) {
6277 // Elt is even, in the range [-32,-18] + [16,30].
6278 //
6279 // Convert: VADD_SPLAT elt, size
6280 // Into: tmp = VSPLTIS[BHW] elt
6281 // VADDU[BHW]M tmp, tmp
6282 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
6283 SDValue EltVal = getI32Imm(Elt >> 1, dl);
6284 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6285 SDValue TmpVal = SDValue(Tmp, 0);
6286 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
6287 return;
6288 } else if (Elt > 0) {
6289 // Elt is odd and positive, in the range [17,31].
6290 //
6291 // Convert: VADD_SPLAT elt, size
6292 // Into: tmp1 = VSPLTIS[BHW] elt-16
6293 // tmp2 = VSPLTIS[BHW] -16
6294 // VSUBU[BHW]M tmp1, tmp2
6295 SDValue EltVal = getI32Imm(Elt - 16, dl);
6296 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6297 EltVal = getI32Imm(-16, dl);
6298 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6299 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
6300 SDValue(Tmp2, 0)));
6301 return;
6302 } else {
6303 // Elt is odd and negative, in the range [-31,-17].
6304 //
6305 // Convert: VADD_SPLAT elt, size
6306 // Into: tmp1 = VSPLTIS[BHW] elt+16
6307 // tmp2 = VSPLTIS[BHW] -16
6308 // VADDU[BHW]M tmp1, tmp2
6309 SDValue EltVal = getI32Imm(Elt + 16, dl);
6310 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6311 EltVal = getI32Imm(-16, dl);
6312 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6313 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
6314 SDValue(Tmp2, 0)));
6315 return;
6316 }
6317 }
6318 case PPCISD::LD_SPLAT: {
6319 // Here we want to handle splat load for type v16i8 and v8i16 when there is
6320 // no direct move, we don't need to use stack for this case. If target has
6321 // direct move, we should be able to get the best selection in the .td file.
6322 if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
6323 break;
6324
6325 EVT Type = N->getValueType(0);
6326 if (Type != MVT::v16i8 && Type != MVT::v8i16)
6327 break;
6328
6329 // If the alignment for the load is 16 or bigger, we don't need the
6330 // permutated mask to get the required value. The value must be the 0
6331 // element in big endian target or 7/15 in little endian target in the
6332 // result vsx register of lvx instruction.
6333 // Select the instruction in the .td file.
6334 if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
6335 isOffsetMultipleOf(N, 16))
6336 break;
6337
6338 SDValue ZeroReg =
6339 CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
6340 Subtarget->getScalarIntVT());
6341 unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
6342 // v16i8 LD_SPLAT addr
6343 // ======>
6344 // Mask = LVSR/LVSL 0, addr
6345 // LoadLow = LVX 0, addr
6346 // Perm = VPERM LoadLow, LoadLow, Mask
6347 // Splat = VSPLTB 15/0, Perm
6348 //
6349 // v8i16 LD_SPLAT addr
6350 // ======>
6351 // Mask = LVSR/LVSL 0, addr
6352 // LoadLow = LVX 0, addr
6353 // LoadHigh = LVX (LI, 1), addr
6354 // Perm = VPERM LoadLow, LoadHigh, Mask
6355 // Splat = VSPLTH 7/0, Perm
6356 unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
6357 unsigned SplatElemIndex =
6358 Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
6359
6360 SDNode *Mask = CurDAG->getMachineNode(
6361 Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
6362 N->getOperand(1));
6363
6364 SDNode *LoadLow =
6365 CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
6366 {ZeroReg, N->getOperand(1), N->getOperand(0)});
6367
6368 SDNode *LoadHigh = LoadLow;
6369 if (Type == MVT::v8i16) {
6370 LoadHigh = CurDAG->getMachineNode(
6371 PPC::LVX, dl, MVT::v16i8, MVT::Other,
6372 {SDValue(CurDAG->getMachineNode(
6373 LIOpcode, dl, MVT::i32,
6374 CurDAG->getTargetConstant(1, dl, MVT::i8)),
6375 0),
6376 N->getOperand(1), SDValue(LoadLow, 1)});
6377 }
6378
6379 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
6380 transferMemOperands(N, LoadHigh);
6381
6382 SDNode *Perm =
6383 CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
6384 SDValue(LoadHigh, 0), SDValue(Mask, 0));
6385 CurDAG->SelectNodeTo(N, SplatOp, Type,
6386 CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
6387 SDValue(Perm, 0));
6388 return;
6389 }
6390 }
6391
6392 SelectCode(N);
6393}
6394
6395// If the target supports the cmpb instruction, do the idiom recognition here.
6396// We don't do this as a DAG combine because we don't want to do it as nodes
6397// are being combined (because we might miss part of the eventual idiom). We
6398// don't want to do it during instruction selection because we want to reuse
6399// the logic for lowering the masking operations already part of the
6400// instruction selector.
6401SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
6402 SDLoc dl(N);
6403
6404 assert(N->getOpcode() == ISD::OR &&
6405 "Only OR nodes are supported for CMPB");
6406
6407 SDValue Res;
6408 if (!Subtarget->hasCMPB())
6409 return Res;
6410
6411 if (N->getValueType(0) != MVT::i32 &&
6412 N->getValueType(0) != MVT::i64)
6413 return Res;
6414
6415 EVT VT = N->getValueType(0);
6416
6417 SDValue RHS, LHS;
6418 bool BytesFound[8] = {false, false, false, false, false, false, false, false};
6419 uint64_t Mask = 0, Alt = 0;
6420
6421 auto IsByteSelectCC = [this](SDValue O, unsigned &b,
6422 uint64_t &Mask, uint64_t &Alt,
6423 SDValue &LHS, SDValue &RHS) {
6424 if (O.getOpcode() != ISD::SELECT_CC)
6425 return false;
6426 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
6427
6428 if (!isa<ConstantSDNode>(O.getOperand(2)) ||
6429 !isa<ConstantSDNode>(O.getOperand(3)))
6430 return false;
6431
6432 uint64_t PM = O.getConstantOperandVal(2);
6433 uint64_t PAlt = O.getConstantOperandVal(3);
6434 for (b = 0; b < 8; ++b) {
6435 uint64_t Mask = UINT64_C(0xFF) << (8*b);
6436 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
6437 break;
6438 }
6439
6440 if (b == 8)
6441 return false;
6442 Mask |= PM;
6443 Alt |= PAlt;
6444
6445 if (!isa<ConstantSDNode>(O.getOperand(1)) ||
6446 O.getConstantOperandVal(1) != 0) {
6447 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
6448 if (Op0.getOpcode() == ISD::TRUNCATE)
6449 Op0 = Op0.getOperand(0);
6450 if (Op1.getOpcode() == ISD::TRUNCATE)
6451 Op1 = Op1.getOperand(0);
6452
6453 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
6454 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
6456
6457 unsigned Bits = Op0.getValueSizeInBits();
6458 if (b != Bits/8-1)
6459 return false;
6460 if (Op0.getConstantOperandVal(1) != Bits-8)
6461 return false;
6462
6463 LHS = Op0.getOperand(0);
6464 RHS = Op1.getOperand(0);
6465 return true;
6466 }
6467
6468 // When we have small integers (i16 to be specific), the form present
6469 // post-legalization uses SETULT in the SELECT_CC for the
6470 // higher-order byte, depending on the fact that the
6471 // even-higher-order bytes are known to all be zero, for example:
6472 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
6473 // (so when the second byte is the same, because all higher-order
6474 // bits from bytes 3 and 4 are known to be zero, the result of the
6475 // xor can be at most 255)
6476 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
6477 isa<ConstantSDNode>(O.getOperand(1))) {
6478
6479 uint64_t ULim = O.getConstantOperandVal(1);
6480 if (ULim != (UINT64_C(1) << b*8))
6481 return false;
6482
6483 // Now we need to make sure that the upper bytes are known to be
6484 // zero.
6485 unsigned Bits = Op0.getValueSizeInBits();
6486 if (!CurDAG->MaskedValueIsZero(
6487 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
6488 return false;
6489
6490 LHS = Op0.getOperand(0);
6491 RHS = Op0.getOperand(1);
6492 return true;
6493 }
6494
6495 return false;
6496 }
6497
6498 if (CC != ISD::SETEQ)
6499 return false;
6500
6501 SDValue Op = O.getOperand(0);
6502 if (Op.getOpcode() == ISD::AND) {
6503 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6504 return false;
6505 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
6506 return false;
6507
6508 SDValue XOR = Op.getOperand(0);
6509 if (XOR.getOpcode() == ISD::TRUNCATE)
6510 XOR = XOR.getOperand(0);
6511 if (XOR.getOpcode() != ISD::XOR)
6512 return false;
6513
6514 LHS = XOR.getOperand(0);
6515 RHS = XOR.getOperand(1);
6516 return true;
6517 } else if (Op.getOpcode() == ISD::SRL) {
6518 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6519 return false;
6520 unsigned Bits = Op.getValueSizeInBits();
6521 if (b != Bits/8-1)
6522 return false;
6523 if (Op.getConstantOperandVal(1) != Bits-8)
6524 return false;
6525
6526 SDValue XOR = Op.getOperand(0);
6527 if (XOR.getOpcode() == ISD::TRUNCATE)
6528 XOR = XOR.getOperand(0);
6529 if (XOR.getOpcode() != ISD::XOR)
6530 return false;
6531
6532 LHS = XOR.getOperand(0);
6533 RHS = XOR.getOperand(1);
6534 return true;
6535 }
6536
6537 return false;
6538 };
6539
6541 while (!Queue.empty()) {
6542 SDValue V = Queue.pop_back_val();
6543
6544 for (const SDValue &O : V.getNode()->ops()) {
6545 unsigned b = 0;
6546 uint64_t M = 0, A = 0;
6547 SDValue OLHS, ORHS;
6548 if (O.getOpcode() == ISD::OR) {
6549 Queue.push_back(O);
6550 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
6551 if (!LHS) {
6552 LHS = OLHS;
6553 RHS = ORHS;
6554 BytesFound[b] = true;
6555 Mask |= M;
6556 Alt |= A;
6557 } else if ((LHS == ORHS && RHS == OLHS) ||
6558 (RHS == ORHS && LHS == OLHS)) {
6559 BytesFound[b] = true;
6560 Mask |= M;
6561 Alt |= A;
6562 } else {
6563 return Res;
6564 }
6565 } else {
6566 return Res;
6567 }
6568 }
6569 }
6570
6571 unsigned LastB = 0, BCnt = 0;
6572 for (unsigned i = 0; i < 8; ++i)
6573 if (BytesFound[LastB]) {
6574 ++BCnt;
6575 LastB = i;
6576 }
6577
6578 if (!LastB || BCnt < 2)
6579 return Res;
6580
6581 // Because we'll be zero-extending the output anyway if don't have a specific
6582 // value for each input byte (via the Mask), we can 'anyext' the inputs.
6583 if (LHS.getValueType() != VT) {
6584 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
6585 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
6586 }
6587
6588 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
6589
6590 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
6591 if (NonTrivialMask && !Alt) {
6592 // Res = Mask & CMPB
6593 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6594 CurDAG->getConstant(Mask, dl, VT));
6595 } else if (Alt) {
6596 // Res = (CMPB & Mask) | (~CMPB & Alt)
6597 // Which, as suggested here:
6598 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6599 // can be written as:
6600 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
6601 // useful because the (Alt ^ Mask) can be pre-computed.
6602 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6603 CurDAG->getConstant(Mask ^ Alt, dl, VT));
6604 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
6605 CurDAG->getConstant(Alt, dl, VT));
6606 }
6607
6608 return Res;
6609}
6610
6611// When CR bit registers are enabled, an extension of an i1 variable to a i32
6612// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6613// involves constant materialization of a 0 or a 1 or both. If the result of
6614// the extension is then operated upon by some operator that can be constant
6615// folded with a constant 0 or 1, and that constant can be materialized using
6616// only one instruction (like a zero or one), then we should fold in those
6617// operations with the select.
6618void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
6619 if (!Subtarget->useCRBits())
6620 return;
6621
6622 if (N->getOpcode() != ISD::ZERO_EXTEND &&
6623 N->getOpcode() != ISD::SIGN_EXTEND &&
6624 N->getOpcode() != ISD::ANY_EXTEND)
6625 return;
6626
6627 if (N->getOperand(0).getValueType() != MVT::i1)
6628 return;
6629
6630 if (!N->hasOneUse())
6631 return;
6632
6633 SDLoc dl(N);
6634 EVT VT = N->getValueType(0);
6635 SDValue Cond = N->getOperand(0);
6636 SDValue ConstTrue = CurDAG->getSignedConstant(
6637 N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
6638 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
6639
6640 do {
6641 SDNode *User = *N->user_begin();
6642 if (User->getNumOperands() != 2)
6643 break;
6644
6645 auto TryFold = [this, N, User, dl](SDValue Val) {
6646 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
6647 SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
6648 SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
6649
6650 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
6651 User->getValueType(0), {O0, O1});
6652 };
6653
6654 // FIXME: When the semantics of the interaction between select and undef
6655 // are clearly defined, it may turn out to be unnecessary to break here.
6656 SDValue TrueRes = TryFold(ConstTrue);
6657 if (!TrueRes || TrueRes.isUndef())
6658 break;
6659 SDValue FalseRes = TryFold(ConstFalse);
6660 if (!FalseRes || FalseRes.isUndef())
6661 break;
6662
6663 // For us to materialize these using one instruction, we must be able to
6664 // represent them as signed 16-bit integers.
6665 uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal();
6666 if (!isInt<16>(True) || !isInt<16>(False))
6667 break;
6668
6669 // We can replace User with a new SELECT node, and try again to see if we
6670 // can fold the select with its user.
6671 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
6672 N = User;
6673 ConstTrue = TrueRes;
6674 ConstFalse = FalseRes;
6675 } while (N->hasOneUse());
6676}
6677
6678void PPCDAGToDAGISel::PreprocessISelDAG() {
6679 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6680
6681 bool MadeChange = false;
6682 while (Position != CurDAG->allnodes_begin()) {
6683 SDNode *N = &*--Position;
6684 if (N->use_empty())
6685 continue;
6686
6687 SDValue Res;
6688 switch (N->getOpcode()) {
6689 default: break;
6690 case ISD::OR:
6691 Res = combineToCMPB(N);
6692 break;
6693 }
6694
6695 if (!Res)
6696 foldBoolExts(Res, N);
6697
6698 if (Res) {
6699 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6700 LLVM_DEBUG(N->dump(CurDAG));
6701 LLVM_DEBUG(dbgs() << "\nNew: ");
6702 LLVM_DEBUG(Res.getNode()->dump(CurDAG));
6703 LLVM_DEBUG(dbgs() << "\n");
6704
6705 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
6706 MadeChange = true;
6707 }
6708 }
6709
6710 if (MadeChange)
6711 CurDAG->RemoveDeadNodes();
6712}
6713
6714/// PostprocessISelDAG - Perform some late peephole optimizations
6715/// on the DAG representation.
6716void PPCDAGToDAGISel::PostprocessISelDAG() {
6717 // Skip peepholes at -O0.
6718 if (TM.getOptLevel() == CodeGenOptLevel::None)
6719 return;
6720
6721 PeepholePPC64();
6722 PeepholeCROps();
6723 PeepholePPC64ZExt();
6724}
6725
6726// Check if all users of this node will become isel where the second operand
6727// is the constant zero. If this is so, and if we can negate the condition,
6728// then we can flip the true and false operands. This will allow the zero to
6729// be folded with the isel so that we don't need to materialize a register
6730// containing zero.
6731bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
6732 for (const SDNode *User : N->users()) {
6733 if (!User->isMachineOpcode())
6734 return false;
6735 if (User->getMachineOpcode() != PPC::SELECT_I4 &&
6736 User->getMachineOpcode() != PPC::SELECT_I8)
6737 return false;
6738
6739 SDNode *Op1 = User->getOperand(1).getNode();
6740 SDNode *Op2 = User->getOperand(2).getNode();
6741 // If we have a degenerate select with two equal operands, swapping will
6742 // not do anything, and we may run into an infinite loop.
6743 if (Op1 == Op2)
6744 return false;
6745
6746 if (!Op2->isMachineOpcode())
6747 return false;
6748
6749 if (Op2->getMachineOpcode() != PPC::LI &&
6750 Op2->getMachineOpcode() != PPC::LI8)
6751 return false;
6752
6753 if (!isNullConstant(Op2->getOperand(0)))
6754 return false;
6755 }
6756
6757 return true;
6758}
6759
6760void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
6761 SmallVector<SDNode *, 4> ToReplace;
6762 for (SDNode *User : N->users()) {
6763 assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
6764 User->getMachineOpcode() == PPC::SELECT_I8) &&
6765 "Must have all select users");
6766 ToReplace.push_back(User);
6767 }
6768
6769 for (SDNode *User : ToReplace) {
6770 SDNode *ResNode =
6771 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
6772 User->getValueType(0), User->getOperand(0),
6773 User->getOperand(2),
6774 User->getOperand(1));
6775
6776 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6777 LLVM_DEBUG(User->dump(CurDAG));
6778 LLVM_DEBUG(dbgs() << "\nNew: ");
6779 LLVM_DEBUG(ResNode->dump(CurDAG));
6780 LLVM_DEBUG(dbgs() << "\n");
6781
6782 ReplaceUses(User, ResNode);
6783 }
6784}
6785
6786void PPCDAGToDAGISel::PeepholeCROps() {
6787 bool IsModified;
6788 do {
6789 IsModified = false;
6790 for (SDNode &Node : CurDAG->allnodes()) {
6791 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
6792 if (!MachineNode || MachineNode->use_empty())
6793 continue;
6794 SDNode *ResNode = MachineNode;
6795
6796 bool Op1Set = false, Op1Unset = false,
6797 Op1Not = false,
6798 Op2Set = false, Op2Unset = false,
6799 Op2Not = false;
6800
6801 unsigned Opcode = MachineNode->getMachineOpcode();
6802 switch (Opcode) {
6803 default: break;
6804 case PPC::CRAND:
6805 case PPC::CRNAND:
6806 case PPC::CROR:
6807 case PPC::CRXOR:
6808 case PPC::CRNOR:
6809 case PPC::CREQV:
6810 case PPC::CRANDC:
6811 case PPC::CRORC: {
6812 SDValue Op = MachineNode->getOperand(1);
6813 if (Op.isMachineOpcode()) {
6814 if (Op.getMachineOpcode() == PPC::CRSET)
6815 Op2Set = true;
6816 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6817 Op2Unset = true;
6818 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6819 Op.getOperand(0) == Op.getOperand(1)) ||
6820 Op.getMachineOpcode() == PPC::CRNOT)
6821 Op2Not = true;
6822 }
6823 [[fallthrough]];
6824 }
6825 case PPC::BC:
6826 case PPC::BCn:
6827 case PPC::SELECT_I4:
6828 case PPC::SELECT_I8:
6829 case PPC::SELECT_F4:
6830 case PPC::SELECT_F8:
6831 case PPC::SELECT_SPE:
6832 case PPC::SELECT_SPE4:
6833 case PPC::SELECT_VRRC:
6834 case PPC::SELECT_VSFRC:
6835 case PPC::SELECT_VSSRC:
6836 case PPC::SELECT_VSRC: {
6837 SDValue Op = MachineNode->getOperand(0);
6838 if (Op.isMachineOpcode()) {
6839 if (Op.getMachineOpcode() == PPC::CRSET)
6840 Op1Set = true;
6841 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6842 Op1Unset = true;
6843 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6844 Op.getOperand(0) == Op.getOperand(1)) ||
6845 Op.getMachineOpcode() == PPC::CRNOT)
6846 Op1Not = true;
6847 }
6848 }
6849 break;
6850 }
6851
6852 bool SelectSwap = false;
6853 switch (Opcode) {
6854 default: break;
6855 case PPC::CRAND:
6856 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6857 // x & x = x
6858 ResNode = MachineNode->getOperand(0).getNode();
6859 else if (Op1Set)
6860 // 1 & y = y
6861 ResNode = MachineNode->getOperand(1).getNode();
6862 else if (Op2Set)
6863 // x & 1 = x
6864 ResNode = MachineNode->getOperand(0).getNode();
6865 else if (Op1Unset || Op2Unset)
6866 // x & 0 = 0 & y = 0
6867 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6868 MVT::i1);
6869 else if (Op1Not)
6870 // ~x & y = andc(y, x)
6871 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6872 MVT::i1, MachineNode->getOperand(1),
6873 MachineNode->getOperand(0).
6874 getOperand(0));
6875 else if (Op2Not)
6876 // x & ~y = andc(x, y)
6877 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6878 MVT::i1, MachineNode->getOperand(0),
6879 MachineNode->getOperand(1).
6880 getOperand(0));
6881 else if (AllUsersSelectZero(MachineNode)) {
6882 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
6883 MVT::i1, MachineNode->getOperand(0),
6884 MachineNode->getOperand(1));
6885 SelectSwap = true;
6886 }
6887 break;
6888 case PPC::CRNAND:
6889 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6890 // nand(x, x) -> nor(x, x)
6891 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6892 MVT::i1, MachineNode->getOperand(0),
6893 MachineNode->getOperand(0));
6894 else if (Op1Set)
6895 // nand(1, y) -> nor(y, y)
6896 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6897 MVT::i1, MachineNode->getOperand(1),
6898 MachineNode->getOperand(1));
6899 else if (Op2Set)
6900 // nand(x, 1) -> nor(x, x)
6901 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6902 MVT::i1, MachineNode->getOperand(0),
6903 MachineNode->getOperand(0));
6904 else if (Op1Unset || Op2Unset)
6905 // nand(x, 0) = nand(0, y) = 1
6906 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6907 MVT::i1);
6908 else if (Op1Not)
6909 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
6910 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6911 MVT::i1, MachineNode->getOperand(0).
6912 getOperand(0),
6913 MachineNode->getOperand(1));
6914 else if (Op2Not)
6915 // nand(x, ~y) = ~x | y = orc(y, x)
6916 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6917 MVT::i1, MachineNode->getOperand(1).
6918 getOperand(0),
6919 MachineNode->getOperand(0));
6920 else if (AllUsersSelectZero(MachineNode)) {
6921 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
6922 MVT::i1, MachineNode->getOperand(0),
6923 MachineNode->getOperand(1));
6924 SelectSwap = true;
6925 }
6926 break;
6927 case PPC::CROR:
6928 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6929 // x | x = x
6930 ResNode = MachineNode->getOperand(0).getNode();
6931 else if (Op1Set || Op2Set)
6932 // x | 1 = 1 | y = 1
6933 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6934 MVT::i1);
6935 else if (Op1Unset)
6936 // 0 | y = y
6937 ResNode = MachineNode->getOperand(1).getNode();
6938 else if (Op2Unset)
6939 // x | 0 = x
6940 ResNode = MachineNode->getOperand(0).getNode();
6941 else if (Op1Not)
6942 // ~x | y = orc(y, x)
6943 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6944 MVT::i1, MachineNode->getOperand(1),
6945 MachineNode->getOperand(0).
6946 getOperand(0));
6947 else if (Op2Not)
6948 // x | ~y = orc(x, y)
6949 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6950 MVT::i1, MachineNode->getOperand(0),
6951 MachineNode->getOperand(1).
6952 getOperand(0));
6953 else if (AllUsersSelectZero(MachineNode)) {
6954 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6955 MVT::i1, MachineNode->getOperand(0),
6956 MachineNode->getOperand(1));
6957 SelectSwap = true;
6958 }
6959 break;
6960 case PPC::CRXOR:
6961 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6962 // xor(x, x) = 0
6963 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6964 MVT::i1);
6965 else if (Op1Set)
6966 // xor(1, y) -> nor(y, y)
6967 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6968 MVT::i1, MachineNode->getOperand(1),
6969 MachineNode->getOperand(1));
6970 else if (Op2Set)
6971 // xor(x, 1) -> nor(x, x)
6972 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6973 MVT::i1, MachineNode->getOperand(0),
6974 MachineNode->getOperand(0));
6975 else if (Op1Unset)
6976 // xor(0, y) = y
6977 ResNode = MachineNode->getOperand(1).getNode();
6978 else if (Op2Unset)
6979 // xor(x, 0) = x
6980 ResNode = MachineNode->getOperand(0).getNode();
6981 else if (Op1Not)
6982 // xor(~x, y) = eqv(x, y)
6983 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6984 MVT::i1, MachineNode->getOperand(0).
6985 getOperand(0),
6986 MachineNode->getOperand(1));
6987 else if (Op2Not)
6988 // xor(x, ~y) = eqv(x, y)
6989 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6990 MVT::i1, MachineNode->getOperand(0),
6991 MachineNode->getOperand(1).
6992 getOperand(0));
6993 else if (AllUsersSelectZero(MachineNode)) {
6994 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6995 MVT::i1, MachineNode->getOperand(0),
6996 MachineNode->getOperand(1));
6997 SelectSwap = true;
6998 }
6999 break;
7000 case PPC::CRNOR:
7001 if (Op1Set || Op2Set)
7002 // nor(1, y) -> 0
7003 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7004 MVT::i1);
7005 else if (Op1Unset)
7006 // nor(0, y) = ~y -> nor(y, y)
7007 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7008 MVT::i1, MachineNode->getOperand(1),
7009 MachineNode->getOperand(1));
7010 else if (Op2Unset)
7011 // nor(x, 0) = ~x
7012 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7013 MVT::i1, MachineNode->getOperand(0),
7014 MachineNode->getOperand(0));
7015 else if (Op1Not)
7016 // nor(~x, y) = andc(x, y)
7017 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7018 MVT::i1, MachineNode->getOperand(0).
7019 getOperand(0),
7020 MachineNode->getOperand(1));
7021 else if (Op2Not)
7022 // nor(x, ~y) = andc(y, x)
7023 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7024 MVT::i1, MachineNode->getOperand(1).
7025 getOperand(0),
7026 MachineNode->getOperand(0));
7027 else if (AllUsersSelectZero(MachineNode)) {
7028 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7029 MVT::i1, MachineNode->getOperand(0),
7030 MachineNode->getOperand(1));
7031 SelectSwap = true;
7032 }
7033 break;
7034 case PPC::CREQV:
7035 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7036 // eqv(x, x) = 1
7037 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7038 MVT::i1);
7039 else if (Op1Set)
7040 // eqv(1, y) = y
7041 ResNode = MachineNode->getOperand(1).getNode();
7042 else if (Op2Set)
7043 // eqv(x, 1) = x
7044 ResNode = MachineNode->getOperand(0).getNode();
7045 else if (Op1Unset)
7046 // eqv(0, y) = ~y -> nor(y, y)
7047 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7048 MVT::i1, MachineNode->getOperand(1),
7049 MachineNode->getOperand(1));
7050 else if (Op2Unset)
7051 // eqv(x, 0) = ~x
7052 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7053 MVT::i1, MachineNode->getOperand(0),
7054 MachineNode->getOperand(0));
7055 else if (Op1Not)
7056 // eqv(~x, y) = xor(x, y)
7057 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7058 MVT::i1, MachineNode->getOperand(0).
7059 getOperand(0),
7060 MachineNode->getOperand(1));
7061 else if (Op2Not)
7062 // eqv(x, ~y) = xor(x, y)
7063 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7064 MVT::i1, MachineNode->getOperand(0),
7065 MachineNode->getOperand(1).
7066 getOperand(0));
7067 else if (AllUsersSelectZero(MachineNode)) {
7068 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7069 MVT::i1, MachineNode->getOperand(0),
7070 MachineNode->getOperand(1));
7071 SelectSwap = true;
7072 }
7073 break;
7074 case PPC::CRANDC:
7075 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7076 // andc(x, x) = 0
7077 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7078 MVT::i1);
7079 else if (Op1Set)
7080 // andc(1, y) = ~y
7081 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7082 MVT::i1, MachineNode->getOperand(1),
7083 MachineNode->getOperand(1));
7084 else if (Op1Unset || Op2Set)
7085 // andc(0, y) = andc(x, 1) = 0
7086 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7087 MVT::i1);
7088 else if (Op2Unset)
7089 // andc(x, 0) = x
7090 ResNode = MachineNode->getOperand(0).getNode();
7091 else if (Op1Not)
7092 // andc(~x, y) = ~(x | y) = nor(x, y)
7093 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7094 MVT::i1, MachineNode->getOperand(0).
7095 getOperand(0),
7096 MachineNode->getOperand(1));
7097 else if (Op2Not)
7098 // andc(x, ~y) = x & y
7099 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
7100 MVT::i1, MachineNode->getOperand(0),
7101 MachineNode->getOperand(1).
7102 getOperand(0));
7103 else if (AllUsersSelectZero(MachineNode)) {
7104 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
7105 MVT::i1, MachineNode->getOperand(1),
7106 MachineNode->getOperand(0));
7107 SelectSwap = true;
7108 }
7109 break;
7110 case PPC::CRORC:
7111 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7112 // orc(x, x) = 1
7113 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7114 MVT::i1);
7115 else if (Op1Set || Op2Unset)
7116 // orc(1, y) = orc(x, 0) = 1
7117 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7118 MVT::i1);
7119 else if (Op2Set)
7120 // orc(x, 1) = x
7121 ResNode = MachineNode->getOperand(0).getNode();
7122 else if (Op1Unset)
7123 // orc(0, y) = ~y
7124 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7125 MVT::i1, MachineNode->getOperand(1),
7126 MachineNode->getOperand(1));
7127 else if (Op1Not)
7128 // orc(~x, y) = ~(x & y) = nand(x, y)
7129 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
7130 MVT::i1, MachineNode->getOperand(0).
7131 getOperand(0),
7132 MachineNode->getOperand(1));
7133 else if (Op2Not)
7134 // orc(x, ~y) = x | y
7135 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7136 MVT::i1, MachineNode->getOperand(0),
7137 MachineNode->getOperand(1).
7138 getOperand(0));
7139 else if (AllUsersSelectZero(MachineNode)) {
7140 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7141 MVT::i1, MachineNode->getOperand(1),
7142 MachineNode->getOperand(0));
7143 SelectSwap = true;
7144 }
7145 break;
7146 case PPC::SELECT_I4:
7147 case PPC::SELECT_I8:
7148 case PPC::SELECT_F4:
7149 case PPC::SELECT_F8:
7150 case PPC::SELECT_SPE:
7151 case PPC::SELECT_SPE4:
7152 case PPC::SELECT_VRRC:
7153 case PPC::SELECT_VSFRC:
7154 case PPC::SELECT_VSSRC:
7155 case PPC::SELECT_VSRC:
7156 if (Op1Set)
7157 ResNode = MachineNode->getOperand(1).getNode();
7158 else if (Op1Unset)
7159 ResNode = MachineNode->getOperand(2).getNode();
7160 else if (Op1Not)
7161 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
7162 SDLoc(MachineNode),
7163 MachineNode->getValueType(0),
7164 MachineNode->getOperand(0).
7165 getOperand(0),
7166 MachineNode->getOperand(2),
7167 MachineNode->getOperand(1));
7168 break;
7169 case PPC::BC:
7170 case PPC::BCn:
7171 if (Op1Not)
7172 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
7173 PPC::BC,
7174 SDLoc(MachineNode),
7175 MVT::Other,
7176 MachineNode->getOperand(0).
7177 getOperand(0),
7178 MachineNode->getOperand(1),
7179 MachineNode->getOperand(2));
7180 // FIXME: Handle Op1Set, Op1Unset here too.
7181 break;
7182 }
7183
7184 // If we're inverting this node because it is used only by selects that
7185 // we'd like to swap, then swap the selects before the node replacement.
7186 if (SelectSwap)
7187 SwapAllSelectUsers(MachineNode);
7188
7189 if (ResNode != MachineNode) {
7190 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
7191 LLVM_DEBUG(MachineNode->dump(CurDAG));
7192 LLVM_DEBUG(dbgs() << "\nNew: ");
7193 LLVM_DEBUG(ResNode->dump(CurDAG));
7194 LLVM_DEBUG(dbgs() << "\n");
7195
7196 ReplaceUses(MachineNode, ResNode);
7197 IsModified = true;
7198 }
7199 }
7200 if (IsModified)
7201 CurDAG->RemoveDeadNodes();
7202 } while (IsModified);
7203}
7204
7205// Gather the set of 32-bit operations that are known to have their
7206// higher-order 32 bits zero, where ToPromote contains all such operations.
7208 SmallPtrSetImpl<SDNode *> &ToPromote) {
7209 if (!Op32.isMachineOpcode())
7210 return false;
7211
7212 // First, check for the "frontier" instructions (those that will clear the
7213 // higher-order 32 bits.
7214
7215 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
7216 // around. If it does not, then these instructions will clear the
7217 // higher-order bits.
7218 if ((Op32.getMachineOpcode() == PPC::RLWINM ||
7219 Op32.getMachineOpcode() == PPC::RLWNM) &&
7220 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
7221 ToPromote.insert(Op32.getNode());
7222 return true;
7223 }
7224
7225 // SLW and SRW always clear the higher-order bits.
7226 if (Op32.getMachineOpcode() == PPC::SLW ||
7227 Op32.getMachineOpcode() == PPC::SRW) {
7228 ToPromote.insert(Op32.getNode());
7229 return true;
7230 }
7231
7232 // For LI and LIS, we need the immediate to be positive (so that it is not
7233 // sign extended).
7234 if (Op32.getMachineOpcode() == PPC::LI ||
7235 Op32.getMachineOpcode() == PPC::LIS) {
7236 if (!isUInt<15>(Op32.getConstantOperandVal(0)))
7237 return false;
7238
7239 ToPromote.insert(Op32.getNode());
7240 return true;
7241 }
7242
7243 // LHBRX and LWBRX always clear the higher-order bits.
7244 if (Op32.getMachineOpcode() == PPC::LHBRX ||
7245 Op32.getMachineOpcode() == PPC::LWBRX) {
7246 ToPromote.insert(Op32.getNode());
7247 return true;
7248 }
7249
7250 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
7251 if (Op32.getMachineOpcode() == PPC::CNTLZW ||
7252 Op32.getMachineOpcode() == PPC::CNTTZW) {
7253 ToPromote.insert(Op32.getNode());
7254 return true;
7255 }
7256
7257 // Next, check for those instructions we can look through.
7258
7259 // Assuming the mask does not wrap around, then the higher-order bits are
7260 // taken directly from the first operand.
7261 if (Op32.getMachineOpcode() == PPC::RLWIMI &&
7262 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
7263 SmallPtrSet<SDNode *, 16> ToPromote1;
7264 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7265 return false;
7266
7267 ToPromote.insert(Op32.getNode());
7268 ToPromote.insert_range(ToPromote1);
7269 return true;
7270 }
7271
7272 // For OR, the higher-order bits are zero if that is true for both operands.
7273 // For SELECT_I4, the same is true (but the relevant operand numbers are
7274 // shifted by 1).
7275 if (Op32.getMachineOpcode() == PPC::OR ||
7276 Op32.getMachineOpcode() == PPC::SELECT_I4) {
7277 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
7278 SmallPtrSet<SDNode *, 16> ToPromote1;
7279 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
7280 return false;
7281 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
7282 return false;
7283
7284 ToPromote.insert(Op32.getNode());
7285 ToPromote.insert_range(ToPromote1);
7286 return true;
7287 }
7288
7289 // For ORI and ORIS, we need the higher-order bits of the first operand to be
7290 // zero, and also for the constant to be positive (so that it is not sign
7291 // extended).
7292 if (Op32.getMachineOpcode() == PPC::ORI ||
7293 Op32.getMachineOpcode() == PPC::ORIS) {
7294 SmallPtrSet<SDNode *, 16> ToPromote1;
7295 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7296 return false;
7297 if (!isUInt<15>(Op32.getConstantOperandVal(1)))
7298 return false;
7299
7300 ToPromote.insert(Op32.getNode());
7301 ToPromote.insert_range(ToPromote1);
7302 return true;
7303 }
7304
7305 // The higher-order bits of AND are zero if that is true for at least one of
7306 // the operands.
7307 if (Op32.getMachineOpcode() == PPC::AND) {
7308 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
7309 bool Op0OK =
7310 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7311 bool Op1OK =
7312 PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
7313 if (!Op0OK && !Op1OK)
7314 return false;
7315
7316 ToPromote.insert(Op32.getNode());
7317
7318 if (Op0OK)
7319 ToPromote.insert_range(ToPromote1);
7320
7321 if (Op1OK)
7322 ToPromote.insert_range(ToPromote2);
7323
7324 return true;
7325 }
7326
7327 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
7328 // of the first operand, or if the second operand is positive (so that it is
7329 // not sign extended).
7330 if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
7331 Op32.getMachineOpcode() == PPC::ANDIS_rec) {
7332 SmallPtrSet<SDNode *, 16> ToPromote1;
7333 bool Op0OK =
7334 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7335 bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
7336 if (!Op0OK && !Op1OK)
7337 return false;
7338
7339 ToPromote.insert(Op32.getNode());
7340
7341 if (Op0OK)
7342 ToPromote.insert_range(ToPromote1);
7343
7344 return true;
7345 }
7346
7347 return false;
7348}
7349
7350void PPCDAGToDAGISel::PeepholePPC64ZExt() {
7351 if (!Subtarget->isPPC64())
7352 return;
7353
7354 // When we zero-extend from i32 to i64, we use a pattern like this:
7355 // def : Pat<(i64 (zext i32:$in)),
7356 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
7357 // 0, 32)>;
7358 // There are several 32-bit shift/rotate instructions, however, that will
7359 // clear the higher-order bits of their output, rendering the RLDICL
7360 // unnecessary. When that happens, we remove it here, and redefine the
7361 // relevant 32-bit operation to be a 64-bit operation.
7362
7363 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7364
7365 bool MadeChange = false;
7366 while (Position != CurDAG->allnodes_begin()) {
7367 SDNode *N = &*--Position;
7368 // Skip dead nodes and any non-machine opcodes.
7369 if (N->use_empty() || !N->isMachineOpcode())
7370 continue;
7371
7372 if (N->getMachineOpcode() != PPC::RLDICL)
7373 continue;
7374
7375 if (N->getConstantOperandVal(1) != 0 ||
7376 N->getConstantOperandVal(2) != 32)
7377 continue;
7378
7379 SDValue ISR = N->getOperand(0);
7380 if (!ISR.isMachineOpcode() ||
7381 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
7382 continue;
7383
7384 if (!ISR.hasOneUse())
7385 continue;
7386
7387 if (ISR.getConstantOperandVal(2) != PPC::sub_32)
7388 continue;
7389
7390 SDValue IDef = ISR.getOperand(0);
7391 if (!IDef.isMachineOpcode() ||
7392 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
7393 continue;
7394
7395 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
7396 // can get rid of it.
7397
7398 SDValue Op32 = ISR->getOperand(1);
7399 if (!Op32.isMachineOpcode())
7400 continue;
7401
7402 // There are some 32-bit instructions that always clear the high-order 32
7403 // bits, there are also some instructions (like AND) that we can look
7404 // through.
7405 SmallPtrSet<SDNode *, 16> ToPromote;
7406 if (!PeepholePPC64ZExtGather(Op32, ToPromote))
7407 continue;
7408
7409 // If the ToPromote set contains nodes that have uses outside of the set
7410 // (except for the original INSERT_SUBREG), then abort the transformation.
7411 bool OutsideUse = false;
7412 for (SDNode *PN : ToPromote) {
7413 for (SDNode *UN : PN->users()) {
7414 if (!ToPromote.count(UN) && UN != ISR.getNode()) {
7415 OutsideUse = true;
7416 break;
7417 }
7418 }
7419
7420 if (OutsideUse)
7421 break;
7422 }
7423 if (OutsideUse)
7424 continue;
7425
7426 MadeChange = true;
7427
7428 // We now know that this zero extension can be removed by promoting to
7429 // nodes in ToPromote to 64-bit operations, where for operations in the
7430 // frontier of the set, we need to insert INSERT_SUBREGs for their
7431 // operands.
7432 for (SDNode *PN : ToPromote) {
7433 unsigned NewOpcode;
7434 switch (PN->getMachineOpcode()) {
7435 default:
7436 llvm_unreachable("Don't know the 64-bit variant of this instruction");
7437 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
7438 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
7439 case PPC::SLW: NewOpcode = PPC::SLW8; break;
7440 case PPC::SRW: NewOpcode = PPC::SRW8; break;
7441 case PPC::LI: NewOpcode = PPC::LI8; break;
7442 case PPC::LIS: NewOpcode = PPC::LIS8; break;
7443 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
7444 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
7445 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
7446 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
7447 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
7448 case PPC::OR: NewOpcode = PPC::OR8; break;
7449 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
7450 case PPC::ORI: NewOpcode = PPC::ORI8; break;
7451 case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
7452 case PPC::AND: NewOpcode = PPC::AND8; break;
7453 case PPC::ANDI_rec:
7454 NewOpcode = PPC::ANDI8_rec;
7455 break;
7456 case PPC::ANDIS_rec:
7457 NewOpcode = PPC::ANDIS8_rec;
7458 break;
7459 }
7460
7461 // Note: During the replacement process, the nodes will be in an
7462 // inconsistent state (some instructions will have operands with values
7463 // of the wrong type). Once done, however, everything should be right
7464 // again.
7465
7467 for (const SDValue &V : PN->ops()) {
7468 if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
7469 !isa<ConstantSDNode>(V)) {
7470 SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
7471 SDNode *ReplOp =
7472 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
7473 ISR.getNode()->getVTList(), ReplOpOps);
7474 Ops.push_back(SDValue(ReplOp, 0));
7475 } else {
7476 Ops.push_back(V);
7477 }
7478 }
7479
7480 // Because all to-be-promoted nodes only have users that are other
7481 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
7482 // the i32 result value type with i64.
7483
7484 SmallVector<EVT, 2> NewVTs;
7485 SDVTList VTs = PN->getVTList();
7486 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
7487 if (VTs.VTs[i] == MVT::i32)
7488 NewVTs.push_back(MVT::i64);
7489 else
7490 NewVTs.push_back(VTs.VTs[i]);
7491
7492 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
7493 LLVM_DEBUG(PN->dump(CurDAG));
7494
7495 CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
7496
7497 LLVM_DEBUG(dbgs() << "\nNew: ");
7498 LLVM_DEBUG(PN->dump(CurDAG));
7499 LLVM_DEBUG(dbgs() << "\n");
7500 }
7501
7502 // Now we replace the original zero extend and its associated INSERT_SUBREG
7503 // with the value feeding the INSERT_SUBREG (which has now been promoted to
7504 // return an i64).
7505
7506 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
7507 LLVM_DEBUG(N->dump(CurDAG));
7508 LLVM_DEBUG(dbgs() << "\nNew: ");
7509 LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
7510 LLVM_DEBUG(dbgs() << "\n");
7511
7512 ReplaceUses(N, Op32.getNode());
7513 }
7514
7515 if (MadeChange)
7516 CurDAG->RemoveDeadNodes();
7517}
7518
7519static bool isVSXSwap(SDValue N) {
7520 if (!N->isMachineOpcode())
7521 return false;
7522 unsigned Opc = N->getMachineOpcode();
7523
7524 // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
7525 // operand is 2.
7526 if (Opc == PPC::XXPERMDIs) {
7527 return isa<ConstantSDNode>(N->getOperand(1)) &&
7528 N->getConstantOperandVal(1) == 2;
7529 } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
7530 return N->getOperand(0) == N->getOperand(1) &&
7531 isa<ConstantSDNode>(N->getOperand(2)) &&
7532 N->getConstantOperandVal(2) == 2;
7533 }
7534
7535 return false;
7536}
7537
7538// TODO: Make this complete and replace with a table-gen bit.
7540 if (!N->isMachineOpcode())
7541 return false;
7542 unsigned Opc = N->getMachineOpcode();
7543
7544 switch (Opc) {
7545 default:
7546 return false;
7547 case PPC::VAVGSB:
7548 case PPC::VAVGUB:
7549 case PPC::VAVGSH:
7550 case PPC::VAVGUH:
7551 case PPC::VAVGSW:
7552 case PPC::VAVGUW:
7553 case PPC::VMAXFP:
7554 case PPC::VMAXSB:
7555 case PPC::VMAXUB:
7556 case PPC::VMAXSH:
7557 case PPC::VMAXUH:
7558 case PPC::VMAXSW:
7559 case PPC::VMAXUW:
7560 case PPC::VMINFP:
7561 case PPC::VMINSB:
7562 case PPC::VMINUB:
7563 case PPC::VMINSH:
7564 case PPC::VMINUH:
7565 case PPC::VMINSW:
7566 case PPC::VMINUW:
7567 case PPC::VADDFP:
7568 case PPC::VADDUBM:
7569 case PPC::VADDUHM:
7570 case PPC::VADDUWM:
7571 case PPC::VSUBFP:
7572 case PPC::VSUBUBM:
7573 case PPC::VSUBUHM:
7574 case PPC::VSUBUWM:
7575 case PPC::VAND:
7576 case PPC::VANDC:
7577 case PPC::VOR:
7578 case PPC::VORC:
7579 case PPC::VXOR:
7580 case PPC::VNOR:
7581 case PPC::VMULUWM:
7582 return true;
7583 }
7584}
7585
7586// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7587// lane-insensitive.
7588static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
7589 // Our desired xxswap might be source of COPY_TO_REGCLASS.
7590 // TODO: Can we put this a common method for DAG?
7591 auto SkipRCCopy = [](SDValue V) {
7592 while (V->isMachineOpcode() &&
7593 V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
7594 // All values in the chain should have single use.
7595 if (V->use_empty() || !V->user_begin()->isOnlyUserOf(V.getNode()))
7596 return SDValue();
7597 V = V->getOperand(0);
7598 }
7599 return V.hasOneUse() ? V : SDValue();
7600 };
7601
7602 SDValue VecOp = SkipRCCopy(N->getOperand(0));
7603 if (!VecOp || !isLaneInsensitive(VecOp))
7604 return;
7605
7606 SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
7607 RHS = SkipRCCopy(VecOp.getOperand(1));
7608 if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
7609 return;
7610
7611 // These swaps may still have chain-uses here, count on dead code elimination
7612 // in following passes to remove them.
7613 DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));
7614 DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));
7615 DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
7616}
7617
7618// Check if an SDValue has the 'aix-small-tls' global variable attribute.
7619static bool hasAIXSmallTLSAttr(SDValue Val) {
7621 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()))
7622 if (GV->hasAttribute("aix-small-tls"))
7623 return true;
7624
7625 return false;
7626}
7627
7628// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
7629// accesses?
7631 SDValue ADDIToFold) {
7632 // Check if ADDIToFold (the ADDI that we want to fold into local-exec
7633 // accesses), is truly an ADDI.
7634 if (!ADDIToFold.isMachineOpcode() ||
7635 (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
7636 return false;
7637
7638 // Folding is only allowed for the AIX small-local-[exec|dynamic] TLS target
7639 // attribute or when the 'aix-small-tls' global variable attribute is present.
7640 const PPCSubtarget &Subtarget =
7642 SDValue TLSVarNode = ADDIToFold.getOperand(1);
7643 if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
7644 Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
7645 return false;
7646
7647 // The second operand of the ADDIToFold should be the global TLS address
7648 // (the local-exec TLS variable). We only perform the folding if the TLS
7649 // variable is the second operand.
7651 if (!GA)
7652 return false;
7653
7654 if (DAG->getTarget().getTLSModel(GA->getGlobal()) == TLSModel::LocalExec) {
7655 // The first operand of the ADDIToFold should be the thread pointer.
7656 // This transformation is only performed if the first operand of the
7657 // addi is the thread pointer.
7658 SDValue TPRegNode = ADDIToFold.getOperand(0);
7659 RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7660 if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7661 return false;
7662 }
7663
7664 // The local-[exec|dynamic] TLS variable should only have the
7665 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
7666 // performed otherwise if the flag is not set.
7667 unsigned TargetFlags = GA->getTargetFlags();
7668 if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
7669 TargetFlags == PPCII::MO_TLSLD_FLAG))
7670 return false;
7671
7672 // If all conditions are satisfied, the ADDI is valid for folding.
7673 return true;
7674}
7675
7676// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
7677// another addi, fold this sequence into a single addi if possible. Before this
7678// optimization, the sequence appears as:
7679// addi rN, r13, sym@[le|ld]
7680// addi rM, rN, imm
7681// After this optimization, we can fold the two addi into a single one:
7682// addi rM, r13, sym@[le|ld] + imm
7684 if (N->getMachineOpcode() != PPC::ADDI8)
7685 return;
7686
7687 // InitialADDI is the addi feeding into N (also an addi), and the addi that
7688 // we want optimized out.
7689 SDValue InitialADDI = N->getOperand(0);
7690
7691 if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, InitialADDI))
7692 return;
7693
7694 // The second operand of the InitialADDI should be the global TLS address
7695 // (the local-[exec|dynamic] TLS variable), with the
7696 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
7697 // isEligibleToFoldADDIForFasterLocalAccesses().
7698 SDValue TLSVarNode = InitialADDI.getOperand(1);
7700 assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7701 "local-[exec|dynamic] accesses!");
7702 unsigned TargetFlags = GA->getTargetFlags();
7703
7704 // The second operand of the addi that we want to preserve will be an
7705 // immediate. We add this immediate, together with the address of the TLS
7706 // variable found in InitialADDI, in order to preserve the correct TLS address
7707 // information during assembly printing. The offset is likely to be non-zero
7708 // when we end up in this case.
7709 int Offset = N->getConstantOperandVal(1);
7710 TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
7711 Offset, TargetFlags);
7712
7713 (void)DAG->UpdateNodeOperands(N, InitialADDI.getOperand(0), TLSVarNode);
7714 if (InitialADDI.getNode()->use_empty())
7715 DAG->RemoveDeadNode(InitialADDI.getNode());
7716}
7717
7718void PPCDAGToDAGISel::PeepholePPC64() {
7719 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7720
7721 while (Position != CurDAG->allnodes_begin()) {
7722 SDNode *N = &*--Position;
7723 // Skip dead nodes and any non-machine opcodes.
7724 if (N->use_empty() || !N->isMachineOpcode())
7725 continue;
7726
7727 if (isVSXSwap(SDValue(N, 0)))
7728 reduceVSXSwap(N, CurDAG);
7729
7730 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7731 // accesses.
7733
7734 unsigned FirstOp;
7735 unsigned StorageOpcode = N->getMachineOpcode();
7736 bool RequiresMod4Offset = false;
7737
7738 switch (StorageOpcode) {
7739 default: continue;
7740
7741 case PPC::LWA:
7742 case PPC::LD:
7743 case PPC::DFLOADf64:
7744 case PPC::DFLOADf32:
7745 RequiresMod4Offset = true;
7746 [[fallthrough]];
7747 case PPC::LBZ:
7748 case PPC::LBZ8:
7749 case PPC::LFD:
7750 case PPC::LFS:
7751 case PPC::LHA:
7752 case PPC::LHA8:
7753 case PPC::LHZ:
7754 case PPC::LHZ8:
7755 case PPC::LWZ:
7756 case PPC::LWZ8:
7757 FirstOp = 0;
7758 break;
7759
7760 case PPC::STD:
7761 case PPC::DFSTOREf64:
7762 case PPC::DFSTOREf32:
7763 RequiresMod4Offset = true;
7764 [[fallthrough]];
7765 case PPC::STB:
7766 case PPC::STB8:
7767 case PPC::STFD:
7768 case PPC::STFS:
7769 case PPC::STH:
7770 case PPC::STH8:
7771 case PPC::STW:
7772 case PPC::STW8:
7773 FirstOp = 1;
7774 break;
7775 }
7776
7777 // If this is a load or store with a zero offset, or within the alignment,
7778 // we may be able to fold an add-immediate into the memory operation.
7779 // The check against alignment is below, as it can't occur until we check
7780 // the arguments to N
7781 if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
7782 continue;
7783
7784 SDValue Base = N->getOperand(FirstOp + 1);
7785 if (!Base.isMachineOpcode())
7786 continue;
7787
7788 unsigned Flags = 0;
7789 bool ReplaceFlags = true;
7790
7791 // When the feeding operation is an add-immediate of some sort,
7792 // determine whether we need to add relocation information to the
7793 // target flags on the immediate operand when we fold it into the
7794 // load instruction.
7795 //
7796 // For something like ADDItocL8, the relocation information is
7797 // inferred from the opcode; when we process it in the AsmPrinter,
7798 // we add the necessary relocation there. A load, though, can receive
7799 // relocation from various flavors of ADDIxxx, so we need to carry
7800 // the relocation information in the target flags.
7801 switch (Base.getMachineOpcode()) {
7802 default: continue;
7803
7804 case PPC::ADDI8:
7805 case PPC::ADDI:
7806 // In some cases (such as TLS) the relocation information
7807 // is already in place on the operand, so copying the operand
7808 // is sufficient.
7809 ReplaceFlags = false;
7810 break;
7811 case PPC::ADDIdtprelL:
7813 break;
7814 case PPC::ADDItlsldL:
7816 break;
7817 case PPC::ADDItocL8:
7818 // Skip the following peephole optimizations for ADDItocL8 on AIX which
7819 // is used for toc-data access.
7820 if (Subtarget->isAIXABI())
7821 continue;
7823 break;
7824 }
7825
7826 SDValue ImmOpnd = Base.getOperand(1);
7827
7828 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
7829 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
7830 // we might have needed different @ha relocation values for the offset
7831 // pointers).
7832 int MaxDisplacement = 7;
7833 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7834 const GlobalValue *GV = GA->getGlobal();
7835 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7836 MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
7837 }
7838
7839 bool UpdateHBase = false;
7840 SDValue HBase = Base.getOperand(0);
7841
7842 int Offset = N->getConstantOperandVal(FirstOp);
7843 if (ReplaceFlags) {
7844 if (Offset < 0 || Offset > MaxDisplacement) {
7845 // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has only
7846 // one use, then we can do this for any offset; we just need to also
7847 // update the offset (i.e. the symbol addend) on the addis.
7848 if (Base.getMachineOpcode() != PPC::ADDItocL8)
7849 continue;
7850
7851 if (!HBase.isMachineOpcode() ||
7852 HBase.getMachineOpcode() != PPC::ADDIStocHA8)
7853 continue;
7854
7855 if (!Base.hasOneUse() || !HBase.hasOneUse())
7856 continue;
7857
7858 SDValue HImmOpnd = HBase.getOperand(1);
7859 if (HImmOpnd != ImmOpnd)
7860 continue;
7861
7862 UpdateHBase = true;
7863 }
7864 } else {
7865 // Global addresses can be folded, but only if they are sufficiently
7866 // aligned.
7867 if (RequiresMod4Offset) {
7868 if (GlobalAddressSDNode *GA =
7870 const GlobalValue *GV = GA->getGlobal();
7871 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7872 if (Alignment < 4)
7873 continue;
7874 }
7875 }
7876
7877 // If we're directly folding the addend from an addi instruction, then:
7878 // 1. In general, the offset on the memory access must be zero.
7879 // 2. If the addend is a constant, then it can be combined with a
7880 // non-zero offset, but only if the result meets the encoding
7881 // requirements.
7882 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
7883 Offset += C->getSExtValue();
7884
7885 if (RequiresMod4Offset && (Offset % 4) != 0)
7886 continue;
7887
7888 if (!isInt<16>(Offset))
7889 continue;
7890
7891 ImmOpnd = CurDAG->getSignedTargetConstant(Offset, SDLoc(ImmOpnd),
7892 ImmOpnd.getValueType());
7893 } else if (Offset != 0) {
7894 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7895 // accesses.
7897 // Add the non-zero offset information into the load or store
7898 // instruction to be used for non-TOC-based local-[exec|dynamic]
7899 // accesses.
7900 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
7901 assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7902 "addi into local-[exec|dynamic] accesses!");
7903 ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
7904 MVT::i64, Offset,
7905 GA->getTargetFlags());
7906 } else
7907 continue;
7908 }
7909 }
7910
7911 // We found an opportunity. Reverse the operands from the add
7912 // immediate and substitute them into the load or store. If
7913 // needed, update the target flags for the immediate operand to
7914 // reflect the necessary relocation information.
7915 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
7916 LLVM_DEBUG(Base->dump(CurDAG));
7917 LLVM_DEBUG(dbgs() << "\nN: ");
7918 LLVM_DEBUG(N->dump(CurDAG));
7919 LLVM_DEBUG(dbgs() << "\n");
7920
7921 // If the relocation information isn't already present on the
7922 // immediate operand, add it now.
7923 if (ReplaceFlags) {
7924 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7925 SDLoc dl(GA);
7926 const GlobalValue *GV = GA->getGlobal();
7927 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7928 // We can't perform this optimization for data whose alignment
7929 // is insufficient for the instruction encoding.
7930 if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
7931 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
7932 continue;
7933 }
7934 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
7935 } else if (ConstantPoolSDNode *CP =
7937 const Constant *C = CP->getConstVal();
7938 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
7939 Offset, Flags);
7940 }
7941 }
7942
7943 if (FirstOp == 1) // Store
7944 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
7945 Base.getOperand(0), N->getOperand(3));
7946 else // Load
7947 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
7948 N->getOperand(2));
7949
7950 if (UpdateHBase)
7951 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
7952 ImmOpnd);
7953
7954 // The add-immediate may now be dead, in which case remove it.
7955 if (Base.getNode()->use_empty())
7956 CurDAG->RemoveDeadNode(Base.getNode());
7957 }
7958}
7959
7960/// createPPCISelDag - This pass converts a legalized DAG into a
7961/// PowerPC-specific DAG, ready for instruction scheduling.
7962///
7964 CodeGenOptLevel OptLevel) {
7965 return new PPCDAGToDAGISelLegacy(TM, OptLevel);
7966}
unsigned SubReg
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
MachineBasicBlock MachineBasicBlock::iterator MBBI
Function Alias Analysis false
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
static MaybeAlign getAlign(Value *Ptr)
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG, SDValue ADDIToFold)
static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base)
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
static bool hasTocDataAttr(SDValue Val)
static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG)
static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG)
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode * > &ToPromote)
static bool isLaneInsensitive(SDValue N)
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N)
static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget, const TargetMachine &TM, const SDNode *Node)
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, const PPCSubtarget *Subtarget)
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static unsigned getBranchHint(unsigned PCC, const FunctionLoweringInfo &FuncInfo, const SDValue &DestMBB)
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, bool &NeedSwapOps, bool &IsUnCmp)
static cl::opt< bool > EnableTLSOpt("ppc-tls-opt", cl::init(true), cl::desc("Enable tls optimization peephole"), cl::Hidden)
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
static cl::opt< ICmpInGPRType > CmpInGPR("ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), cl::desc("Specify the types of comparisons to emit GPR-only code for."), cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), clEnumValN(ICGPR_NonExtIn, "nonextin", "Only comparisons where inputs don't need [sz]ext."), clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), clEnumValN(ICGPR_ZextI32, "zexti32", "Only i32 comparisons with zext result."), clEnumValN(ICGPR_ZextI64, "zexti64", "Only i64 comparisons with zext result."), clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), clEnumValN(ICGPR_SextI32, "sexti32", "Only i32 comparisons with sext result."), clEnumValN(ICGPR_SextI64, "sexti64", "Only i64 comparisons with sext result.")))
static SDNode * selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
static SDNode * selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
static bool hasAIXSmallTLSAttr(SDValue Val)
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden)
static bool isSWTestOp(SDValue N)
static SDNode * selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned *InstCnt=nullptr)
ICmpInGPRType
@ ICGPR_ZextI32
@ ICGPR_I64
@ ICGPR_All
@ ICGPR_None
@ ICGPR_NonExtIn
@ ICGPR_Sext
@ ICGPR_I32
@ ICGPR_SextI64
@ ICGPR_ZextI64
@ ICGPR_SextI32
@ ICGPR_Zext
static bool isVSXSwap(SDValue N)
static uint32_t findContiguousZerosAtLeast(uint64_t Imm, unsigned Num)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
LLVM_ABI APInt rotr(unsigned rotateAmt) const
Rotate right by rotateAmt.
Definition APInt.cpp:1165
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
MachineBasicBlock * getBasicBlock() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
LLVM_ABI BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
int64_t getSExtValue() const
A debug info location.
Definition DebugLoc.h:123
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
BranchProbabilityInfo * BPI
MachineBasicBlock * MBB
MBB - The current block.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists.
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:470
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1448
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setROPProtectionHashSaveIndex(int Idx)
static int getRecordFormOpcode(unsigned Opcode)
bool is32BitELFABI() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCInstrInfo * getInstrInfo() const override
MCRegister getThreadPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
bool isTargetELF() const
CodeModel::Model getCodeModel(const TargetMachine &TM, const GlobalValue *GV) const
Calculates the effective code model for argument GV.
bool isELFv2ABI() const
Common code between 32-bit and 64-bit PowerPC targets.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
LLVM_ABI void dump() const
Dump this node, for debugging.
bool hasOneUse() const
Return true if there is exactly one use of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
const TargetLowering & getTargetLoweringInfo() const
allnodes_const_iterator allnodes_begin() const
allnodes_const_iterator allnodes_end() const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
ilist< SDNode >::iterator allnodes_iterator
int getMaskElt(unsigned Idx) const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This class is used to represent ISD::STORE nodes.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Primary interface to the complete machine description for the target machine.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool isPositionIndependent() const
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned getID() const
Return the register class ID number.
virtual const TargetLowering * getTargetLowering() const
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:963
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:818
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:778
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:852
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:843
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:664
@ BR_CC
BR_CC - Conditional branch.
@ BRIND
BRIND - Indirect branch.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:795
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:764
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:849
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:810
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:738
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:855
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
@ MO_TLSLD_LO
Definition PPC.h:184
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:150
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:113
@ MO_DTPREL_LO
These values identify relocations on immediates folded into memory operations.
Definition PPC.h:183
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:140
@ MO_TOC_LO
Definition PPC.h:185
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Define
Register definition.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
int countl_one(T Value)
Count the number of ones from the most significant bit to the first zero bit.
Definition bit.h:280
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
FunctionPass * createPPCISelDag(PPCTargetMachine &TM, CodeGenOptLevel OL)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG, ready for instruction scheduling.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Other
Any other memory.
Definition ModRef.h:68
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
Definition STLExtras.h:2182
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition ValueTypes.h:94
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
unsigned int NumVTs